summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLouis Burda <quent.burda@gmail.com>2023-06-21 15:52:24 +0200
committerLouis Burda <quent.burda@gmail.com>2023-06-21 16:01:33 +0200
commite4a3e0eac48378efb23510650c1c465914d4ee94 (patch)
tree601f140b4a133512b205b5953395bbf0c60ef4cf
downloaddebruijn-e4a3e0eac48378efb23510650c1c465914d4ee94.tar.gz
debruijn-e4a3e0eac48378efb23510650c1c465914d4ee94.zip
Add initial version
-rw-r--r--.gitignore3
-rw-r--r--Makefile25
-rw-r--r--main.c91
3 files changed, 119 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bade94f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+debruijn
+compile_commands.json
+.cache
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..dd5ad44
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+CFLAGS = -Wunused-variable -Wunused-function -Wconversion
+
+ifeq ($(DEBUG),1)
+CFLAGS += -Og -g
+else
+CFLAGS += -O2
+endif
+
+all: debruijn
+
+debruijn: main.c
+ $(CC) -o $@ $< $(CFLAGS)
+
+cleanall: clean
+
+clean:
+ rm -f debruijn
+
+install:
+ install -m 755 debruijn -t "$(DESTDIR)$(PREFIX)$(BINDIR)"
+
+uninstall:
+ rm -f "$(DESTDIR)$(PREFIX)$(BINDIR)/debruijn"
+
+.PHONY: all cleanall clean install uninstall
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..bb5f41b
--- /dev/null
+++ b/main.c
@@ -0,0 +1,91 @@
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+/* De Bruijn sequence generation based on Burrows-Wheeler transform */
+
+static void
+die(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "debruijn: ");
+ vfprintf(stderr, fmt, ap);
+ if (*fmt && fmt[strlen(fmt)-1] == ':') {
+ fputc(' ', stderr);
+ perror(NULL);
+ } else {
+ fputc('\n', stderr);
+ }
+ va_end(ap);
+
+ exit(1);
+}
+
+static void
+gen(bool *visited, const char *alph, size_t k, size_t n, size_t len, size_t cnt)
+{
+ size_t i, si, so, plen, out;
+
+ plen = len / k;
+ out = 0;
+ memset(visited, 0, len);
+ for (i = 0; i < len; i++) {
+ if (visited[i]) continue;
+ so = i;
+ do {
+ si = so;
+
+ putchar(alph[si / plen]);
+ if (++out == cnt) return;
+ visited[si] = true;
+
+ /* (sorted -> unsorted) translation */
+ so = (si % plen) * k + si / plen;
+ } while (!visited[so]);
+ }
+}
+
+int
+main(int argc, const char **argv)
+{
+ const char **arg;
+ const char *alph_arg, *n_arg;
+ size_t i, k, n, len;
+ bool all, *visited;
+
+ all = false;
+ alph_arg = n_arg = NULL;
+ for (arg = argv + 1; *arg; arg++) {
+ if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) {
+ break;
+ } else if (!strcmp(*arg, "-a")) {
+ all = true;
+ } else if (!alph_arg) {
+ alph_arg = *arg;
+ } else if (!n_arg) {
+ n_arg = *arg;
+ } else {
+ break;
+ }
+ }
+
+ if (*arg || !alph_arg || !n_arg) {
+ fprintf(stderr, "Usage: debruijn ALPH N\n");
+ return 1;
+ }
+
+ k = strlen(alph_arg);
+ n = strtoull(n_arg, NULL, 10);
+ if (!k || !n) die("invalid args");
+
+ for (len = 1, i = 0; i < n; i++) len *= k;
+ visited = malloc(len);
+ if (!visited) die("malloc:");
+
+ gen(visited, alph_arg, k, n, len, len);
+ if (all) gen(visited, alph_arg, k, n, len, k - 1);
+}