diff options
| author | Louis Burda <quent.burda@gmail.com> | 2023-06-21 15:52:24 +0200 |
|---|---|---|
| committer | Louis Burda <quent.burda@gmail.com> | 2023-06-21 16:01:33 +0200 |
| commit | e4a3e0eac48378efb23510650c1c465914d4ee94 (patch) | |
| tree | 601f140b4a133512b205b5953395bbf0c60ef4cf | |
| download | debruijn-e4a3e0eac48378efb23510650c1c465914d4ee94.tar.gz debruijn-e4a3e0eac48378efb23510650c1c465914d4ee94.zip | |
Add initial version
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | Makefile | 25 | ||||
| -rw-r--r-- | main.c | 91 |
3 files changed, 119 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bade94f --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +debruijn +compile_commands.json +.cache diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..dd5ad44 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +CFLAGS = -Wunused-variable -Wunused-function -Wconversion + +ifeq ($(DEBUG),1) +CFLAGS += -Og -g +else +CFLAGS += -O2 +endif + +all: debruijn + +debruijn: main.c + $(CC) -o $@ $< $(CFLAGS) + +cleanall: clean + +clean: + rm -f debruijn + +install: + install -m 755 debruijn -t "$(DESTDIR)$(PREFIX)$(BINDIR)" + +uninstall: + rm -f "$(DESTDIR)$(PREFIX)$(BINDIR)/debruijn" + +.PHONY: all cleanall clean install uninstall @@ -0,0 +1,91 @@ +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> + +/* De Bruijn sequence generation based on Burrows-Wheeler transform */ + +static void +die(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "debruijn: "); + vfprintf(stderr, fmt, ap); + if (*fmt && fmt[strlen(fmt)-1] == ':') { + fputc(' ', stderr); + perror(NULL); + } else { + fputc('\n', stderr); + } + va_end(ap); + + exit(1); +} + +static void +gen(bool *visited, const char *alph, size_t k, size_t n, size_t len, size_t cnt) +{ + size_t i, si, so, plen, out; + + plen = len / k; + out = 0; + memset(visited, 0, len); + for (i = 0; i < len; i++) { + if (visited[i]) continue; + so = i; + do { + si = so; + + putchar(alph[si / plen]); + if (++out == cnt) return; + visited[si] = true; + + /* (sorted -> unsorted) translation */ + so = (si % plen) * k + si / plen; + } while (!visited[so]); + } +} + +int +main(int argc, const char **argv) +{ + const char **arg; + const char *alph_arg, *n_arg; + size_t i, k, n, len; + bool all, *visited; + + all = false; + alph_arg = n_arg = NULL; + for (arg = argv + 1; *arg; arg++) { + if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) { + break; + } else if (!strcmp(*arg, "-a")) { + all = true; + } else if (!alph_arg) { + alph_arg = *arg; + } else if (!n_arg) { + n_arg = *arg; + } else { + break; + } + } + + if (*arg || !alph_arg || !n_arg) { + fprintf(stderr, "Usage: debruijn ALPH N\n"); + return 1; + } + + k = strlen(alph_arg); + n = strtoull(n_arg, NULL, 10); + if (!k || !n) die("invalid args"); + + for (len = 1, i = 0; i < n; i++) len *= k; + visited = malloc(len); + if (!visited) die("malloc:"); + + gen(visited, alph_arg, k, n, len, len); + if (all) gen(visited, alph_arg, k, n, len, k - 1); +} |
