summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 4
-rw-r--r-- main.c 272
2 files changed, 211 insertions, 65 deletions
diff --git a/.gitignore b/.gitignore
index 99abf72..0e90977 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
main
.gdb_history
.nn*
+vgcore*
+*.cs
+compile_commands.json
+.cache
diff --git a/main.c b/main.c
index 60c911b..d80bbc2 100644
--- a/main.c
+++ b/main.c
@@ -63,8 +63,8 @@ struct nn {
static const struct layer_spec layers[] = {
{ IDENTITY, true, 28 * 28 },
- { SIGMOID, false, 10 },
- //{ IDENTITY, false, 10 },
+ { SIGMOID, true, 20 },
+ { SOFTMAX, false, 10 },
};
static const uint8_t idx_dtype_size[0x100] = {
@@ -79,7 +79,7 @@ static const uint8_t idx_dtype_size[0x100] = {
static bool quit = false;
void
-sigint(int sig)
+train_stop(int sig)
{
quit = true;
printf("QUIT\n");
@@ -277,7 +277,7 @@ nn_gen(struct nn *nn)
/* initial weights */
for (l = 0; l < nn->layers - 1; l++) {
for (s = 0; s < nn->layer[l].nodes; s++) {
- for (t = 0; t < nn->layer[l+1].nodes; t++) {
+ for (t = 0; t < nn->layer[l+1].len; t++) {
if (getrandom(&val, 4, 0) != 4)
err(1, "getrandom");
nn->weights[l][s][t] =
@@ -309,10 +309,10 @@ nn_load(struct nn *nn, const char *path)
assert(idx.dtype == F64);
assert(idx.dims == 2);
assert(idx.dim[0] == nn->layer[l].nodes);
- assert(idx.dim[1] == nn->layer[l+1].nodes);
+ assert(idx.dim[1] == nn->layer[l+1].len);
snodes = nn->layer[l].nodes;
for (s = 0; s < nn->layer[l].nodes; s++) {
- for (t = 0; t < nn->layer[l+1].nodes; t++) {
+ for (t = 0; t < nn->layer[l+1].len; t++) {
weight = ((double*)idx.data)[t * snodes + s];
nn->weights[l][s][t] = dbl_be64toh(weight);
}
@@ -343,17 +343,17 @@ nn_save(struct nn *nn, const char *path)
/* save weights */
for (l = 0; l < nn->layers - 1; l++) {
idx.data = malloc(nn->layer[l].nodes
- * nn->layer[l+1].nodes * sizeof(double));
+ * nn->layer[l+1].len * sizeof(double));
if (!idx.data) err(1, "malloc");
snodes = nn->layer[l].nodes;
for (s = 0; s < nn->layer[l].nodes; s++) {
- for (t = 0; t < nn->layer[l+1].nodes; t++) {
+ for (t = 0; t < nn->layer[l+1].len; t++) {
weight = dbl_htobe64(nn->weights[l][s][t]);
((double *)idx.data)[t * snodes + s] = weight;
}
}
idx.dim[0] = nn->layer[l].nodes;
- idx.dim[1] = nn->layer[l+1].nodes;
+ idx.dim[1] = nn->layer[l+1].len;
idx_save(&idx, file, path);
free(idx.data);
}
@@ -383,6 +383,7 @@ nn_free(struct nn *nn)
}
}
+ free(nn->layer);
free(nn->weights);
free(nn->deltas);
}
@@ -551,8 +552,8 @@ nn_debug_prediction(struct nn *nn, uint8_t label)
printf("\n");
}
-void
-print_weight_pix(double weight)
+int
+weight_color(double weight)
{
int color;
@@ -581,9 +582,8 @@ print_weight_pix(double weight)
color = 196;
}
}
- printf("\x1b[38:5:%im", color);
- printf("%s", fabs(weight) >= 0.0001 ? "▮" : " ");
- printf("\x1b[0m");
+
+ return color;
}
void
@@ -598,37 +598,28 @@ nn_dump(struct nn *nn)
for (y = 0; y < 28; y++) {
for (x = 0; x < 28 + (y == 27); x++) {
weight = nn->weights[0][y * 28 + x][t];
- print_weight_pix(weight);
+ printf("\x1b[38:5:%im%s\x1b[0m",
+ weight_color(weight),
+ fabs(weight) >= 0.0001 ? "▮" : " ");
}
printf("\n");
}
printf("\n");
}
- //printf("HIDDEN -> OUTPUT\n");
- //for (t = 0; t < nn->layer[2].len; t++) {
- // for (s = 0; s < nn->layer[1].nodes; s++) {
- // weight = nn->weights[1][s][t];
- // print_weight_pix(weight);
- // }
- // printf("\n");
- //}
-
-}
-
-void
-nn_check_error(struct nn *nn, uint8_t *image, uint8_t *label)
-{
- int i;
-
- printf("ERROR:\n");
- nn_fwdprop(nn, image);
- for (i = 0; i < nn->output->len; i++) {
- printf("OUT %i: %F %F\n", i,
- nn->output->activity[i],
- fabs(nn->output->activity[i]
- - (*label == i ? 1.0 : 0.0)));
+ if (nn->layers > 2) {
+ printf("HIDDEN -> OUTPUT\n");
+ for (t = 0; t < nn->layer[2].len; t++) {
+ for (s = 0; s < nn->layer[1].nodes; s++) {
+ weight = nn->weights[1][s][t];
+ printf("\x1b[38:5:%im%s\x1b[0m",
+ weight_color(weight),
+ fabs(weight) >= 0.0001 ? "▮" : " ");
+ }
+ printf("\n");
+ }
}
+
}
void
@@ -678,13 +669,31 @@ nn_apply_deltas(struct nn *nn, size_t size)
}
}
+int
+nn_result(struct nn *nn)
+{
+ double max;
+ int k, maxi;
+
+ maxi = -1;
+ for (k = 0; k < nn->output->len; k++) {
+ if (maxi < 0 || nn->output->activity[k] > max) {
+ max = nn->output->activity[k];
+ maxi = k;
+ }
+ }
+
+ return maxi;
+}
+
double
nn_test(struct nn *nn)
{
struct idx images;
struct idx labels;
size_t hits, total;
- int i, k, maxi;
+ int i, k, res;
+ uint8_t label;
double max;
idx_load_images(&images, "data/test-images.idx");
@@ -693,15 +702,10 @@ nn_test(struct nn *nn)
total = hits = 0;
for (i = 0; i < images.dim[0]; i++) {
nn_fwdprop(nn, images.data + i * nn->input->len);
- maxi = -1;
- for (k = 0; k < nn->output->len; k++) {
- if (maxi < 0 || nn->output->activity[k] > max) {
- max = nn->output->activity[k];
- maxi = k;
- }
- }
- if (maxi == *(uint8_t*)(labels.data + i))
- hits++;
+ label = *(uint8_t*)(labels.data + i);
+ nn_debug_prediction(nn, label);
+ res = nn_result(nn);
+ if (res == label) hits++;
total++;
}
@@ -734,7 +738,7 @@ nn_batch(struct nn *nn, struct idx *images, struct idx *labels,
- (k == label ? 1.0 : 0.0);
error += 0.5 * lerror * lerror;
}
- nn_debug_prediction(nn, label);
+ //nn_debug_prediction(nn, label);
nn_backprop(nn, label);
nn_update_deltas(nn, learning_rate);
}
@@ -753,6 +757,8 @@ nn_train(struct nn *nn, size_t epochs,
double error;
int epoch, i;
+ signal(SIGINT, train_stop);
+
idx_load_images(&images, "data/train-images.idx");
idx_load_labels(&labels, "data/train-labels.idx");
@@ -761,15 +767,16 @@ nn_train(struct nn *nn, size_t epochs,
for (i = 0; i < images.dim[0] / batch_size; i++) {
error = nn_batch(nn, &images, &labels,
batch_size, learning_rate);
- if (i % 1 == 0) {
- nn_debug(nn);
- // nn_check_error(nn, images.data, labels.data);
+ if (i % 100 == 0) {
+ //nn_debug(nn);
//nn_dump(nn);
printf("Batch %i / %lu => %2.5F\n", i + 1,
images.dim[0] / batch_size, error);
}
- nn_save(nn, nn->filepath);
- if (quit) exit(1);
+ if (quit) {
+ nn_save(nn, nn->filepath);
+ exit(1);
+ }
}
}
@@ -778,24 +785,162 @@ nn_train(struct nn *nn, size_t epochs,
}
void
-nn_trainvis(struct nn *nn, size_t epochs,
- size_t batch_size, double learning_rate)
+nn_trainvis(struct nn *nn, size_t batch_size, double learning_rate)
{
+ struct idx images;
+ struct idx labels;
+ double error, weight;
+ int epoch, i;
+ int t, x, y;
+ int sx, sy;
+ bool show;
+
/* display weights visually after each batch
* and adjust batch frequency via UP / DOWN */
+ signal(SIGINT, train_stop);
+
+ idx_load_images(&images, "data/train-images.idx");
+ idx_load_labels(&labels, "data/train-labels.idx");
+
+ printf("\x1b[?25l"); /* hide cursor */
+ printf("\x1b[2J"); /* clear screen */
+
+ while (!quit) {
+ error = nn_batch(nn, &images, &labels,
+ batch_size, learning_rate);
+ if (quit) {
+ nn_save(nn, nn->filepath);
+ break;
+ }
+
+ printf("\x1b[%i;%iHTraining error: %F", 2, 0, error);
+
+ assert(nn->layers > 1);
+ for (t = 0; t < nn->layer[1].len; t++) {
+ sy = (t >= nn->layer[1].len / 2) ? 35 : 5;
+ sx = 2 + 30 * (t % (nn->layer[1].len / 2));
+ for (y = 0; y < 28; y++) {
+ for (x = 0; x < 28 + (y == 27); x++) {
+ weight = nn->weights[0][y * 28 + x][t];
+ show = fabs(weight) >= 0.0001;
+ printf("\x1b[%i;%iH", sy + y, sx + x);
+ printf("\x1b[38:5:%im%s\x1b[0m",
+ weight_color(weight),
+ show ? "▮" : " ");
+ }
+ }
+ }
+ }
+
+ printf("\x1b[?25h"); /* show cursor */
+ printf("\x1b[2J"); /* clear screen */
+ idx_free(&images);
+ idx_free(&labels);
}
void
nn_predict(struct nn *nn)
{
- struct idx images, labels;
+ uint8_t image[28*28];
+ WINDOW *win;
+ MEVENT event;
+ int width, height;
+ int startx, starty;
+ int x, y, c, i, label;
+ bool evaluate;
/* gui interface to draw input and show prediction */
-
-
+ /* TODO: 256 color support, is this portable? */
+ setenv("TERM", "xterm-1002", 1);
+
+ initscr();
+ keypad(stdscr, true);
+ noecho();
+ cbreak();
+ curs_set(0);
+
+ mousemask(ALL_MOUSE_EVENTS | REPORT_MOUSE_POSITION, NULL);
+
+ win = NULL;
+ label = -1;
+ evaluate = true;
+ memset(image, 0, sizeof(image));
+ while (!quit) {
+ width = getmaxx(stdscr);
+ height = getmaxy(stdscr);
+ assert(width >= 30 && height >= 31);
+
+ startx = (width - 30) / 2;
+ starty = (height - 30) / 2;
+
+ if (evaluate) {
+ nn_fwdprop(nn, image);
+ label = nn_result(nn);
+ evaluate = false;
+ }
+
+ if (!win) {
+ win = newwin(30, 30, starty, startx);
+ if (!win) err(1, "newwin");
+ } else {
+ mvwin(win, starty, startx);
+ }
+
+ clear();
+
+ mvprintw(starty - 1, startx - 1, "Predictions: ");
+ for (i = 0; i < nn->output->len; i++) {
+ if (i == label) attron(A_UNDERLINE);
+ if (nn->output->activity[i] >= 0.2)
+ attron(A_BOLD);
+ printw("%i", i);
+ attroff(A_BOLD);
+ attroff(A_UNDERLINE);
+ printw(" ");
+ }
+ refresh();
+ box(win, 0, 0);
+ for (y = 0; y < 28; y++) {
+ for (x = 0; x < 28; x++) {
+ if (image[y * 28 + x])
+ mvwaddch(win, 1 + y, 1 + x, ACS_BLOCK);
+ else
+ mvwaddch(win, 1 + y, 1 + x, ' ');
+ }
+ }
+ wrefresh(win);
+
+ switch ((c = getch())) {
+ case KEY_MOUSE:
+ if (getmouse(&event) != OK)
+ err(1, "getmouse");
+ x = event.x - (startx + 1);
+ y = event.y - (starty + 1);
+ if (x < 0 || x >= 28) continue;
+ if (y < 0 || y >= 28) continue;
+ image[y * 28 + x] = 1;
+ if (y > 0) image[(y-1) * 28 + x] = 1;
+ if (y < 27) image[(y+1) * 28 + x] = 1;
+ if (x > 0) image[y * 28 + x - 1] = 1;
+ if (x < 27) image[y * 28 + x + 1] = 1;
+ if (event.bstate & BUTTON1_RELEASED ||
+ event.bstate & BUTTON2_RELEASED)
+ evaluate = true;
+ break;
+ case 'c':
+ memset(image, 0, sizeof(image));
+ break;
+ case 'q':
+ quit = true;
+ break;
+ }
+ }
+
+ delwin(win);
+ endwin();
}
void
@@ -835,8 +980,6 @@ main(int argc, const char **argv)
{
struct nn nn;
- signal(SIGINT, sigint);
-
if (argc == 2 && !strcmp(argv[1], "gen")) {
nn_init(&nn, layers, ARRLEN(layers));
nn_gen(&nn);
@@ -845,20 +988,19 @@ main(int argc, const char **argv)
} else if (argc == 2 && !strcmp(argv[1], "train")) {
nn_init(&nn, layers, ARRLEN(layers));
nn_load(&nn, ".nn");
- nn_train(&nn, 1, 10, 0.01);
+ nn_train(&nn, 10, 5, 0.005);
nn_save(&nn, ".nn");
nn_free(&nn);
} else if (argc == 2 && !strcmp(argv[1], "trainvis")) {
nn_init(&nn, layers, ARRLEN(layers));
nn_load(&nn, ".nn");
- nn_trainvis(&nn, 1, 10, 0.02);
+ nn_trainvis(&nn, 5, 0.005);
nn_save(&nn, ".nn");
nn_free(&nn);
} else if (argc == 2 && !strcmp(argv[1], "predict")) {
nn_init(&nn, layers, ARRLEN(layers));
nn_load(&nn, ".nn");
nn_predict(&nn);
- nn_save(&nn, ".nn");
nn_free(&nn);
} else if (argc == 2 && !strcmp(argv[1], "test")) {
nn_init(&nn, layers, ARRLEN(layers));
@@ -873,7 +1015,7 @@ main(int argc, const char **argv)
} else if (argc == 4 && !strcmp(argv[1], "sample")) {
dump_sample(argv[2], atoi(argv[3]));
} else {
- printf("USAGE: main (gen|train|test|sample) [ARGS..]\n");
+ printf("Commands: gen train trainvis predict test dump sample\n");
}
}