diff options
| -rw-r--r-- | .gitignore | 4 | ||||
| -rw-r--r-- | main.c | 272 |
2 files changed, 211 insertions, 65 deletions
@@ -1,3 +1,7 @@ main .gdb_history .nn* +vgcore* +*.cs +compile_commands.json +.cache @@ -63,8 +63,8 @@ struct nn { static const struct layer_spec layers[] = { { IDENTITY, true, 28 * 28 }, - { SIGMOID, false, 10 }, - //{ IDENTITY, false, 10 }, + { SIGMOID, true, 20 }, + { SOFTMAX, false, 10 }, }; static const uint8_t idx_dtype_size[0x100] = { @@ -79,7 +79,7 @@ static const uint8_t idx_dtype_size[0x100] = { static bool quit = false; void -sigint(int sig) +train_stop(int sig) { quit = true; printf("QUIT\n"); @@ -277,7 +277,7 @@ nn_gen(struct nn *nn) /* initial weights */ for (l = 0; l < nn->layers - 1; l++) { for (s = 0; s < nn->layer[l].nodes; s++) { - for (t = 0; t < nn->layer[l+1].nodes; t++) { + for (t = 0; t < nn->layer[l+1].len; t++) { if (getrandom(&val, 4, 0) != 4) err(1, "getrandom"); nn->weights[l][s][t] = @@ -309,10 +309,10 @@ nn_load(struct nn *nn, const char *path) assert(idx.dtype == F64); assert(idx.dims == 2); assert(idx.dim[0] == nn->layer[l].nodes); - assert(idx.dim[1] == nn->layer[l+1].nodes); + assert(idx.dim[1] == nn->layer[l+1].len); snodes = nn->layer[l].nodes; for (s = 0; s < nn->layer[l].nodes; s++) { - for (t = 0; t < nn->layer[l+1].nodes; t++) { + for (t = 0; t < nn->layer[l+1].len; t++) { weight = ((double*)idx.data)[t * snodes + s]; nn->weights[l][s][t] = dbl_be64toh(weight); } @@ -343,17 +343,17 @@ nn_save(struct nn *nn, const char *path) /* save weights */ for (l = 0; l < nn->layers - 1; l++) { idx.data = malloc(nn->layer[l].nodes - * nn->layer[l+1].nodes * sizeof(double)); + * nn->layer[l+1].len * sizeof(double)); if (!idx.data) err(1, "malloc"); snodes = nn->layer[l].nodes; for (s = 0; s < nn->layer[l].nodes; s++) { - for (t = 0; t < nn->layer[l+1].nodes; t++) { + for (t = 0; t < nn->layer[l+1].len; t++) { weight = dbl_htobe64(nn->weights[l][s][t]); ((double *)idx.data)[t * snodes + s] = weight; } } idx.dim[0] = nn->layer[l].nodes; - idx.dim[1] = nn->layer[l+1].nodes; + idx.dim[1] = nn->layer[l+1].len; idx_save(&idx, file, path); free(idx.data); } @@ -383,6 +383,7 @@ nn_free(struct nn *nn) } } + free(nn->layer); free(nn->weights); free(nn->deltas); } @@ -551,8 +552,8 @@ nn_debug_prediction(struct nn *nn, uint8_t label) printf("\n"); } -void -print_weight_pix(double weight) +int +weight_color(double weight) { int color; @@ -581,9 +582,8 @@ print_weight_pix(double weight) color = 196; } } - printf("\x1b[38:5:%im", color); - printf("%s", fabs(weight) >= 0.0001 ? "▮" : " "); - printf("\x1b[0m"); + + return color; } void @@ -598,37 +598,28 @@ nn_dump(struct nn *nn) for (y = 0; y < 28; y++) { for (x = 0; x < 28 + (y == 27); x++) { weight = nn->weights[0][y * 28 + x][t]; - print_weight_pix(weight); + printf("\x1b[38:5:%im%s\x1b[0m", + weight_color(weight), + fabs(weight) >= 0.0001 ? "▮" : " "); } printf("\n"); } printf("\n"); } - //printf("HIDDEN -> OUTPUT\n"); - //for (t = 0; t < nn->layer[2].len; t++) { - // for (s = 0; s < nn->layer[1].nodes; s++) { - // weight = nn->weights[1][s][t]; - // print_weight_pix(weight); - // } - // printf("\n"); - //} - -} - -void -nn_check_error(struct nn *nn, uint8_t *image, uint8_t *label) -{ - int i; - - printf("ERROR:\n"); - nn_fwdprop(nn, image); - for (i = 0; i < nn->output->len; i++) { - printf("OUT %i: %F %F\n", i, - nn->output->activity[i], - fabs(nn->output->activity[i] - - (*label == i ? 1.0 : 0.0))); + if (nn->layers > 2) { + printf("HIDDEN -> OUTPUT\n"); + for (t = 0; t < nn->layer[2].len; t++) { + for (s = 0; s < nn->layer[1].nodes; s++) { + weight = nn->weights[1][s][t]; + printf("\x1b[38:5:%im%s\x1b[0m", + weight_color(weight), + fabs(weight) >= 0.0001 ? "▮" : " "); + } + printf("\n"); + } } + } void @@ -678,13 +669,31 @@ nn_apply_deltas(struct nn *nn, size_t size) } } +int +nn_result(struct nn *nn) +{ + double max; + int k, maxi; + + maxi = -1; + for (k = 0; k < nn->output->len; k++) { + if (maxi < 0 || nn->output->activity[k] > max) { + max = nn->output->activity[k]; + maxi = k; + } + } + + return maxi; +} + double nn_test(struct nn *nn) { struct idx images; struct idx labels; size_t hits, total; - int i, k, maxi; + int i, k, res; + uint8_t label; double max; idx_load_images(&images, "data/test-images.idx"); @@ -693,15 +702,10 @@ nn_test(struct nn *nn) total = hits = 0; for (i = 0; i < images.dim[0]; i++) { nn_fwdprop(nn, images.data + i * nn->input->len); - maxi = -1; - for (k = 0; k < nn->output->len; k++) { - if (maxi < 0 || nn->output->activity[k] > max) { - max = nn->output->activity[k]; - maxi = k; - } - } - if (maxi == *(uint8_t*)(labels.data + i)) - hits++; + label = *(uint8_t*)(labels.data + i); + nn_debug_prediction(nn, label); + res = nn_result(nn); + if (res == label) hits++; total++; } @@ -734,7 +738,7 @@ nn_batch(struct nn *nn, struct idx *images, struct idx *labels, - (k == label ? 1.0 : 0.0); error += 0.5 * lerror * lerror; } - nn_debug_prediction(nn, label); + //nn_debug_prediction(nn, label); nn_backprop(nn, label); nn_update_deltas(nn, learning_rate); } @@ -753,6 +757,8 @@ nn_train(struct nn *nn, size_t epochs, double error; int epoch, i; + signal(SIGINT, train_stop); + idx_load_images(&images, "data/train-images.idx"); idx_load_labels(&labels, "data/train-labels.idx"); @@ -761,15 +767,16 @@ nn_train(struct nn *nn, size_t epochs, for (i = 0; i < images.dim[0] / batch_size; i++) { error = nn_batch(nn, &images, &labels, batch_size, learning_rate); - if (i % 1 == 0) { - nn_debug(nn); - // nn_check_error(nn, images.data, labels.data); + if (i % 100 == 0) { + //nn_debug(nn); //nn_dump(nn); printf("Batch %i / %lu => %2.5F\n", i + 1, images.dim[0] / batch_size, error); } - nn_save(nn, nn->filepath); - if (quit) exit(1); + if (quit) { + nn_save(nn, nn->filepath); + exit(1); + } } } @@ -778,24 +785,162 @@ nn_train(struct nn *nn, size_t epochs, } void -nn_trainvis(struct nn *nn, size_t epochs, - size_t batch_size, double learning_rate) +nn_trainvis(struct nn *nn, size_t batch_size, double learning_rate) { + struct idx images; + struct idx labels; + double error, weight; + int epoch, i; + int t, x, y; + int sx, sy; + bool show; + /* display weights visually after each batch * and adjust batch frequency via UP / DOWN */ + signal(SIGINT, train_stop); + + idx_load_images(&images, "data/train-images.idx"); + idx_load_labels(&labels, "data/train-labels.idx"); + + printf("\x1b[?25l"); /* hide cursor */ + printf("\x1b[2J"); /* clear screen */ + + while (!quit) { + error = nn_batch(nn, &images, &labels, + batch_size, learning_rate); + if (quit) { + nn_save(nn, nn->filepath); + break; + } + + printf("\x1b[%i;%iHTraining error: %F", 2, 0, error); + + assert(nn->layers > 1); + for (t = 0; t < nn->layer[1].len; t++) { + sy = (t >= nn->layer[1].len / 2) ? 35 : 5; + sx = 2 + 30 * (t % (nn->layer[1].len / 2)); + for (y = 0; y < 28; y++) { + for (x = 0; x < 28 + (y == 27); x++) { + weight = nn->weights[0][y * 28 + x][t]; + show = fabs(weight) >= 0.0001; + printf("\x1b[%i;%iH", sy + y, sx + x); + printf("\x1b[38:5:%im%s\x1b[0m", + weight_color(weight), + show ? "▮" : " "); + } + } + } + } + + printf("\x1b[?25h"); /* show cursor */ + printf("\x1b[2J"); /* clear screen */ + idx_free(&images); + idx_free(&labels); } void nn_predict(struct nn *nn) { - struct idx images, labels; + uint8_t image[28*28]; + WINDOW *win; + MEVENT event; + int width, height; + int startx, starty; + int x, y, c, i, label; + bool evaluate; /* gui interface to draw input and show prediction */ - - + /* TODO: 256 color support, is this portable? */ + setenv("TERM", "xterm-1002", 1); + + initscr(); + keypad(stdscr, true); + noecho(); + cbreak(); + curs_set(0); + + mousemask(ALL_MOUSE_EVENTS | REPORT_MOUSE_POSITION, NULL); + + win = NULL; + label = -1; + evaluate = true; + memset(image, 0, sizeof(image)); + while (!quit) { + width = getmaxx(stdscr); + height = getmaxy(stdscr); + assert(width >= 30 && height >= 31); + + startx = (width - 30) / 2; + starty = (height - 30) / 2; + + if (evaluate) { + nn_fwdprop(nn, image); + label = nn_result(nn); + evaluate = false; + } + + if (!win) { + win = newwin(30, 30, starty, startx); + if (!win) err(1, "newwin"); + } else { + mvwin(win, starty, startx); + } + + clear(); + + mvprintw(starty - 1, startx - 1, "Predictions: "); + for (i = 0; i < nn->output->len; i++) { + if (i == label) attron(A_UNDERLINE); + if (nn->output->activity[i] >= 0.2) + attron(A_BOLD); + printw("%i", i); + attroff(A_BOLD); + attroff(A_UNDERLINE); + printw(" "); + } + refresh(); + box(win, 0, 0); + for (y = 0; y < 28; y++) { + for (x = 0; x < 28; x++) { + if (image[y * 28 + x]) + mvwaddch(win, 1 + y, 1 + x, ACS_BLOCK); + else + mvwaddch(win, 1 + y, 1 + x, ' '); + } + } + wrefresh(win); + + switch ((c = getch())) { + case KEY_MOUSE: + if (getmouse(&event) != OK) + err(1, "getmouse"); + x = event.x - (startx + 1); + y = event.y - (starty + 1); + if (x < 0 || x >= 28) continue; + if (y < 0 || y >= 28) continue; + image[y * 28 + x] = 1; + if (y > 0) image[(y-1) * 28 + x] = 1; + if (y < 27) image[(y+1) * 28 + x] = 1; + if (x > 0) image[y * 28 + x - 1] = 1; + if (x < 27) image[y * 28 + x + 1] = 1; + if (event.bstate & BUTTON1_RELEASED || + event.bstate & BUTTON2_RELEASED) + evaluate = true; + break; + case 'c': + memset(image, 0, sizeof(image)); + break; + case 'q': + quit = true; + break; + } + } + + delwin(win); + endwin(); } void @@ -835,8 +980,6 @@ main(int argc, const char **argv) { struct nn nn; - signal(SIGINT, sigint); - if (argc == 2 && !strcmp(argv[1], "gen")) { nn_init(&nn, layers, ARRLEN(layers)); nn_gen(&nn); @@ -845,20 +988,19 @@ main(int argc, const char **argv) } else if (argc == 2 && !strcmp(argv[1], "train")) { nn_init(&nn, layers, ARRLEN(layers)); nn_load(&nn, ".nn"); - nn_train(&nn, 1, 10, 0.01); + nn_train(&nn, 10, 5, 0.005); nn_save(&nn, ".nn"); nn_free(&nn); } else if (argc == 2 && !strcmp(argv[1], "trainvis")) { nn_init(&nn, layers, ARRLEN(layers)); nn_load(&nn, ".nn"); - nn_trainvis(&nn, 1, 10, 0.02); + nn_trainvis(&nn, 5, 0.005); nn_save(&nn, ".nn"); nn_free(&nn); } else if (argc == 2 && !strcmp(argv[1], "predict")) { nn_init(&nn, layers, ARRLEN(layers)); nn_load(&nn, ".nn"); nn_predict(&nn); - nn_save(&nn, ".nn"); nn_free(&nn); } else if (argc == 2 && !strcmp(argv[1], "test")) { nn_init(&nn, layers, ARRLEN(layers)); @@ -873,7 +1015,7 @@ main(int argc, const char **argv) } else if (argc == 4 && !strcmp(argv[1], "sample")) { dump_sample(argv[2], atoi(argv[3])); } else { - printf("USAGE: main (gen|train|test|sample) [ARGS..]\n"); + printf("Commands: gen train trainvis predict test dump sample\n"); } } |
