diff options
Diffstat (limited to 'fcomp.c')
-rw-r--r-- | fcomp.c | 234 |
1 files changed, 129 insertions, 105 deletions
@@ -75,8 +75,9 @@ typedef struct config { int lisp_print; /** 0: off 1: on */ int print_count; /** 0: off 1: on */ int print_all; /** 0: off 1: on */ + int all_uniq; /** 0: off 1: on */ int interactive; /** 0: off 1: on */ - int prompt; /** 0: off 1: on */ + int prompt; /** 0: off 1: on */ int filestream; /** 0: off 1: on */ char *file; char *query; @@ -92,26 +93,27 @@ typedef struct config { /** * Initial config vector */ static config cfg = { - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - NULL, - NULL, - 3, - 0, - NULL, - 0, - NULL, - starts, - 1000 + .help = 0, + .debug = 0, + .stats = 0, + .stdin = 1, + .reverse_sort = 0, + .lisp_print = 0, + .print_count = 0, + .print_all = 0, + .all_uniq = 0, + .interactive = 0, + .prompt = 1, + .filestream = 0, + .file = NULL, + .query = NULL, + .min_word_size = 3, + .evc = 0, + .extra_valid_chars = NULL, + .eic = 0, + .extra_invalid_chars = NULL, + .search_method = starts, + .max_token_size = 1000 }; static void print_help(char *argv0) @@ -147,6 +149,8 @@ static void print_help(char *argv0) "Print the count of each matched token"); fprintf(stderr, "%8s %4s %15s %50s\n", "-l", "", "lisp", "Print matched tokens in a lisp list"); + fprintf(stderr, "%8s %4s %15s %50s\n\n", "-u", "", "unique", + "When -a is passed, don't print duplicates"); fprintf(stderr, "%8s %4s %15s %50s\n", "-a", "", "all", "Print all tokens of input"); fprintf(stderr, "%8s %4s %15s %50s\n\n", "", "", "", @@ -191,6 +195,7 @@ static void print_cfg() fprintf(stderr, "[-l:%4s]\n", cfg.lisp_print ? "on" : "off"); fprintf(stderr, " [-c:%4s] ", cfg.print_count ? "on" : "off"); fprintf(stderr, "[-a:%4s] ", cfg.print_all ? "on" : "off"); + fprintf(stderr, "[-u:%4s] ", cfg.all_uniq ? "on" : "off"); fprintf(stderr, "[-v:%4s] ", cfg.evc ? "on" : "off"); fprintf(stderr, "[-i:%4s]\n", cfg.eic ? "on" : "off"); } @@ -218,18 +223,18 @@ static int parse_cli(int argc, char *argv[]) if (argc < 2) return -1; char c; - while ((c = getopt(argc, argv, "-hIF:xzsadclrf:i:v:w:t:")) != -1) { + while ((c = getopt(argc, argv, "-hIF:xzsaudclrf:i:v:w:t:")) != -1) { switch (c) { case 'I': - if (cfg.interactive) { - cfg.prompt = 0; - } + if (cfg.interactive) { + cfg.prompt = 0; + } cfg.interactive = 1; break; case 'F': cfg.stdin = 0; - cfg.filestream = 1; - cfg.file = optarg; + cfg.filestream = 1; + cfg.file = optarg; break; case 'h': cfg.help = 1; @@ -237,6 +242,9 @@ static int parse_cli(int argc, char *argv[]) case 'd': cfg.debug = 1; break; + case 'u': + cfg.all_uniq = 1; + break; case 's': cfg.stats = 1; break; @@ -608,7 +616,7 @@ static int tokenize(FILE * f, slist * l) while ((c = fgetc(f)) != EOF) { if (!is_valid(c)) { - st.discarded++; + st.discarded++; if (tmp) { finalize_str(tmp, n, l); free(tmp); @@ -647,10 +655,9 @@ static void get_slist_stats(slist * l) { if (l->n > 0) { char tmp[] = "Total tokens:"; - st.tok_results = - (char *) + st.tok_results = (char *) malloc((strlen(tmp) + snprintf(NULL, 0, "%d", l->n) + - 3) * sizeof(char)); + 4) * sizeof(char)); sprintf(st.tok_results, "%s\t\t%d\n", tmp, l->n); } } @@ -659,10 +666,9 @@ static void get_search_stats(slist * l) { if (l->n > 0) { char tmp[] = "Total matches:"; - st.search_results = - (char *) + st.search_results = (char *) malloc((strlen(tmp) + snprintf(NULL, 0, "%d", l->n) + - 3) * sizeof(char)); + 4) * sizeof(char)); sprintf(st.search_results, "%s\t\t%d\n", tmp, l->n); } } @@ -670,35 +676,35 @@ static void get_search_stats(slist * l) static void get_result_stats(result * r) { if (r->n > 0) { - char tmp[] = "Unique matches:"; - st.unique_results = - (char *) + char tmp[] = "Unique tokens:"; + st.unique_results = (char *) malloc((strlen(tmp) + snprintf(NULL, 0, "%d", r->n) + - 3) * sizeof(char)); + 4) * sizeof(char)); sprintf(st.unique_results, "%s\t\t%d\n", tmp, r->n); } } void clean_stdin(void) { - int c; - do { - c = getchar(); - } while (c != '\n' && c != EOF); + int c; + do { + c = getchar(); + } while (c != '\n' && c != EOF); } -static void prompt(slist *l) +static void prompt(slist * l) { int r = 0; char p[20] = ""; char *tmp = NULL; - if (cfg.prompt) sprintf(p, "[%d]> ", r); + if (cfg.prompt) + sprintf(p, "[%d]> ", r); fflush(NULL); - while((tmp = linenoise(p)) != NULL) { - slist search_res = { 0 }; - if (search(l, tmp, &search_res)) { - result count_res = { 0 }; - /* sort the results */ + while ((tmp = linenoise(p)) != NULL) { + slist search_res = { 0 }; + if (search(l, tmp, &search_res)) { + result count_res = { 0 }; + /* sort the results */ qsort(&search_res.s[0], search_res.n, sizeof(char *), cmpstringp); @@ -706,27 +712,65 @@ static void prompt(slist *l) count(&search_res, &count_res); sort_by_count(&count_res); - r = count_res.n; + r = count_res.n; /* print them */ pc(&count_res); cfree(&count_res); - } - if (cfg.prompt) sprintf(p, "[%d]> ", r); - r = 0; - free(search_res.s); - free(tmp); - tmp = NULL; + } + if (cfg.prompt) + sprintf(p, "[%d]> ", r); + r = 0; + free(search_res.s); + free(tmp); + tmp = NULL; + } + if (tmp) + free(tmp); +} + +static int set_input(FILE **f) +{ + if (cfg.stdin) { + if (cfg.interactive) { + fprintf(stderr, + "Can't read from stdin in interactive mode.\n"); + return -1; + } + *f = stdin; + } else if (cfg.filestream) { + *f = fmemopen(cfg.file, strlen(cfg.file), "r"); + } else { + *f = fopen(cfg.file, "r"); + if (!f) { + fprintf(stderr, "Couldn't open %s\n", cfg.file); + return -1; + } } - if (tmp) free(tmp); + return 0; +} + +static void get_uniq(slist * l, result *r) +{ + /* sort the results */ + qsort(&l->s[0], l->n, sizeof(char *), + cmpstringp); + + /* count the unique, put it in result */ + count(l, r); + sort_by_count(r); + + if (cfg.stats) + get_result_stats(r); } int main(int argc, char *argv[]) { int rc = 0; FILE *f; - slist list = { 0 }; - slist search_res = { 0 }; + slist token_list = { 0 }; + slist ref_list = { 0 }; + slist *listp = NULL; result count_res = { 0 }; if (parse_cli(argc, argv) @@ -742,71 +786,51 @@ int main(int argc, char *argv[]) return -1; } - /* set input */ - if (cfg.stdin) { - if (cfg.interactive) { - fprintf(stderr, "Can't read from stdin in interactive mode.\n"); - return -1; - } - f = stdin; - } else if (cfg.filestream) { - f = fmemopen(cfg.file, strlen(cfg.file), "r"); - } else { - f = fopen(cfg.file, "r"); - if (!f) { - fprintf(stderr, "Couldn't open %s\n", cfg.file); - return -1; - } - } - /* tokenize */ - if (tokenize(f, &list)) { - rc = -1; + if ((rc = set_input(&f))) + goto done; + + if ((rc = tokenize(f, &token_list))) goto done; - } if (cfg.stats) - get_slist_stats(&list); + get_slist_stats(&token_list); if (cfg.interactive) { - prompt(&list); - goto done; + prompt(&token_list); + goto done; } if (cfg.print_all) { - pp(&list); + if (cfg.all_uniq) + listp = &token_list; + else + pp(&token_list); } else { /* search for the query */ - if (search(&list, cfg.query, &search_res)) { - - if (cfg.stats) - get_search_stats(&search_res); - - /* sort the results */ - qsort(&search_res.s[0], search_res.n, sizeof(char *), - cmpstringp); - - /* count the unique */ - count(&search_res, &count_res); - sort_by_count(&count_res); + if (search(&token_list, cfg.query, &ref_list)) { + listp = &ref_list; + if (cfg.stats) + get_search_stats(&ref_list); + } + } - if (cfg.stats) - get_result_stats(&count_res); + if (listp) { + get_uniq(listp, &count_res); + pc(&count_res); + cfree(&count_res); - /* print them */ - pc(&count_res); - - cfree(&count_res); - free(search_res.s); - } + if (listp == &ref_list) + free(ref_list.s); } + if (cfg.debug) print_cfg(); if (cfg.stats) print_stats(); -done: - sfree(&list); + done: + sfree(&token_list); fclose(f); return rc; } |