summary | refs | log | tree | commit | diff | stats
path: root/fcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'fcomp.c')
-rw-r--r-- fcomp.c 234
1 files changed, 129 insertions, 105 deletions
diff --git a/fcomp.c b/fcomp.c
index 33944eb..a466283 100644
--- a/fcomp.c
+++ b/fcomp.c
@@ -75,8 +75,9 @@ typedef struct config {
int lisp_print; /** 0: off 1: on */
int print_count; /** 0: off 1: on */
int print_all; /** 0: off 1: on */
+ int all_uniq; /** 0: off 1: on */
int interactive; /** 0: off 1: on */
- int prompt; /** 0: off 1: on */
+ int prompt; /** 0: off 1: on */
int filestream; /** 0: off 1: on */
char *file;
char *query;
@@ -92,26 +93,27 @@ typedef struct config {
/**
* Initial config vector */
static config cfg = {
- 0,
- 0,
- 0,
- 1,
- 0,
- 0,
- 0,
- 0,
- 0,
- 1,
- 0,
- NULL,
- NULL,
- 3,
- 0,
- NULL,
- 0,
- NULL,
- starts,
- 1000
+ .help = 0,
+ .debug = 0,
+ .stats = 0,
+ .stdin = 1,
+ .reverse_sort = 0,
+ .lisp_print = 0,
+ .print_count = 0,
+ .print_all = 0,
+ .all_uniq = 0,
+ .interactive = 0,
+ .prompt = 1,
+ .filestream = 0,
+ .file = NULL,
+ .query = NULL,
+ .min_word_size = 3,
+ .evc = 0,
+ .extra_valid_chars = NULL,
+ .eic = 0,
+ .extra_invalid_chars = NULL,
+ .search_method = starts,
+ .max_token_size = 1000
};
static void print_help(char *argv0)
@@ -147,6 +149,8 @@ static void print_help(char *argv0)
"Print the count of each matched token");
fprintf(stderr, "%8s %4s %15s %50s\n", "-l", "", "lisp",
"Print matched tokens in a lisp list");
+ fprintf(stderr, "%8s %4s %15s %50s\n\n", "-u", "", "unique",
+ "When -a is passed, don't print duplicates");
fprintf(stderr, "%8s %4s %15s %50s\n", "-a", "", "all",
"Print all tokens of input");
fprintf(stderr, "%8s %4s %15s %50s\n\n", "", "", "",
@@ -191,6 +195,7 @@ static void print_cfg()
fprintf(stderr, "[-l:%4s]\n", cfg.lisp_print ? "on" : "off");
fprintf(stderr, " [-c:%4s] ", cfg.print_count ? "on" : "off");
fprintf(stderr, "[-a:%4s] ", cfg.print_all ? "on" : "off");
+ fprintf(stderr, "[-u:%4s] ", cfg.all_uniq ? "on" : "off");
fprintf(stderr, "[-v:%4s] ", cfg.evc ? "on" : "off");
fprintf(stderr, "[-i:%4s]\n", cfg.eic ? "on" : "off");
}
@@ -218,18 +223,18 @@ static int parse_cli(int argc, char *argv[])
if (argc < 2)
return -1;
char c;
- while ((c = getopt(argc, argv, "-hIF:xzsadclrf:i:v:w:t:")) != -1) {
+ while ((c = getopt(argc, argv, "-hIF:xzsaudclrf:i:v:w:t:")) != -1) {
switch (c) {
case 'I':
- if (cfg.interactive) {
- cfg.prompt = 0;
- }
+ if (cfg.interactive) {
+ cfg.prompt = 0;
+ }
cfg.interactive = 1;
break;
case 'F':
cfg.stdin = 0;
- cfg.filestream = 1;
- cfg.file = optarg;
+ cfg.filestream = 1;
+ cfg.file = optarg;
break;
case 'h':
cfg.help = 1;
@@ -237,6 +242,9 @@ static int parse_cli(int argc, char *argv[])
case 'd':
cfg.debug = 1;
break;
+ case 'u':
+ cfg.all_uniq = 1;
+ break;
case 's':
cfg.stats = 1;
break;
@@ -608,7 +616,7 @@ static int tokenize(FILE * f, slist * l)
while ((c = fgetc(f)) != EOF) {
if (!is_valid(c)) {
- st.discarded++;
+ st.discarded++;
if (tmp) {
finalize_str(tmp, n, l);
free(tmp);
@@ -647,10 +655,9 @@ static void get_slist_stats(slist * l)
{
if (l->n > 0) {
char tmp[] = "Total tokens:";
- st.tok_results =
- (char *)
+ st.tok_results = (char *)
malloc((strlen(tmp) + snprintf(NULL, 0, "%d", l->n) +
- 3) * sizeof(char));
+ 4) * sizeof(char));
sprintf(st.tok_results, "%s\t\t%d\n", tmp, l->n);
}
}
@@ -659,10 +666,9 @@ static void get_search_stats(slist * l)
{
if (l->n > 0) {
char tmp[] = "Total matches:";
- st.search_results =
- (char *)
+ st.search_results = (char *)
malloc((strlen(tmp) + snprintf(NULL, 0, "%d", l->n) +
- 3) * sizeof(char));
+ 4) * sizeof(char));
sprintf(st.search_results, "%s\t\t%d\n", tmp, l->n);
}
}
@@ -670,35 +676,35 @@ static void get_search_stats(slist * l)
static void get_result_stats(result * r)
{
if (r->n > 0) {
- char tmp[] = "Unique matches:";
- st.unique_results =
- (char *)
+ char tmp[] = "Unique tokens:";
+ st.unique_results = (char *)
malloc((strlen(tmp) + snprintf(NULL, 0, "%d", r->n) +
- 3) * sizeof(char));
+ 4) * sizeof(char));
sprintf(st.unique_results, "%s\t\t%d\n", tmp, r->n);
}
}
void clean_stdin(void)
{
- int c;
- do {
- c = getchar();
- } while (c != '\n' && c != EOF);
+ int c;
+ do {
+ c = getchar();
+ } while (c != '\n' && c != EOF);
}
-static void prompt(slist *l)
+static void prompt(slist * l)
{
int r = 0;
char p[20] = "";
char *tmp = NULL;
- if (cfg.prompt) sprintf(p, "[%d]> ", r);
+ if (cfg.prompt)
+ sprintf(p, "[%d]> ", r);
fflush(NULL);
- while((tmp = linenoise(p)) != NULL) {
- slist search_res = { 0 };
- if (search(l, tmp, &search_res)) {
- result count_res = { 0 };
- /* sort the results */
+ while ((tmp = linenoise(p)) != NULL) {
+ slist search_res = { 0 };
+ if (search(l, tmp, &search_res)) {
+ result count_res = { 0 };
+ /* sort the results */
qsort(&search_res.s[0], search_res.n, sizeof(char *),
cmpstringp);
@@ -706,27 +712,65 @@ static void prompt(slist *l)
count(&search_res, &count_res);
sort_by_count(&count_res);
- r = count_res.n;
+ r = count_res.n;
/* print them */
pc(&count_res);
cfree(&count_res);
- }
- if (cfg.prompt) sprintf(p, "[%d]> ", r);
- r = 0;
- free(search_res.s);
- free(tmp);
- tmp = NULL;
+ }
+ if (cfg.prompt)
+ sprintf(p, "[%d]> ", r);
+ r = 0;
+ free(search_res.s);
+ free(tmp);
+ tmp = NULL;
+ }
+ if (tmp)
+ free(tmp);
+}
+
+static int set_input(FILE **f)
+{
+ if (cfg.stdin) {
+ if (cfg.interactive) {
+ fprintf(stderr,
+ "Can't read from stdin in interactive mode.\n");
+ return -1;
+ }
+ *f = stdin;
+ } else if (cfg.filestream) {
+ *f = fmemopen(cfg.file, strlen(cfg.file), "r");
+ } else {
+ *f = fopen(cfg.file, "r");
+ if (!f) {
+ fprintf(stderr, "Couldn't open %s\n", cfg.file);
+ return -1;
+ }
}
- if (tmp) free(tmp);
+ return 0;
+}
+
+static void get_uniq(slist * l, result *r)
+{
+ /* sort the results */
+ qsort(&l->s[0], l->n, sizeof(char *),
+ cmpstringp);
+
+ /* count the unique, put it in result */
+ count(l, r);
+ sort_by_count(r);
+
+ if (cfg.stats)
+ get_result_stats(r);
}
int main(int argc, char *argv[])
{
int rc = 0;
FILE *f;
- slist list = { 0 };
- slist search_res = { 0 };
+ slist token_list = { 0 };
+ slist ref_list = { 0 };
+ slist *listp = NULL;
result count_res = { 0 };
if (parse_cli(argc, argv)
@@ -742,71 +786,51 @@ int main(int argc, char *argv[])
return -1;
}
- /* set input */
- if (cfg.stdin) {
- if (cfg.interactive) {
- fprintf(stderr, "Can't read from stdin in interactive mode.\n");
- return -1;
- }
- f = stdin;
- } else if (cfg.filestream) {
- f = fmemopen(cfg.file, strlen(cfg.file), "r");
- } else {
- f = fopen(cfg.file, "r");
- if (!f) {
- fprintf(stderr, "Couldn't open %s\n", cfg.file);
- return -1;
- }
- }
- /* tokenize */
- if (tokenize(f, &list)) {
- rc = -1;
+ if ((rc = set_input(&f)))
+ goto done;
+
+ if ((rc = tokenize(f, &token_list)))
goto done;
- }
if (cfg.stats)
- get_slist_stats(&list);
+ get_slist_stats(&token_list);
if (cfg.interactive) {
- prompt(&list);
- goto done;
+ prompt(&token_list);
+ goto done;
}
if (cfg.print_all) {
- pp(&list);
+ if (cfg.all_uniq)
+ listp = &token_list;
+ else
+ pp(&token_list);
} else {
/* search for the query */
- if (search(&list, cfg.query, &search_res)) {
-
- if (cfg.stats)
- get_search_stats(&search_res);
-
- /* sort the results */
- qsort(&search_res.s[0], search_res.n, sizeof(char *),
- cmpstringp);
-
- /* count the unique */
- count(&search_res, &count_res);
- sort_by_count(&count_res);
+ if (search(&token_list, cfg.query, &ref_list)) {
+ listp = &ref_list;
+ if (cfg.stats)
+ get_search_stats(&ref_list);
+ }
+ }
- if (cfg.stats)
- get_result_stats(&count_res);
+ if (listp) {
+ get_uniq(listp, &count_res);
+ pc(&count_res);
+ cfree(&count_res);
- /* print them */
- pc(&count_res);
-
- cfree(&count_res);
- free(search_res.s);
- }
+ if (listp == &ref_list)
+ free(ref_list.s);
}
+
if (cfg.debug)
print_cfg();
if (cfg.stats)
print_stats();
-done:
- sfree(&list);
+ done:
+ sfree(&token_list);
fclose(f);
return rc;
}