From 8ccf05d0f151f127b09c2fdcad202bd901e12300 Mon Sep 17 00:00:00 2001 From: Richard Palmer Date: Sat, 7 Aug 2021 18:34:36 +0100 Subject: [PATCH 1/3] Enable local site-search and support for lowercasing searches --- src/options.c | 12 ++++++++++++ src/parser.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/settings.h | 2 ++ src/util.c | 18 ++++++++++++++++++ src/util.h | 1 + 5 files changed, 79 insertions(+) diff --git a/src/options.c b/src/options.c index 3cfc6dccf3..4654146c52 100644 --- a/src/options.c +++ b/src/options.c @@ -137,6 +137,8 @@ struct option long_opts[] = { {"real-time-html" , no_argument , 0 , 0 } , {"restore" , no_argument , 0 , 0 } , {"sort-panel" , required_argument , 0 , 0 } , + {"site-search" , no_argument , 0 , 0 } , + {"site-search-lower" , no_argument , 0 , 0 } , {"static-file" , required_argument , 0 , 0 } , {"user-name" , required_argument , 0 , 0 } , #ifdef HAVE_LIBSSL @@ -260,6 +262,8 @@ cmd_help (void) " --process-and-exit - Parse log and exit without outputting data.\n" " --real-os - Display real OS names. e.g, Windows XP, Snow Leopard.\n" " --restore - Restore data from disk from the given --db-path or from /tmp.\n" + " --site-search - Parse search keyphrases for a local site search using q URL param\n" + " --site-search-lower - Lower case search keyphrases from local site search\n" " --sort-panel=PANEL,METRIC,ORDER - Sort panel on initial load. e.g., --sort-panel=VISITORS,BY_HITS,ASC.\n" " See manpage for a list of panels/fields.\n" " --static-file= - Add static file extension. e.g.: .mp3. Extensions are case sensitive.\n" @@ -600,6 +604,14 @@ parse_long_opt (const char *name, const char *oarg) { set_array_opt (oarg, conf.static_files, &conf.static_file_idx, MAX_EXTENSIONS); } + /* local site search */ + if (!strcmp ("site-search", name)) + conf.site_search = 1; + + /* lowercase local site search */ + if (!strcmp ("site-search-lower", name)) + conf.site_search_lower = 1; + /* GEOIP OPTIONS * ========================= */ /* specifies the path of the GeoIP City database file */ diff --git a/src/parser.c b/src/parser.c index c5fc3feb23..2814d0a673 100644 --- a/src/parser.c +++ b/src/parser.c @@ -332,6 +332,49 @@ decode_url (char *url) { return trim_str (char_replace (out, '+', ' ')); } +/* Process keyphrases from local site search. + * Note that the referer hasn't been decoded at the entry point + * since there could be '&' within the search query. + * + * On error, 1 is returned. + * On success, the extracted keyphrase is assigned and 0 is returned. */ +static int +extract_sitesearch_keyphrase (char *ref, char **keyphrase) { + char *r, *ptr, *referer; + int encoded = 0; + + /* Find start of keyword */ + if ((r = strstr (ref, "&q=")) != NULL || (r = strstr (ref, "?q=")) != NULL) + r += 3; + else if ((r = strstr (ref, "%26q%3D")) != NULL || (r = strstr (ref, "%3Fq%3D")) != NULL) + encoded = 1, r += 7; + else + return 1; + + /* Find end of keyword and end string there*/ + if (!encoded && (ptr = strchr (r, '&')) != NULL) + *ptr = '\0'; + else if(!encoded && (ptr = strchr (r, ' ')) != NULL) + /* Handles case when there is nothing else after the q param */ + *ptr = '\0'; + else if (encoded && (ptr = strstr (r, "%26")) != NULL) + *ptr = '\0'; + + referer = decode_url (r); + if (referer == NULL || *referer == '\0') { + free (referer); + return 1; + } + + referer = char_replace (referer, '+', ' '); + *keyphrase = trim_str (referer); + + if(conf.site_search_lower) + *keyphrase = strtolower(*keyphrase); + + return 0; +} + /* Process keyphrases from Google search, cache, and translate. * Note that the referer hasn't been decoded at the entry point * since there could be '&' within the search query. @@ -1023,6 +1066,9 @@ parse_specifier (GLogItem * logitem, char **str, const char *p, const char *end) if (!(tkn = parse_string (&(*str), end, 1))) return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL); + if(conf.site_search) + extract_sitesearch_keyphrase (tkn, &logitem->keyphrase); + logitem->req = parse_req (tkn, &logitem->method, &logitem->protocol); free (tkn); break; diff --git a/src/settings.h b/src/settings.h index e5202bc8db..e79b458c3f 100644 --- a/src/settings.h +++ b/src/settings.h @@ -178,6 +178,8 @@ typedef struct GConf_ int real_time_html; /* enable real-time HTML output */ int restore; /* reload data from db-path */ int skip_term_resolver; /* no terminal resolver */ + int site_search; /* enable local site search keywords parsing */ + int site_search_lower; /* lower case local site search keywords */ int is_json_log_format; /* is a json log format */ uint32_t keep_last; /* number of days to keep in storage */ uint32_t num_tests; /* number of lines to test */ diff --git a/src/util.c b/src/util.c index 0da412bb59..d88b63fa16 100644 --- a/src/util.c +++ b/src/util.c @@ -925,6 +925,24 @@ strtoupper (char *str) { return str; } +/* Make a string lowercase. + * + * On error the original string is returned. + * On success, the lowercased string is returned. */ +char * +strtolower (char *str) { + char *p = str; + if (str == NULL || *str == '\0') + return str; + + while (*p != '\0') { + *p = tolower (*p); + p++; + } + + return str; +} + /* Left-pad a string with n amount of spaces. * * On success, a left-padded string is returned. */ diff --git a/src/util.h b/src/util.h index 12ba21694f..eadbc062f1 100644 --- a/src/util.h +++ b/src/util.h @@ -76,6 +76,7 @@ char *replace_str (const char *str, const char *old, const char *new); char *rtrim (char *s); char *secs_to_str (int secs); char *strtoupper(char *str); +char *strtolower(char *str); char *substring (const char *str, int begin, int len); char *trim_str (char *str); char *u322str (uint32_t d, int width); From 140b448a000029a16c1279f17a77d5c37d26ecb9 Mon Sep 17 00:00:00 2001 From: Richard Palmer Date: Sun, 17 Oct 2021 11:10:13 +0100 Subject: [PATCH 2/3] Allow query param name to be set and save for testing --- src/options.c | 18 ++++++++++++++---- src/parser.c | 9 +++++---- src/settings.h | 3 ++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index 4654146c52..d340305fcc 100644 --- a/src/options.c +++ b/src/options.c @@ -137,7 +137,7 @@ struct option long_opts[] = { {"real-time-html" , no_argument , 0 , 0 } , {"restore" , no_argument , 0 , 0 } , {"sort-panel" , required_argument , 0 , 0 } , - {"site-search" , no_argument , 0 , 0 } , + {"site-search" , required_argument , 0 , 0 } , {"site-search-lower" , no_argument , 0 , 0 } , {"static-file" , required_argument , 0 , 0 } , {"user-name" , required_argument , 0 , 0 } , @@ -262,7 +262,7 @@ cmd_help (void) " --process-and-exit - Parse log and exit without outputting data.\n" " --real-os - Display real OS names. e.g, Windows XP, Snow Leopard.\n" " --restore - Restore data from disk from the given --db-path or from /tmp.\n" - " --site-search - Parse search keyphrases for a local site search using q URL param\n" + " --site-search= - Parse search terms for local site search with query param (usually q)\n" " --site-search-lower - Lower case search keyphrases from local site search\n" " --sort-panel=PANEL,METRIC,ORDER - Sort panel on initial load. e.g., --sort-panel=VISITORS,BY_HITS,ASC.\n" " See manpage for a list of panels/fields.\n" @@ -605,8 +605,18 @@ parse_long_opt (const char *name, const char *oarg) { } /* local site search */ - if (!strcmp ("site-search", name)) - conf.site_search = 1; + if (!strcmp ("site-search", name)) { + char *first_param = xstrdup ("?"); + char *other_param = xstrdup ("&"); + + append_str(&first_param, oarg); + append_str(&first_param, "="); + append_str(&other_param, oarg); + append_str(&other_param, "="); + + conf.site_search = first_param; + conf.site_search_other = other_param; + } /* lowercase local site search */ if (!strcmp ("site-search-lower", name)) diff --git a/src/parser.c b/src/parser.c index 2814d0a673..c20a3a93ff 100644 --- a/src/parser.c +++ b/src/parser.c @@ -344,10 +344,11 @@ extract_sitesearch_keyphrase (char *ref, char **keyphrase) { int encoded = 0; /* Find start of keyword */ - if ((r = strstr (ref, "&q=")) != NULL || (r = strstr (ref, "?q=")) != NULL) - r += 3; - else if ((r = strstr (ref, "%26q%3D")) != NULL || (r = strstr (ref, "%3Fq%3D")) != NULL) - encoded = 1, r += 7; + if ((r = strstr (ref, conf.site_search)) != NULL || (r = strstr (ref, + conf.site_search_other)) != NULL) + r += strlen(conf.site_search); + // else if ((r = strstr (ref, "%26q%3D")) != NULL || (r = strstr (ref, "%3Fq%3D")) != NULL) + // encoded = 1, r += 7; else return 1; diff --git a/src/settings.h b/src/settings.h index e79b458c3f..b12187ce16 100644 --- a/src/settings.h +++ b/src/settings.h @@ -178,7 +178,8 @@ typedef struct GConf_ int real_time_html; /* enable real-time HTML output */ int restore; /* reload data from db-path */ int skip_term_resolver; /* no terminal resolver */ - int site_search; /* enable local site search keywords parsing */ + const char *site_search; /* enable local site search keywords parsing */ + const char *site_search_other; /* alternate matching for site search param */ int site_search_lower; /* lower case local site search keywords */ int is_json_log_format; /* is a json log format */ uint32_t keep_last; /* number of days to keep in storage */ From 2d29419b3f1768bbc0fc4e107f2c5da9a2237d1f Mon Sep 17 00:00:00 2001 From: Richard Palmer Date: Sun, 17 Oct 2021 11:12:13 +0100 Subject: [PATCH 3/3] Spacing --- src/options.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/options.c b/src/options.c index d340305fcc..6c0c493ab2 100644 --- a/src/options.c +++ b/src/options.c @@ -606,13 +606,13 @@ parse_long_opt (const char *name, const char *oarg) { /* local site search */ if (!strcmp ("site-search", name)) { - char *first_param = xstrdup ("?"); - char *other_param = xstrdup ("&"); + char *first_param = xstrdup ("?"); + char *other_param = xstrdup ("&"); - append_str(&first_param, oarg); - append_str(&first_param, "="); - append_str(&other_param, oarg); - append_str(&other_param, "="); + append_str(&first_param, oarg); + append_str(&first_param, "="); + append_str(&other_param, oarg); + append_str(&other_param, "="); conf.site_search = first_param; conf.site_search_other = other_param;