X-Git-Url: http://www.privoxy.org/gitweb/%40user-manual%40%40actions-help-prefix%40HIDE_IF_MODIFIED_SINCE?a=blobdiff_plain;f=parsers.c;h=a0dfb06e58479c586413ada4969a8680bac49f4a;hb=355b51d46c75908f843ede4f4c9d629fdd2a1781;hp=2d6087d15ee7b2b5d47db2796e6784297d449f5f;hpb=8722b8fa959697b6d954719d5ce5313b6ae97426;p=privoxy.git diff --git a/parsers.c b/parsers.c index 2d6087d1..a0dfb06e 100644 --- a/parsers.c +++ b/parsers.c @@ -1,4 +1,4 @@ -const char parsers_rcs[] = "$Id: parsers.c,v 1.93 2007/03/20 15:21:44 fabiankeil Exp $"; +const char parsers_rcs[] = "$Id: parsers.c,v 1.97 2007/04/15 16:39:21 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $ @@ -44,6 +44,31 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.93 2007/03/20 15:21:44 fabiankeil * * Revisions : * $Log: parsers.c,v $ + * Revision 1.97 2007/04/15 16:39:21 fabiankeil + * Introduce tags as alternative way to specify which + * actions apply to a request. At the moment tags can be + * created based on client and server headers. + * + * Revision 1.96 2007/04/12 12:53:58 fabiankeil + * Log a warning if the content is compressed, filtering is + * enabled and Privoxy was compiled without zlib support. + * Closes FR#1673938. + * + * Revision 1.95 2007/03/25 14:26:40 fabiankeil + * - Fix warnings when compiled with glibc. + * - Don't use crumble() for cookie crunching. + * - Move cookie time parsing into parse_header_time(). + * - Let parse_header_time() return a jb_err code + * instead of a pointer that can only be used to + * check for NULL anyway. + * + * Revision 1.94 2007/03/21 12:23:53 fabiankeil + * - Add better protection against malicious gzip headers. + * - Stop logging the first hundred bytes of decompressed content. + * It looks like it's working and there is always debug 16. + * - Log the content size after decompression in decompress_iob() + * instead of pcrs_filter_response(). + * * Revision 1.93 2007/03/20 15:21:44 fabiankeil * - Use dedicated header filter actions instead of abusing "filter". * Replace "filter-client-headers" and "filter-client-headers" @@ -642,6 +667,13 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.93 2007/03/20 15:21:44 fabiankeil #include #include #include + +#ifdef __GLIBC__ +/* + * Convince GNU's libc to provide a strptime prototype. + */ +#define __USE_XOPEN +#endif /*__GLIBC__ */ #include #ifdef FEATURE_ZLIB @@ -666,6 +698,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.93 2007/03/20 15:21:44 fabiankeil #include "jbsockets.h" #include "miscutil.h" #include "list.h" +#include "actions.h" #ifndef HAVE_STRPTIME #include "strptime.h" @@ -675,7 +708,7 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION; /* Fix a problem with Solaris. There should be no effect on other * platforms. - * Solaris's isspace() is a macro which uses it's argument directly + * Solaris's isspace() is a macro which uses its argument directly * as an array index. Therefore we need to make sure that high-bit * characters generate +ve values, and ideally we also want to make * the argument match the declared parameter type of "int". @@ -686,6 +719,8 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION; #define ijb_isupper(__X) isupper((int)(unsigned char)(__X)) #define ijb_tolower(__X) tolower((int)(unsigned char)(__X)) +jb_err header_tagger(struct client_state *csp, char *header); +jb_err scan_headers(struct client_state *csp); const struct parsers client_patterns[] = { { "referer:", 8, client_referrer }, @@ -1343,6 +1378,49 @@ char *get_header_value(const struct list *header_list, const char *header_name) } + +/********************************************************************* + * + * Function : scan_headers + * + * Description : Scans headers, applies tags and updates action bits. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * + * Returns : JB_ERR_OK + * + *********************************************************************/ +jb_err scan_headers(struct client_state *csp) +{ + struct list_entry *h; /* Header */ + jb_err err = JB_ERR_OK; + + log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); + + for (h = csp->headers->first; (err == JB_ERR_OK) && (h != NULL) ; h = h->next) + { + /* Header crunch()ed in previous run? -> ignore */ + if (h->str == NULL) continue; + log_error(LOG_LEVEL_HEADER, "scan: %s", h->str); + err = header_tagger(csp, h->str); + } + + /* + * header_tagger already updated the action bits + * for every new tag, but unless I'm confused, + * updating them again after all tags are collected, + * should give us another level of indirection when + * it comes to tagging based on tags which were set + * by tag sections which were active because of other + * tag sections themselves (or something like this). + */ + update_action_bits_for_all_tags(csp); + + return err; +} + + /********************************************************************* * * Function : sed @@ -1383,7 +1461,8 @@ char *sed(const struct parsers pats[], if (first_run) /* Parse and print */ { - log_error(LOG_LEVEL_HEADER, "scanning headers for: %s", csp->http->url); + scan_headers(csp); + for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++) { for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next) @@ -1391,8 +1470,6 @@ char *sed(const struct parsers pats[], /* Header crunch()ed in previous run? -> ignore */ if (p->str == NULL) continue; - if (v == pats) log_error(LOG_LEVEL_HEADER, "scan: %s", p->str); - /* Does the current parser handle this header? */ if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING)) { @@ -1441,6 +1518,205 @@ char *sed(const struct parsers pats[], } + +/********************************************************************* + * + * Function : header_tagger + * + * Description : Executes all text substitutions from applying + * tag actions and saves the result as tag. + * + * XXX: Shares enough code with filter_header() and + * pcrs_filter_response() to warrant some helper functions. + * + * Parameters : + * 1 : csp = Current client state (buffers, headers, etc...) + * 2 : header = Header that is used as tagger input + * + * Returns : JB_ERR_OK on success and always succeeds + * + *********************************************************************/ +jb_err header_tagger(struct client_state *csp, char *header) +{ + int wanted_filter_type; + int multi_action_index; + int i; + pcrs_job *job; + + struct file_list *fl; + struct re_filterfile_spec *b; + struct list_entry *tag_name; + + int found_filters = 0; + const size_t header_length = strlen(header); + + if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE) + { + wanted_filter_type = FT_SERVER_HEADER_TAGGER; + multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER; + } + else + { + wanted_filter_type = FT_CLIENT_HEADER_TAGGER; + multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER; + } + + /* Check if there are any filters */ + for (i = 0; i < MAX_AF_FILES; i++) + { + fl = csp->rlist[i]; + if (NULL != fl) + { + if (NULL != fl->f) + { + found_filters = 1; + break; + } + } + } + + if (0 == found_filters) + { + log_error(LOG_LEVEL_ERROR, "Unable to get current state of regex tagging."); + return(JB_ERR_OK); + } + + for (i = 0; i < MAX_AF_FILES; i++) + { + fl = csp->rlist[i]; + if ((NULL == fl) || (NULL == fl->f)) + { + /* + * Either there are no filter files + * left, or this filter file just + * contains no valid filters. + * + * Continue to be sure we don't miss + * valid filter files that are chained + * after empty or invalid ones. + */ + continue; + } + + /* For all filters, */ + for (b = fl->f; b; b = b->next) + { + if (b->type != wanted_filter_type) + { + /* skip the ones we don't care about, */ + continue; + } + /* leaving only taggers that could apply, of which we use the ones, */ + for (tag_name = csp->action->multi[multi_action_index]->first; + NULL != tag_name; tag_name = tag_name->next) + { + /* that do apply, and */ + if (strcmp(b->name, tag_name->str) == 0) + { + char *modified_tag = NULL; + char *tag = header; + size_t size = header_length; + + if (NULL == b->joblist) + { + log_error(LOG_LEVEL_RE_FILTER, + "Tagger %s has empty joblist. Nothing to do.", b->name); + continue; + } + + /* execute their pcrs_joblist on the header. */ + for (job = b->joblist; NULL != job; job = job->next) + { + const int hits = pcrs_execute(job, tag, size, &modified_tag, &size); + + if (0 < hits) + { + /* Success, continue with the modified version. */ + if (tag != header) + { + freez(tag); + } + tag = modified_tag; + } + else + { + /* Tagger doesn't match */ + if (0 > hits) + { + /* Regex failure, log it but continue anyway. */ + log_error(LOG_LEVEL_ERROR, + "Problems with tagger \'%s\' and header \'%s\': %s", + b->name, *header, pcrs_strerror(hits)); + } + freez(modified_tag); + } + } + + /* If this tagger matched */ + if (tag != header) + { + if (0 == size) + { + /* + * There is to technical limitation which makes + * it impossible to use empty tags, but I assume + * no one would do it intentionally. + */ + freez(tag); + log_error(LOG_LEVEL_INFO, + "Tagger \'%s\' created an empty tag. Ignored.", + b->name); + continue; + } + + if (!list_contains_item(csp->tags, tag)) + { + if (JB_ERR_OK != enlist(csp->tags, tag)) + { + log_error(LOG_LEVEL_ERROR, + "Insufficient memory to add tag \'%s\', " + "based on tagger \'%s\' and header \'%s\'", + tag, b->name, *header); + } + else + { + char *action_message; + /* + * update the action bits right away, to make + * tagging based on tags set by earlier taggers + * of the same kind possible. + */ + if (update_action_bits_for_tag(csp, tag)) + { + action_message = "Action bits updated accordingly."; + } + else + { + action_message = "No action bits update necessary."; + } + + log_error(LOG_LEVEL_HEADER, + "Tagger \'%s\' added tag \'%s\'. %s", + b->name, tag, action_message); + } + } + else + { + /* XXX: Is this log-worthy? */ + log_error(LOG_LEVEL_HEADER, + "Tagger \'%s\' didn't add tag \'%s\'. " + "Tag already present", b->name, tag); + } + freez(tag); + } /* if the tagger matched */ + } /* if the tagger applies */ + } /* for every tagger that could apply */ + } /* for all filters */ + } /* for all filter files */ + + return JB_ERR_OK; +} + /* here begins the family of parser functions that reformat header lines */ /********************************************************************* @@ -1961,8 +2237,20 @@ jb_err server_content_encoding(struct client_state *csp, char **header) * Body is compressed, turn off pcrs and gif filtering. */ csp->content_type |= CT_TABOO; + + /* + * Log a warning if the user expects the content to be filtered. + */ + if ((csp->rlist != NULL) && + (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER]))) + { + log_error(LOG_LEVEL_INFO, + "Compressed content detected, content filtering disabled. " + "Consider recompiling Privoxy with zlib support or " + "enable the prevent-compression action."); + } } -#endif /* !defined(FEATURE_ZLIB) */ +#endif /* defined(FEATURE_ZLIB) */ return JB_ERR_OK; @@ -2177,6 +2465,8 @@ jb_err server_last_modified(struct client_state *csp, char **header) } else if (0 == strcmpic(newval, "randomize")) { + const char *header_time = *header + sizeof("Last-Modified:"); + log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header); now = time(NULL); #ifdef HAVE_GMTIME_R @@ -2188,9 +2478,9 @@ jb_err server_last_modified(struct client_state *csp, char **header) #else timeptr = gmtime(&now); #endif - if ((timeptr = parse_header_time(*header, &last_modified)) == NULL) + if (JB_ERR_OK != parse_header_time(header_time, &last_modified)) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header); + log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header); freez(*header); } else @@ -2937,9 +3227,11 @@ jb_err client_if_modified_since(struct client_state *csp, char **header) } else /* add random value */ { - if ((timeptr = parse_header_time(*header, &tm)) == NULL) + const char *header_time = *header + sizeof("If-Modified-Since:"); + + if (JB_ERR_OK != parse_header_time(header_time, &tm)) { - log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s (crunching!)", *header); + log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header); freez(*header); } else @@ -3377,9 +3669,6 @@ jb_err connection_close_adder(struct client_state *csp) *********************************************************************/ jb_err server_http(struct client_state *csp, char **header) { - /* Signal that were now parsing server headers. */ - csp->flags |= CSP_FLAG_CLIENT_HEADER_PARSING_DONE; - sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status)); if (csp->http->status == 206) { @@ -3425,7 +3714,6 @@ jb_err server_set_cookie(struct client_state *csp, char **header) time_t now; time_t cookie_time; struct tm tm_now; - struct tm tm_cookie; time(&now); #ifdef FEATURE_COOKIE_JAR @@ -3457,8 +3745,8 @@ jb_err server_set_cookie(struct client_state *csp, char **header) if ((csp->action->flags & ACTION_NO_COOKIE_SET) != 0) { - log_error(LOG_LEVEL_HEADER, "Crunched incoming cookie -- yum!"); - return crumble(csp, header); + log_error(LOG_LEVEL_HEADER, "Crunching incoming cookie: %s", *header); + freez(*header); } else if ((csp->action->flags & ACTION_NO_COOKIE_KEEP) != 0) { @@ -3506,62 +3794,10 @@ jb_err server_set_cookie(struct client_state *csp, char **header) */ if ((strncmpic(cur_tag, "expires=", 8) == 0) && *(cur_tag + 8)) { - char *match; - const char *expiration_date = cur_tag + 8; /* Skip "[Ee]xpires=" */ - memset(&tm_cookie, 0, sizeof(tm_cookie)); - /* - * Try the valid time formats we know about. - * - * XXX: This should be moved to parse_header_time(). - * - * XXX: Maybe the log messages should be removed - * for the next stable release. They just exist to - * see which time format gets the most hits and - * should be checked for first. - */ - if (NULL != (match = strptime(expiration_date, "%a, %e-%b-%y %H:%M:%S ", &tm_cookie))) - { - /* 22-Feb-2008 12:01:18 GMT */ - log_error(LOG_LEVEL_HEADER, - "cookie \'%s\' send by %s appears to be using time format 1.", - *header, csp->http->url); - } - else if (NULL != (match = strptime(expiration_date, "%A, %e-%b-%Y %H:%M:%S ", &tm_cookie))) - { - /* Tue, 02-Jun-2037 20:00:00 GMT */ - log_error(LOG_LEVEL_HEADER, - "cookie \'%s\' send by %s appears to be using time format 2.", - *header, csp->http->url); - } - else if (NULL != (match = strptime(expiration_date, "%a, %e-%b-%Y %H:%M:%S ", &tm_cookie))) - { - /* Tuesday, 02-Jun-2037 20:00:00 GMT */ - /* - * On FreeBSD this is never reached because it's handled - * by "format 2" as well. I am, however, not sure if all - * strptime() implementations behave that way. - */ - log_error(LOG_LEVEL_HEADER, - "cookie \'%s\' send by %s appears to be using time format 3.", - *header, csp->http->url); - } - else if (NULL != (match = strptime(expiration_date, "%a, %e %b %Y %H:%M:%S ", &tm_cookie))) - { - /* Fri, 22 Feb 2008 19:20:05 GMT */ - log_error(LOG_LEVEL_HEADER, - "cookie \'%s\' send by %s appears to be using time format 4.", - *header, csp->http->url); - } - else if (NULL != (match = strptime(expiration_date, "%A %b %e %H:%M:%S %Y", &tm_cookie))) - { - /* Thu Mar 08 23:00:00 2007 GMT */ - log_error(LOG_LEVEL_HEADER, - "cookie \'%s\' send by %s appears to be using time format 5.", - *header, csp->http->url); - } + char *expiration_date = cur_tag + 8; /* Skip "[Ee]xpires=" */ - /* Did any of them match? */ - if (NULL == match) + /* Did we detect the date properly? */ + if (JB_ERR_OK != parse_header_time(expiration_date, &cookie_time)) { /* * Nope, treat it as if it was still valid. @@ -3609,7 +3845,6 @@ jb_err server_set_cookie(struct client_state *csp, char **header) * anyway, which in many cases will be shorter * than a browser session. */ - cookie_time = timegm(&tm_cookie); if (cookie_time - now < 0) { log_error(LOG_LEVEL_HEADER, @@ -3702,54 +3937,48 @@ int strclean(const char *string, const char *substring) * * Function : parse_header_time * - * Description : Transforms time inside a HTTP header into - * the usual time format. + * Description : Parses time formats used in HTTP header strings + * to get the numerical respresentation. * * Parameters : - * 1 : header = header to parse - * 2 : tm = storage for the resulting time in seconds + * 1 : header_time = HTTP header time as string. + * 2 : result = storage for header_time in seconds * - * Returns : Time struct containing the header time, or - * NULL in case of a parsing problems. + * Returns : JB_ERR_OK if the time format was recognized, or + * JB_ERR_PARSE otherwise. * *********************************************************************/ -struct tm *parse_header_time(char *header, time_t *tm) { - - char * timestring; +jb_err parse_header_time(const char *header_time, time_t *result) +{ struct tm gmt; - struct tm * timeptr; /* - * Initializing gmt to prevent time zone offsets. + * Zero out gmt to prevent time zone offsets. * * While this is only necessary on some platforms * (mingw32 for example), I don't know how to * detect these automatically and doing it everywhere * shouldn't hurt. */ - time(tm); -#ifdef HAVE_LOCALTIME_R - gmt = *localtime_r(tm, &gmt); -#elif FEATURE_PTHREAD - pthread_mutex_lock(&localtime_mutex); - gmt = *localtime(tm); - pthread_mutex_unlock(&localtime_mutex); -#else - gmt = *localtime(tm); -#endif - - /* Skipping header name */ - timestring = strstr(header, ": "); - if (strptime(timestring, ": %a, %d %b %Y %H:%M:%S", &gmt) == NULL) + memset(&gmt, 0, sizeof(gmt)); + + /* Tue, 02 Jun 2037 20:00:00 */ + if ((NULL == strptime(header_time, "%a, %d %b %Y %H:%M:%S", &gmt)) + /* Tue, 02-Jun-2037 20:00:00 */ + && (NULL == strptime(header_time, "%a, %d-%b-%Y %H:%M:%S", &gmt)) + /* Tue, 02-Jun-37 20:00:00 */ + && (NULL == strptime(header_time, "%a, %d-%b-%y %H:%M:%S", &gmt)) + /* Tuesday, 02-Jun-2037 20:00:00 */ + && (NULL == strptime(header_time, "%A, %d-%b-%Y %H:%M:%S", &gmt)) + /* Tuesday Jun 02 20:00:00 2037 */ + && (NULL == strptime(header_time, "%A %b %d %H:%M:%S %Y", &gmt))) { - timeptr = NULL; - } - else - { - *tm = timegm(&gmt); - timeptr = &gmt; + return JB_ERR_PARSE; } - return(timeptr); + + *result = timegm(&gmt); + + return JB_ERR_OK; }