-const char parsers_rcs[] = "$Id: parsers.c,v 1.217 2011/01/22 12:30:22 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.234 2011/10/08 17:31:51 fabiankeil Exp $";
/*********************************************************************
*
* File : $Source: /cvsroot/ijbswa/current/parsers.c,v $
static jb_err crunch_server_header (struct client_state *csp, char **header);
static jb_err server_last_modified (struct client_state *csp, char **header);
static jb_err server_content_disposition(struct client_state *csp, char **header);
+#ifdef FEATURE_ZLIB
+static jb_err server_adjust_content_encoding(struct client_state *csp, char **header);
+#endif
#ifdef FEATURE_CONNECTION_KEEP_ALIVE
static jb_err server_save_content_length(struct client_state *csp, char **header);
{
/** The header prefix to match */
const char *str;
-
+
/** The length of the prefix to match */
const size_t len;
-
+
/** The function to apply to this line */
const parser_func_ptr parser;
};
{
want *= 2;
}
-
+
if (want <= csp->config->buffer_limit && NULL != (p = (char *)realloc(iob->buf, want)))
{
iob->size = want;
jb_err decompress_iob(struct client_state *csp)
{
char *buf; /* new, uncompressed buffer */
- char *cur; /* Current iob position (to keep the original
+ char *cur; /* Current iob position (to keep the original
* iob->cur unmodified if we return early) */
size_t bufsize; /* allocated size of the new buffer */
size_t old_size; /* Content size before decompression */
*
* Fortunately, add_to_iob() has thoughtfully null-terminated
* the buffer; we can just increment the end pointer to include
- * the dummy byte.
+ * the dummy byte.
*/
csp->iob->eod++;
}
}
/*
- * If we tried the limit and still didn't have enough
- * memory, just give up.
+ * If we reached the buffer limit and still didn't have enough
+ * memory, just give up. Due to the ceiling enforced by the next
+ * if block we could actually check for equality here, but as it
+ * can be easily mistaken for a bug we don't.
*/
- if (bufsize == csp->config->buffer_limit)
+ if (bufsize >= csp->config->buffer_limit)
{
log_error(LOG_LEVEL_ERROR, "Buffer limit reached while decompressing iob");
return JB_ERR_MEMORY;
{
bufsize = csp->config->buffer_limit;
}
-
+
/* Try to allocate the new buffer. */
tmpbuf = realloc(buf, bufsize);
if (NULL == tmpbuf)
csp->iob->cur = csp->iob->buf + skip_size;
csp->iob->eod = (char *)zstr.next_out;
csp->iob->size = bufsize;
-
+
/*
* Make sure the new uncompressed iob obeys some minimal
* consistency conditions.
* Header spans multiple lines, append the next one.
*/
char *continued_header;
-
+
continued_header = get_header_line(iob);
if ((continued_header == NULL) || (*continued_header == '\0'))
{
}
}
- /*
+ /*
* Not found
*/
return NULL;
*
* Function : scan_headers
*
- * Description : Scans headers, applies tags and updates action bits.
+ * Description : Scans headers, applies tags and updates action bits.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
{ "Content-Length:", 15, server_adjust_content_length },
{ "Transfer-Encoding:", 18, server_transfer_coding },
#ifdef FEATURE_ZLIB
- { "Content-Encoding:", 17, server_content_encoding },
+ { "Content-Encoding:", 17, server_adjust_content_encoding },
#endif /* def FEATURE_ZLIB */
{ NULL, 0, NULL }
};
}
#endif /* def FEATURE_CONNECTION_KEEP_ALIVE */
+#ifdef FEATURE_COMPRESSION
+ if ((JB_ERR_OK == err)
+ && (csp->flags & CSP_FLAG_BUFFERED_CONTENT_DEFLATED))
+ {
+ err = enlist_unique_header(csp->headers, "Content-Encoding", "deflate");
+ if (JB_ERR_OK == err)
+ {
+ log_error(LOG_LEVEL_HEADER, "Added header: Content-Encoding: deflate");
+ }
+ }
+#endif
+
return err;
}
b->name);
continue;
}
-
+
if (!list_contains_item(csp->tags, tag))
{
if (JB_ERR_OK != enlist(csp->tags, tag))
matches = pcrs_execute(job, *header, size, &newheader, &size);
if ( 0 < matches )
{
- current_hits += matches;
+ current_hits += matches;
log_error(LOG_LEVEL_HEADER, "Transforming \"%s\" to \"%s\"", *header, newheader);
freez(*header);
*header = newheader;
* Function : client_connection
*
* Description : Makes sure a proper "Connection:" header is
- * set and signals connection_header_adder
+ * set and signals connection_header_adder
* to do nothing.
*
* Parameters :
log_error(LOG_LEVEL_HEADER,
"Removing \'%s\' to imply keep-alive.", *header);
freez(*header);
+ /*
+ * While we imply keep-alive to the server,
+ * we have to remember that the client didn't.
+ *
+ * XXX: The implied keep-alive currently doesn't
+ * actually work due to a not yet properly
+ * analyzed regression in chat()
+ */
+ csp->flags &= ~CSP_FLAG_CLIENT_CONNECTION_KEEP_ALIVE;
}
else
{
/* Is the current header the lucky one? */
if (strstr(*header, crunch_pattern))
{
- log_error(LOG_LEVEL_HEADER, "Crunching server header: %s (contains: %s)", *header, crunch_pattern);
+ log_error(LOG_LEVEL_HEADER, "Crunching server header: %s (contains: %s)", *header, crunch_pattern);
freez(*header);
}
}
if (csp->action->flags & ACTION_CONTENT_TYPE_OVERWRITE)
{
/*
- * Make sure the user doesn't accidently
- * change the content type of binary documents.
+ * Make sure the user doesn't accidentally
+ * change the content type of binary documents.
*/
if ((csp->content_type & CT_TEXT) || (csp->action->flags & ACTION_FORCE_TEXT_MODE))
{
*
* Function : server_content_encoding
*
- * Description : This function is run twice for each request,
- * unless FEATURE_ZLIB and filtering are disabled.
+ * Description : Used to check if the content is compressed, and if
+ * FEATURE_ZLIB is disabled, filtering is disabled as
+ * well.
*
- * The first run is used to check if the content
- * is compressed, if FEATURE_ZLIB is disabled
- * filtering is then disabled as well, if FEATURE_ZLIB
- * is enabled the content is marked for decompression.
- *
- * The second run is used to remove the Content-Encoding
- * header if the decompression was successful.
+ * If FEATURE_ZLIB is enabled and the compression type
+ * supported, the content is marked for decompression.
+ *
+ * XXX: Doesn't properly deal with multiple or with
+ * unsupported but unknown encodings.
+ * Is case-sensitive but shouldn't be.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
static jb_err server_content_encoding(struct client_state *csp, char **header)
{
#ifdef FEATURE_ZLIB
- if ((csp->flags & CSP_FLAG_MODIFIED)
- && (csp->content_type & (CT_GZIP | CT_DEFLATE)))
+ if (strstr(*header, "sdch"))
{
/*
- * We successfully decompressed the content,
- * and have to clean the header now, so the
- * client no longer expects compressed data..
- *
- * XXX: There is a difference between cleaning
- * and removing it completely.
+ * Shared Dictionary Compression over HTTP isn't supported,
+ * filtering it anyway is pretty much guaranteed to mess up
+ * the encoding.
*/
- log_error(LOG_LEVEL_HEADER, "Crunching: %s", *header);
- freez(*header);
+ csp->content_type |= CT_TABOO;
+
+ /*
+ * Log a warning if the user expects the content to be filtered.
+ */
+ if ((csp->rlist != NULL) &&
+ (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
+ {
+ log_error(LOG_LEVEL_INFO,
+ "SDCH-compressed content detected, content filtering disabled. "
+ "Consider suppressing SDCH offers made by the client.");
+ }
}
else if (strstr(*header, "gzip"))
{
csp->content_type |= CT_TABOO;
}
#else /* !defined(FEATURE_ZLIB) */
- if (strstr(*header, "gzip") || strstr(*header, "compress") || strstr(*header, "deflate"))
+ /*
+ * XXX: Using a black list here isn't the right approach.
+ *
+ * In case of SDCH, building with zlib support isn't
+ * going to help.
+ */
+ if (strstr(*header, "gzip") ||
+ strstr(*header, "compress") ||
+ strstr(*header, "deflate") ||
+ strstr(*header, "sdch"))
{
/*
* Body is compressed, turn off pcrs and gif filtering.
}
+#ifdef FEATURE_ZLIB
+/*********************************************************************
+ *
+ * Function : server_adjust_content_encoding
+ *
+ * Description : Remove the Content-Encoding header if the
+ * decompression was successful and the content
+ * has been modified.
+ *
+ * Parameters :
+ * 1 : csp = Current client state (buffers, headers, etc...)
+ * 2 : header = On input, pointer to header to modify.
+ * On output, pointer to the modified header, or NULL
+ * to remove the header. This function frees the
+ * original string if necessary.
+ *
+ * Returns : JB_ERR_OK on success, or
+ * JB_ERR_MEMORY on out-of-memory error.
+ *
+ *********************************************************************/
+static jb_err server_adjust_content_encoding(struct client_state *csp, char **header)
+{
+   /* An unmodified body still matches the header, leave it alone. */
+   if (!(csp->flags & CSP_FLAG_MODIFIED))
+   {
+      return JB_ERR_OK;
+   }
+
+   /* Only the gzip and deflate content types are handled here. */
+   if (!(csp->content_type & (CT_GZIP | CT_DEFLATE)))
+   {
+      return JB_ERR_OK;
+   }
+
+   /*
+    * The content was decompressed successfully, so the
+    * header has to go or the client would still expect
+    * compressed data.
+    *
+    * XXX: There is a difference between cleaning
+    *      and removing it completely.
+    */
+   log_error(LOG_LEVEL_HEADER, "Crunching: %s", *header);
+   freez(*header);
+
+   return JB_ERR_OK;
+}
+#endif /* defined(FEATURE_ZLIB) */
+
+
/*********************************************************************
*
* Function : server_adjust_content_length
return JB_ERR_OK;
}
else
- {
+ {
/*
* Replacing Content-Disposition header
*/
static jb_err server_last_modified(struct client_state *csp, char **header)
{
const char *newval;
- char buf[BUFFER_SIZE];
time_t last_modified;
char newheader[50];
return JB_ERR_OK;
}
else if (0 == strcmpic(newval, "reset-to-request-time"))
- {
+ {
/*
* Setting Last-Modified Header to now.
*/
+ char buf[30];
get_http_time(0, buf, sizeof(buf));
freez(*header);
*header = strdup("Last-Modified: ");
- string_append(header, buf);
+ string_append(header, buf);
if (*header == NULL)
{
- log_error(LOG_LEVEL_HEADER, "Insufficient memory. Last-Modified header got lost, boohoo.");
+ log_error(LOG_LEVEL_HEADER, "Insufficient memory. Last-Modified header got lost, boohoo.");
}
else
{
if (negative_delta)
{
- rtime *= -1;
+ rtime *= -1;
log_error(LOG_LEVEL_HEADER, "Server time in the future.");
}
rtime = pick_from_range(rtime);
if (*header == NULL)
{
log_error(LOG_LEVEL_ERROR, "Insufficient memory, header crunched without replacement.");
- return JB_ERR_MEMORY;
+ return JB_ERR_MEMORY;
}
days = rtime / (3600 * 24);
*********************************************************************/
static jb_err client_accept_encoding(struct client_state *csp, char **header)
{
+#ifdef FEATURE_COMPRESSION
+ if ((csp->config->feature_flags & RUNTIME_FEATURE_COMPRESSION)
+ && strstr(*header, "deflate"))
+ {
+ csp->flags |= CSP_FLAG_CLIENT_SUPPORTS_DEFLATE;
+ }
+#endif
if ((csp->action->flags & ACTION_NO_COMPRESSION) != 0)
{
log_error(LOG_LEVEL_HEADER, "Suppressed offer to compress content");
/* booleans for parameters we have to check multiple times */
int parameter_conditional_block;
int parameter_conditional_forge;
-
+
#ifdef FEATURE_FORCE_LOAD
/*
* Since the referrer can include the prefix even
return JB_ERR_OK;
}
else
- {
+ {
/*
* Replacing Accept-Language header
*/
freez(*header);
*header = strdup("Accept-Language: ");
- string_append(header, newval);
+ string_append(header, newval);
if (*header == NULL)
{
log_error(LOG_LEVEL_ERROR,
- "Insufficient memory. Accept-Language header crunched without replacement.");
+ "Insufficient memory. Accept-Language header crunched without replacement.");
}
else
{
/* Is the current header the lucky one? */
if (strstr(*header, crunch_pattern))
{
- log_error(LOG_LEVEL_HEADER, "Crunching client header: %s (contains: %s)", *header, crunch_pattern);
+ log_error(LOG_LEVEL_HEADER, "Crunching client header: %s (contains: %s)", *header, crunch_pattern);
freez(*header);
}
}
return JB_ERR_OK;
}
- if (!csp->http->hostport || (*csp->http->hostport == '*') ||
+ if (!csp->http->hostport || (*csp->http->hostport == '*') ||
*csp->http->hostport == ' ' || *csp->http->hostport == '\0')
{
-
+
if (NULL == (p = strdup((*header)+6)))
{
return JB_ERR_MEMORY;
struct tm gmt;
#endif
struct tm *timeptr = NULL;
- time_t tm = 0;
+ time_t tm = 0;
const char *newval;
char * endptr;
-
+
if ( 0 == strcmpic(*header, "If-Modified-Since: Wed, 08 Jun 1955 12:00:00 GMT"))
{
- /*
+ /*
* The client got an error message because of a temporary problem,
* the problem is gone and the client now tries to revalidate our
* error message on the real server. The revalidation would always
*header, rtime, (rtime == 1 || rtime == -1) ? "e": "es");
if (negative_range)
{
- rtime *= -1;
+ rtime *= -1;
}
rtime *= 60;
rtime = pick_from_range(rtime);
if (*header == NULL)
{
log_error(LOG_LEVEL_HEADER, "Insufficient memory, header crunched without replacement.");
- return JB_ERR_MEMORY;
+ return JB_ERR_MEMORY;
}
hours = rtime / 3600;
static jb_err client_if_none_match(struct client_state *csp, char **header)
{
if (csp->action->flags & ACTION_CRUNCH_IF_NONE_MATCH)
- {
+ {
log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
freez(*header);
}
"force-text-mode overruled the client's request to fetch without filtering!");
}
else
- {
+ {
csp->content_type = CT_TABOO; /* XXX: This hack shouldn't be necessary */
csp->flags |= CSP_FLAG_NO_FILTERING;
log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering.");
freez(*header);
}
}
- return JB_ERR_OK;
+ return JB_ERR_OK;
}
freez(*header);
}
- return JB_ERR_OK;
+ return JB_ERR_OK;
}
/* the following functions add headers directly to the header list */
*
* Function : server_connection_adder
*
- * Description : Adds an appropiate "Connection:" header to csp->headers
+ * Description : Adds an appropriate "Connection:" header to csp->headers
* unless the header was already present. Called from `sed'.
*
* Parameters :
*
* Description : Adds a "Proxy-Connection: keep-alive" header to
* csp->headers if the client asked for keep-alive.
- * XXX: We should reuse existant ones.
+ * XXX: We should reuse existent ones.
*
* Parameters :
* 1 : csp = Current client state (buffers, headers, etc...)
static jb_err server_set_cookie(struct client_state *csp, char **header)
{
time_t now;
- time_t cookie_time;
+ time_t cookie_time;
time(&now);
*
* Function : strclean
*
- * Description : In-Situ-Eliminate all occurances of substring in
+ * Description : In-Situ-Eliminate all occurrences of substring in
* string
*
* Parameters :
* to get the numerical respresentation.
*
* Parameters :
- * 1 : header_time = HTTP header time as string.
+ * 1 : header_time = HTTP header time as string.
* 2 : result = storage for header_time in seconds
*
* Returns : JB_ERR_OK if the time format was recognized, or
static jb_err parse_header_time(const char *header_time, time_t *result)
{
struct tm gmt;
-
/*
- * Zero out gmt to prevent time zone offsets.
- *
- * While this is only necessary on some platforms
- * (mingw32 for example), I don't know how to
- * detect these automatically and doing it everywhere
- * shouldn't hurt.
+ * Known header time layouts, tried in the listed order.
+ * The first layout that strptime() accepts and that
+ * passes the year sanity check below decides the result.
+ *
+ * Checking for two-digit years first in an
+ * attempt to work around GNU libc's strptime()
+ * reporting negative year values when using %Y.
*/
- memset(&gmt, 0, sizeof(gmt));
-
- /* Tue, 02 Jun 2037 20:00:00 */
- if ((NULL == strptime(header_time, "%a, %d %b %Y %H:%M:%S", &gmt))
- /* Tue, 02-Jun-2037 20:00:00 */
- && (NULL == strptime(header_time, "%a, %d-%b-%Y %H:%M:%S", &gmt))
- /* Tue, 02-Jun-37 20:00:00 */
- && (NULL == strptime(header_time, "%a, %d-%b-%y %H:%M:%S", &gmt))
- /* Tuesday, 02-Jun-2037 20:00:00 */
- && (NULL == strptime(header_time, "%A, %d-%b-%Y %H:%M:%S", &gmt))
- /* Tuesday Jun 02 20:00:00 2037 */
- && (NULL == strptime(header_time, "%A %b %d %H:%M:%S %Y", &gmt)))
+ static const char * const time_formats[] = {
+ /* Tue, 02-Jun-37 20:00:00 */
+ "%a, %d-%b-%y %H:%M:%S",
+ /* Tue, 02 Jun 2037 20:00:00 */
+ "%a, %d %b %Y %H:%M:%S",
+ /* Tue, 02-Jun-2037 20:00:00 */
+ "%a, %d-%b-%Y %H:%M:%S",
+ /* Tuesday, 02-Jun-2037 20:00:00 */
+ "%A, %d-%b-%Y %H:%M:%S",
+ /* Tuesday Jun 02 20:00:00 2037 */
+ "%A %b %d %H:%M:%S %Y"
+ };
+ unsigned int i;
+
+ for (i = 0; i < SZ(time_formats); i++)
{
- return JB_ERR_PARSE;
- }
+ /*
+ * Zero out gmt to prevent time zone offsets.
+ * Documented to be required for GNU libc.
+ *
+ * This is repeated on every iteration, i.e. before
+ * each individual strptime() call.
+ */
+ memset(&gmt, 0, sizeof(gmt));
- *result = timegm(&gmt);
+ if (NULL != strptime(header_time, time_formats[i], &gmt))
+ {
+ /*
+ * Sanity check for GNU libc.
+ *
+ * A negative tm_year is treated like a failed parse:
+ * it is logged and the next format gets its turn.
+ */
+ if (gmt.tm_year < 0)
+ {
+ log_error(LOG_LEVEL_HEADER,
+ "Failed to parse '%s' using '%s'. Moving on.",
+ header_time, time_formats[i]);
+ continue;
+ }
+ *result = timegm(&gmt);
+ return JB_ERR_OK;
+ }
+ }
- return JB_ERR_OK;
+ return JB_ERR_PARSE; /* No format produced a usable time, *result is left untouched. */
}
* Parameters :
* 1 : headers = List of headers (one of them hopefully being
* the "Host:" header)
- * 2 : http = storage for the result (host, port and hostport).
+ * 2 : http = storage for the result (host, port and hostport).
*
* Returns : JB_ERR_MEMORY in case of memory problems,
* JB_ERR_PARSE if the host header couldn't be found,
return JB_ERR_PARSE;
}
- if (NULL == (p = strdup((host))))
+ p = strdup(host);
+ if (NULL == p)
{
log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
return JB_ERR_MEMORY;
*
* Description : Helper for client_referrer to forge a referer as
* 'http://[hostname:port/' to fool stupid
- * checks for in-site links
+ * checks for in-site links
*
* Parameters :
* 1 : header = Pointer to header pointer