Add track.webtrekk.de to the block-as-image section.

[privoxy.git] / parsers.c
diff --git a/parsers.c b/parsers.c

index 6c952c9..0bcdc42 100644 (file)
--- a/parsers.c
+++ b/parsers.c
@@ -1,4 +1,4 @@
-const char parsers_rcs[] = "$Id: parsers.c,v 1.125 2008/04/17 14:40:49 fabiankeil Exp $";
+const char parsers_rcs[] = "$Id: parsers.c,v 1.136 2008/05/26 16:02:24 fabiankeil Exp $";
  /*********************************************************************
   *
   * File        :  $Source: /cvsroot/ijbswa/current/parsers.c,v $
@@ -17,7 +17,7 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.125 2008/04/17 14:40:49 fabiankei
   *                   `client_if_none_match', `get_destination_from_headers',
   *                   `parse_header_time', `decompress_iob' and `server_set_cookie'.
   *
- * Copyright   :  Written by and Copyright (C) 2001-2007 the SourceForge
+ * Copyright   :  Written by and Copyright (C) 2001-2008 the SourceForge
   *                Privoxy team. http://www.privoxy.org/
   *
   *                Based on the Internet Junkbuster originally written
@@ -44,6 +44,49 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.125 2008/04/17 14:40:49 fabiankei
   *
   * Revisions   :
   *    $Log: parsers.c,v $
+ *    Revision 1.136  2008/05/26 16:02:24  fabiankeil
+ *    s@Insufficent@Insufficient@
+ *
+ *    Revision 1.135  2008/05/21 20:12:10  fabiankeil
+ *    The whole point of strclean() is to modify the
+ *    first parameter, so don't mark it immutable,
+ *    even though the compiler lets us get away with it.
+ *
+ *    Revision 1.134  2008/05/21 19:27:25  fabiankeil
+ *    As the wafer actions are gone, we can stop including encode.h.
+ *
+ *    Revision 1.133  2008/05/21 15:50:47  fabiankeil
+ *    Ditch cast from (char **) to (char **).
+ *
+ *    Revision 1.132  2008/05/21 15:47:14  fabiankeil
+ *    Streamline sed()'s prototype and declare
+ *    the header parse and add structures static.
+ *
+ *    Revision 1.131  2008/05/20 20:13:30  fabiankeil
+ *    Factor update_server_headers() out of sed(), ditch the
+ *    first_run hack and make server_patterns_light static.
+ *
+ *    Revision 1.130  2008/05/19 17:18:04  fabiankeil
+ *    Wrap memmove() calls in string_move()
+ *    to document the purpose in one place.
+ *
+ *    Revision 1.129  2008/05/17 14:02:07  fabiankeil
+ *    Normalize linear header white space.
+ *
+ *    Revision 1.128  2008/05/16 16:39:03  fabiankeil
+ *    If a header is split across multiple lines,
+ *    merge them to a single line before parsing them.
+ *
+ *    Revision 1.127  2008/05/10 13:23:38  fabiankeil
+ *    Don't provide get_header() with the whole client state
+ *    structure when it only needs access to csp->iob.
+ *
+ *    Revision 1.126  2008/05/03 16:40:45  fabiankeil
+ *    Change content_filters_enabled()'s parameter from
+ *    csp->action to action so it can be also used in the
+ *    CGI code. Don't bother checking if there are filters
+ *    loaded, as that's somewhat besides the point.
+ *
   *    Revision 1.125  2008/04/17 14:40:49  fabiankeil
   *    Provide get_http_time() with the buffer size so it doesn't
   *    have to blindly assume that the buffer is big enough.
@@ -804,7 +847,6 @@ const char parsers_rcs[] = "$Id: parsers.c,v 1.125 2008/04/17 14:40:49 fabiankei
  #endif /* def FEATURE_PTHREAD */
  #include "list.h"
  #include "parsers.h"
-#include "encode.h"
  #include "ssplit.h"
  #include "errlog.h"
  #include "jbsockets.h"
@@ -832,6 +874,7 @@ const char parsers_h_rcs[] = PARSERS_H_VERSION;
  #define ijb_isupper(__X) isupper((int)(unsigned char)(__X))
  #define ijb_tolower(__X) tolower((int)(unsigned char)(__X))
  
+static char *get_header_line(struct iob *iob);
  static jb_err scan_headers(struct client_state *csp);
  static jb_err header_tagger(struct client_state *csp, char *header);
  static jb_err parse_header_time(const char *header_time, time_t *result);
@@ -875,7 +918,22 @@ static jb_err create_fake_referrer(char **header, const char *fake_referrer);
  static jb_err handle_conditional_hide_referrer_parameter(char **header,
     const char *host, const int parameter_conditional_block);
  
-const struct parsers client_patterns[] = {
+/*
+ * One entry of a header parser table: prefix, prefix length, handler.
+ */
+struct parsers
+{
+   /** The header prefix to match; NULL in the entry that ends a table */
+   const char *str;
+   
+   /** Length of str; sed() treats CHECK_EVERY_HEADER_REMAINING as match-all */
+   const size_t len;
+   
+   /** The function to apply to each matching header line */
+   const parser_func_ptr parser;
+};
+
+static const struct parsers client_patterns[] = {
     { "referer:",                  8,   client_referrer },
     { "user-agent:",              11,   client_uagent },
     { "ua-",                       3,   client_ua },
@@ -901,7 +959,7 @@ const struct parsers client_patterns[] = {
     { NULL,                        0,   NULL }
  };
  
-const struct parsers server_patterns[] = {
+static const struct parsers server_patterns[] = {
     { "HTTP/",                     5, server_http },
     { "set-cookie:",              11, server_set_cookie },
     { "connection:",              11, connection },
@@ -917,16 +975,7 @@ const struct parsers server_patterns[] = {
     { NULL, 0, NULL }
  };
  
-const struct parsers server_patterns_light[] = {
-   { "Content-Length:",          15, server_content_length },
-   { "Transfer-Encoding:",       18, server_transfer_coding },
-#ifdef FEATURE_ZLIB
-   { "Content-Encoding:",        17, server_content_encoding },
-#endif /* def FEATURE_ZLIB */
-   { NULL, 0, NULL }
-};
-
-const add_header_func_ptr add_client_headers[] = {
+static const add_header_func_ptr add_client_headers[] = {
     client_host_adder,
     client_xtra_adder,
     /* Temporarily disabled:    client_accept_encoding_adder, */
@@ -934,7 +983,7 @@ const add_header_func_ptr add_client_headers[] = {
     NULL
  };
  
-const add_header_func_ptr add_server_headers[] = {
+static const add_header_func_ptr add_server_headers[] = {
     connection_close_adder,
     NULL
  };
@@ -1426,14 +1475,184 @@ jb_err decompress_iob(struct client_state *csp)
  #endif /* defined(FEATURE_ZLIB) */
  
  
+/*********************************************************************
+ *
+ * Function    :  string_move
+ *
+ * Description :  memmove wrapper to move the last part of a string
+ *                towards the beginning, overwriting the part in
+ *                the middle. strlcpy() can't be used here as the
+ *                strings overlap.
+ *
+ * Parameters  :
+ *          1  :  dst = Destination to overwrite, must be below src.
+ *          2  :  src = NUL-terminated tail of the string to move.
+ *
+ * Returns     :  N/A
+ *
+ *********************************************************************/
+static void string_move(char *dst, char *src)
+{
+   assert(dst < src); /* only ever moves towards the start of the string */
+
+   /* +1 to copy the terminating nul as well. */
+   memmove(dst, src, strlen(src)+1);
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  normalize_lws
+ *
+ * Description :  Reduces unquoted linear white space in headers
+ *                to a single space in accordance with RFC 2616 2.2.
+ *                This simplifies parsing and filtering later on.
+ *                White space right before the first colon is removed.
+ *                XXX: Remove log messages before
+ *                     the next stable release?
+ *
+ * Parameters  :
+ *          1  :  header = NUL-terminated header, modified in place.
+ *
+ * Returns     :  N/A
+ *
+ *********************************************************************/
+static void normalize_lws(char *header)
+{
+   char *p = header;
+
+   while (*p != '\0')
+   {
+      if (ijb_isspace(*p) && ijb_isspace(*(p+1))) /* run of two or more */
+      {
+         char *q = p+1;
+
+         while (ijb_isspace(*q)) /* find the first character after the run */
+         {
+            q++;
+         }
+         log_error(LOG_LEVEL_HEADER, "Reducing white space in '%s'", header);
+         string_move(p+1, q); /* keep *p, drop the rest of the run */
+      }
+
+      if (*p == '\t')
+      {
+         log_error(LOG_LEVEL_HEADER,
+            "Converting tab to space in '%s'", header);
+         *p = ' ';
+      }
+      else if (*p == '"')
+      {
+         char *end_of_token = strstr(p+1, "\""); /* next double quote, if any */
+
+         if (NULL != end_of_token)
+         {
+            /* Don't mess with quoted text, continue after the closing quote. */
+            p = end_of_token;
+         }
+         else
+         {
+            log_error(LOG_LEVEL_HEADER,
+               "Ignoring single quote in '%s'", header);
+         }
+      }
+      p++;
+   }
+
+   p = strchr(header, ':'); /* only the first colon is considered */
+   if ((p != NULL) && (p != header) && ijb_isspace(*(p-1)))
+   {
+      /*
+       * There's still space before the colon.
+       * We don't want it.
+       */
+      string_move(p-1, p); /* removes exactly one character */
+   }
+}
+
+
  /*********************************************************************
   *
   * Function    :  get_header
   *
   * Description :  This (odd) routine will parse the csp->iob
+ *                to get the next complete header.
   *
   * Parameters  :
- *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          1  :  iob = The I/O buffer to parse, usually csp->iob.
+ *
+ * Returns     :  Any one of the following:
+ *
+ * 1) a pointer to a dynamically allocated string that contains a header line
+ * 2) NULL  indicating that the end of the header was reached
+ * 3) ""    indicating that the end of the iob was reached before finding
+ *          a complete header line.
+ *
+ *********************************************************************/
+char *get_header(struct iob *iob)
+{
+   char *header;
+
+   header = get_header_line(iob);
+
+   if ((header == NULL) || (*header == '\0'))
+   {
+      /*
+       * End of the header (NULL) or incomplete line (""): pass it through.
+       */
+      return header;
+   }
+
+   while ((iob->cur[0] == ' ') || (iob->cur[0] == '\t')) /* continuation line */
+   {
+      /*
+       * Header spans multiple lines, append the next one.
+       */
+      char *continued_header;
+      
+      continued_header = get_header_line(iob);
+      if ((continued_header == NULL) || (*continued_header == '\0'))
+      {
+         /*
+          * No complete header read yet, return what we got.
+          * XXX: Should "unread" header instead.
+          */
+         log_error(LOG_LEVEL_INFO,
+            "Failed to read a multi-line header properly: '%s'",
+            header);
+         break;
+      }
+
+      if (JB_ERR_OK != string_join(&header, continued_header))
+      { /* NOTE(review): presumably a LOG_LEVEL_FATAL message never returns -- confirm */
+         log_error(LOG_LEVEL_FATAL,
+            "Out of memory while appending multiple headers.");
+      }
+      else
+      {
+         /* XXX: remove before next stable release. */
+         log_error(LOG_LEVEL_HEADER,
+            "Merged multiple header lines to: '%s'",
+            header);
+      }
+   }
+
+   normalize_lws(header); /* clean up white space once the header is one line */
+
+   return header;
+
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  get_header_line
+ *
+ * Description :  This (odd) routine will parse the csp->iob
+ *                to get the next header line.
+ *
+ * Parameters  :
+ *          1  :  iob = The I/O buffer to parse, usually csp->iob.
   *
   * Returns     :  Any one of the following:
   *
@@ -1443,11 +1662,9 @@ jb_err decompress_iob(struct client_state *csp)
   *          a complete header line.
   *
   *********************************************************************/
-char *get_header(struct client_state *csp)
+static char *get_header_line(struct iob *iob)
  {
-   struct iob *iob;
     char *p, *q, *ret;
-   iob = csp->iob;
  
     if ((iob->cur == NULL)
        || ((p = strchr(iob->cur, '\n')) == NULL))
@@ -1461,7 +1678,7 @@ char *get_header(struct client_state *csp)
     if (ret == NULL)
     {
        /* FIXME No way to handle error properly */
-      log_error(LOG_LEVEL_FATAL, "Out of memory in get_header()");
+      log_error(LOG_LEVEL_FATAL, "Out of memory in get_header_line()");
     }
  
     iob->cur = p+1;
@@ -1572,79 +1789,106 @@ static jb_err scan_headers(struct client_state *csp)
   *                As a side effect it frees the space used by the original
   *                header lines.
   *
- *                XXX: should be split to remove the first_run hack.
- *
   * Parameters  :
- *          1  :  pats = list of patterns to match against headers
- *          2  :  more_headers = list of functions to add more
- *                headers (client or server)
- *          3  :  csp = Current client state (buffers, headers, etc...)
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  filter_server_headers = Boolean to switch between
+ *                                        server and client header filtering.
   *
   * Returns     :  JB_ERR_OK in case off success, or
   *                JB_ERR_MEMORY on out-of-memory error.
   *
   *********************************************************************/
-jb_err sed(const struct parsers pats[],
-           const add_header_func_ptr more_headers[],
-           struct client_state *csp)
+jb_err sed(struct client_state *csp, int filter_server_headers)
  {
+   /* XXX: use more descriptive names. */
     struct list_entry *p;
     const struct parsers *v;
     const add_header_func_ptr *f;
     jb_err err = JB_ERR_OK;
-   int first_run;
-
-   /*
-    * If filtering is enabled, sed is run twice,
-    * but most of the work needs to be done only once.
-    */
-   first_run = (more_headers != NULL ) ? 1 : 0;
  
-   if (first_run) /* Parse and print */
+   if (filter_server_headers)
+   {
+      v = server_patterns;
+      f = add_server_headers;
+   }
+   else
     {
-      scan_headers(csp);
+      v = client_patterns;
+      f = add_client_headers;
+   }
  
-      for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
+   scan_headers(csp);
+
+   while ((err == JB_ERR_OK) && (v->str != NULL))
+   {
+      for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next)
        {
-         for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
-         {
-            /* Header crunch()ed in previous run? -> ignore */
-            if (p->str == NULL) continue;
+         /* Header crunch()ed in previous run? -> ignore */
+         if (p->str == NULL) continue;
  
-            /* Does the current parser handle this header? */
-            if ((strncmpic(p->str, v->str, v->len) == 0) || (v->len == CHECK_EVERY_HEADER_REMAINING))
-            {
-               err = v->parser(csp, (char **)&(p->str));
-            }
+         /* Does the current parser handle this header? */
+         if ((strncmpic(p->str, v->str, v->len) == 0) ||
+             (v->len == CHECK_EVERY_HEADER_REMAINING))
+         {
+            err = v->parser(csp, &(p->str));
           }
        }
-      /* place any additional headers on the csp->headers list */
-      for (f = more_headers; (err == JB_ERR_OK) && (*f) ; f++)
-      {
-         err = (*f)(csp);
-      }
+      v++;
     }
-   else /* Parse only */
+
+   /* place additional headers on the csp->headers list */
+   while ((err == JB_ERR_OK) && (*f))
     {
-      /*
-       * The second run is only needed if the body was modified
-       * and the content-lenght has changed.
-       */
-      if (strncmpic(csp->http->cmd, "HEAD", 4))
+      err = (*f)(csp);
+      f++;
+   }
+
+   return err;
+}
+
+
+/*********************************************************************
+ *
+ * Function    :  update_server_headers
+ *
+ * Description :  Updates server headers after the body has been modified.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *
+ * Returns     :  JB_ERR_OK in case of success, or
+ *                JB_ERR_MEMORY on out-of-memory error.
+ *
+ *********************************************************************/
+jb_err update_server_headers(struct client_state *csp)
+{
+   jb_err err = JB_ERR_OK;
+
+   static const struct parsers server_patterns_light[] = {
+      { "Content-Length:",    15, server_content_length },
+      { "Transfer-Encoding:", 18, server_transfer_coding },
+#ifdef FEATURE_ZLIB
+      { "Content-Encoding:",  17, server_content_encoding },
+#endif /* def FEATURE_ZLIB */
+      { NULL, 0, NULL }
+   };
+
+   if (strncmpic(csp->http->cmd, "HEAD", 4))
+   {
+      const struct parsers *v;
+      struct list_entry *p;
+
+      for (v = server_patterns_light; (err == JB_ERR_OK) && (v->str != NULL); v++)
        {
-         /*XXX: Code duplication */
-         for (v = pats; (err == JB_ERR_OK) && (v->str != NULL) ; v++)
+         for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next)
           {
-            for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL) ; p = p->next)
-            {
-               /* Header crunch()ed in previous run? -> ignore */
-               if (p->str == NULL) continue;
+            /* Header crunch()ed in previous run? -> ignore */
+            if (p->str == NULL) continue;
  
-               /* Does the current parser handle this header? */
-               if (strncmpic(p->str, v->str, v->len) == 0)
-               {
-                  err = v->parser(csp, (char **)&(p->str));
-               }
+            /* Does the current parser handle this header? */
+            if (strncmpic(p->str, v->str, v->len) == 0)
+            {
+               err = v->parser(csp, (char **)&(p->str));
              }
           }
        }
@@ -1654,7 +1898,6 @@ jb_err sed(const struct parsers pats[],
  }
  
  
-
  /*********************************************************************
   *
   * Function    :  header_tagger
@@ -2597,7 +2840,7 @@ static jb_err server_last_modified(struct client_state *csp, char **header)
  
        if (*header == NULL)
        {
-         log_error(LOG_LEVEL_HEADER, "Insufficent memory. Last-Modified header got lost, boohoo.");  
+         log_error(LOG_LEVEL_HEADER, "Insufficient memory. Last-Modified header got lost, boohoo.");  
        }
        else
        {
@@ -2656,21 +2899,19 @@ static jb_err server_last_modified(struct client_state *csp, char **header)
  
              if (*header == NULL)
              {
-               log_error(LOG_LEVEL_ERROR, "Insufficent memory, header crunched without replacement.");
+               log_error(LOG_LEVEL_ERROR, "Insufficient memory, header crunched without replacement.");
                 return JB_ERR_MEMORY;  
              }
  
-            if (LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */
-            {
-               days    = rtime / (3600 * 24);
-               hours   = rtime / 3600 % 24;
-               minutes = rtime / 60 % 60;
-               seconds = rtime % 60;            
-
-               log_error(LOG_LEVEL_HEADER, "Randomized:  %s (added %d da%s %d hou%s %d minut%s %d second%s",
-                  *header, days, (days == 1) ? "y" : "ys", hours, (hours == 1) ? "r" : "rs",
-                  minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)");
-            }
+            days    = rtime / (3600 * 24);
+            hours   = rtime / 3600 % 24;
+            minutes = rtime / 60 % 60;
+            seconds = rtime % 60;
+
+            log_error(LOG_LEVEL_HEADER,
+               "Randomized:  %s (added %d da%s %d hou%s %d minut%s %d second%s",
+               *header, days, (days == 1) ? "y" : "ys", hours, (hours == 1) ? "r" : "rs",
+               minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)");
           }
           else
           {
@@ -2892,7 +3133,7 @@ static jb_err client_accept_language(struct client_state *csp, char **header)
        if (*header == NULL)
        {
           log_error(LOG_LEVEL_ERROR,
-            "Insufficent memory. Accept-Language header crunched without replacement.");  
+            "Insufficient memory. Accept-Language header crunched without replacement.");  
        }
        else
        {
@@ -3355,20 +3596,19 @@ static jb_err client_if_modified_since(struct client_state *csp, char **header)
  
              if (*header == NULL)
              {
-               log_error(LOG_LEVEL_HEADER, "Insufficent memory, header crunched without replacement.");
+               log_error(LOG_LEVEL_HEADER, "Insufficient memory, header crunched without replacement.");
                 return JB_ERR_MEMORY;  
              }
  
-            if (LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */
-            {
-               hours   = rtime / 3600;
-               minutes = rtime / 60 % 60;
-               seconds = rtime % 60;            
+            hours   = rtime / 3600;
+            minutes = rtime / 60 % 60;
+            seconds = rtime % 60;
  
-               log_error(LOG_LEVEL_HEADER, "Randomized:  %s (%s %d hou%s %d minut%s %d second%s",
-                  *header, (negative) ? "subtracted" : "added", hours, (hours == 1) ? "r" : "rs",
-                  minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)");
-            }
+            log_error(LOG_LEVEL_HEADER,
+               "Randomized:  %s (%s %d hou%s %d minut%s %d second%s",
+               *header, (negative) ? "subtracted" : "added", hours,
+               (hours == 1) ? "r" : "rs", minutes, (minutes == 1) ? "e" : "es",
+               seconds, (seconds == 1) ? ")" : "s)");
           }
        }
     }
@@ -3820,7 +4060,7 @@ static jb_err server_set_cookie(struct client_state *csp, char **header)
                  */
                 log_error(LOG_LEVEL_ERROR,
                    "Can't parse \'%s\', send by %s. Unsupported time format?", cur_tag, csp->http->url);
-               memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+               string_move(cur_tag, next_tag);
                 changed = 1;
              }
              else
@@ -3871,12 +4111,8 @@ static jb_err server_set_cookie(struct client_state *csp, char **header)
                    /*
                     * Still valid, delete expiration date by copying
                     * the rest of the string over it.
-                   *
-                   * (Note that we cannot just use "strcpy(cur_tag, next_tag)",
-                   * since the behaviour of strcpy is undefined for overlapping
-                   * strings.)
                     */
-                  memmove(cur_tag, next_tag, strlen(next_tag) + 1);
+                  string_move(cur_tag, next_tag);
  
                    /* That changed the header, need to issue a log message */
                    changed = 1;
@@ -3923,7 +4159,7 @@ static jb_err server_set_cookie(struct client_state *csp, char **header)
   * Returns     :  Number of eliminations
   *
   *********************************************************************/
-int strclean(const char *string, const char *substring)
+int strclean(char *string, const char *substring)
  {
     int hits = 0;
     size_t len;