/* RCS/CVS $Id$ keyword string identifying the checked-in revision of this file. */
1 const char parsers_rcs[] = "$Id: parsers.c,v 1.132 2008/05/21 15:47:14 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/parsers.c,v $
6 * Purpose : Declares functions to parse/crunch headers and pages.
7 * Functions declared include:
8 * `add_to_iob', `client_cookie_adder', `client_from',
9 * `client_referrer', `client_send_cookie', `client_ua',
10 * `client_uagent', `client_x_forwarded',
11 * `client_x_forwarded_adder', `client_xtra_adder',
12 * `content_type', `crumble', `destroy_list', `enlist',
13 * `flush_socket', `get_header', `sed', `filter_header',
14 * `server_content_encoding', `server_content_disposition',
15 * `server_last_modified', `client_accept_language',
16 * `crunch_client_header', `client_if_modified_since',
17 * `client_if_none_match', `get_destination_from_headers',
18 * `parse_header_time', `decompress_iob' and `server_set_cookie'.
20 * Copyright : Written by and Copyright (C) 2001-2008 the SourceForge
21 * Privoxy team. http://www.privoxy.org/
23 * Based on the Internet Junkbuster originally written
24 * by and Copyright (C) 1997 Anonymous Coders and
25 * Junkbusters Corporation. http://www.junkbusters.com
27 * This program is free software; you can redistribute it
28 * and/or modify it under the terms of the GNU General
29 * Public License as published by the Free Software
30 * Foundation; either version 2 of the License, or (at
31 * your option) any later version.
33 * This program is distributed in the hope that it will
34 * be useful, but WITHOUT ANY WARRANTY; without even the
35 * implied warranty of MERCHANTABILITY or FITNESS FOR A
36 * PARTICULAR PURPOSE. See the GNU General Public
37 * License for more details.
39 * The GNU General Public License should be included with
40 * this file. If not, you can view it at
41 * http://www.gnu.org/copyleft/gpl.html
42 * or write to the Free Software Foundation, Inc., 59
43 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
47 * Revision 1.132 2008/05/21 15:47:14 fabiankeil
48 * Streamline sed()'s prototype and declare
49 * the header parse and add structures static.
51 * Revision 1.131 2008/05/20 20:13:30 fabiankeil
52 * Factor update_server_headers() out of sed(), ditch the
53 * first_run hack and make server_patterns_light static.
55 * Revision 1.130 2008/05/19 17:18:04 fabiankeil
56 * Wrap memmove() calls in string_move()
57 * to document the purpose in one place.
59 * Revision 1.129 2008/05/17 14:02:07 fabiankeil
60 * Normalize linear header white space.
62 * Revision 1.128 2008/05/16 16:39:03 fabiankeil
63 * If a header is split across multiple lines,
64 * merge them to a single line before parsing them.
66 * Revision 1.127 2008/05/10 13:23:38 fabiankeil
67 * Don't provide get_header() with the whole client state
68 * structure when it only needs access to csp->iob.
70 * Revision 1.126 2008/05/03 16:40:45 fabiankeil
71 * Change content_filters_enabled()'s parameter from
72 * csp->action to action so it can be also used in the
73 * CGI code. Don't bother checking if there are filters
74 * loaded, as that's somewhat beside the point.
76 * Revision 1.125 2008/04/17 14:40:49 fabiankeil
77 * Provide get_http_time() with the buffer size so it doesn't
78 * have to blindly assume that the buffer is big enough.
80 * Revision 1.124 2008/04/16 16:38:21 fabiankeil
81 * Don't pass the whole csp structure to flush_socket()
82 * when it only needs a file descriptor and a buffer.
84 * Revision 1.123 2008/03/29 12:13:46 fabiankeil
85 * Remove send-wafer and send-vanilla-wafer actions.
87 * Revision 1.122 2008/03/28 15:13:39 fabiankeil
88 * Remove inspect-jpegs action.
90 * Revision 1.121 2008/01/05 21:37:03 fabiankeil
91 * Let client_range() also handle Request-Range headers
92 * which apparently are still supported by many servers.
94 * Revision 1.120 2008/01/04 17:43:45 fabiankeil
95 * Improve the warning messages that get logged if the action files
96 * "enable" filters but no filters of that type have been loaded.
98 * Revision 1.119 2007/12/28 18:32:51 fabiankeil
99 * In server_content_type():
100 * - Don't require leading white space when detecting image content types.
101 * - Change '... not replaced ...' message to sound less crazy if the text
102 * type actually is 'text/plain'.
103 * - Mark the 'text/plain == binary data' assumption for removal.
104 * - Remove a bunch of trailing white space.
106 * Revision 1.118 2007/12/28 16:56:35 fabiankeil
107 * Minor server_content_disposition() changes:
108 * - Don't regenerate the header name all lower-case.
109 * - Some white space fixes.
110 * - Remove useless log message in case of ENOMEM.
112 * Revision 1.117 2007/12/06 18:11:50 fabiankeil
113 * Garbage-collect the code to add a X-Forwarded-For
114 * header as it seems to be mostly used by accident.
116 * Revision 1.116 2007/12/01 13:04:22 fabiankeil
117 * Fix a crash on mingw32 with some Last Modified times in the future.
119 * Revision 1.115 2007/11/02 16:52:50 fabiankeil
120 * Remove a "can't happen" error block which, over
121 * time, mutated into a "guaranteed to happen" block.
123 * Revision 1.114 2007/10/19 16:56:26 fabiankeil
124 * - Downgrade "Buffer limit reached" message to LOG_LEVEL_INFO.
125 * - Use shiny new content_filters_enabled() in client_range().
127 * Revision 1.113 2007/10/10 17:29:57 fabiankeil
128 * I forgot about Poland.
130 * Revision 1.112 2007/10/09 16:38:40 fabiankeil
131 * Remove Range and If-Range headers if content filtering is enabled.
133 * Revision 1.111 2007/10/04 18:07:00 fabiankeil
134 * Move ACTION_VANILLA_WAFER handling from jcc's chat() into
135 * client_cookie_adder() to make sure send-vanilla-wafer can be
136 * controlled through tags (and thus regression-tested).
138 * Revision 1.110 2007/09/29 10:42:37 fabiankeil
139 * - Remove "scanning headers for" log message again.
140 * - Some more whitespace fixes.
142 * Revision 1.109 2007/09/08 14:25:48 fabiankeil
143 * Refactor client_referrer() and add conditional-forge parameter.
145 * Revision 1.108 2007/08/28 18:21:03 fabiankeil
146 * A bunch of whitespace fixes, pointy hat to me.
148 * Revision 1.107 2007/08/28 18:16:32 fabiankeil
149 * Fix possible memory corruption in server_http, make sure it's not
150 * executed for ordinary server headers and mark some problems for later.
152 * Revision 1.106 2007/08/18 14:30:32 fabiankeil
153 * Let content-type-overwrite{} honour force-text-mode again.
155 * Revision 1.105 2007/08/11 14:49:49 fabiankeil
156 * - Add prototypes for the header parsers and make them static.
157 * - Comment out client_accept_encoding_adder() which isn't used right now.
159 * Revision 1.104 2007/07/14 07:38:19 fabiankeil
160 * Move the ACTION_FORCE_TEXT_MODE check out of
161 * server_content_type(). Signal other functions
162 * whether or not a content type has been declared.
163 * Part of the fix for BR#1750917.
165 * Revision 1.103 2007/06/01 16:31:54 fabiankeil
166 * Change sed() to return a jb_err in preparation for forward-override{}.
168 * Revision 1.102 2007/05/27 12:39:32 fabiankeil
169 * Adjust "X-Filter: No" to disable dedicated header filters.
171 * Revision 1.101 2007/05/14 10:16:41 fabiankeil
172 * Streamline client_cookie_adder().
174 * Revision 1.100 2007/04/30 15:53:11 fabiankeil
175 * Make sure filters with dynamic jobs actually use them.
177 * Revision 1.99 2007/04/30 15:06:26 fabiankeil
178 * - Introduce dynamic pcrs jobs that can resolve variables.
179 * - Remove unnecessary update_action_bits_for_all_tags() call.
181 * Revision 1.98 2007/04/17 18:32:10 fabiankeil
182 * - Make tagging based on tags set by earlier taggers
183 * of the same kind possible.
184 * - Log whether or not new tags cause action bits updates
185 * (in which case a matching tag-pattern section exists).
186 * - Log if the user tries to set a tag that is already set.
188 * Revision 1.97 2007/04/15 16:39:21 fabiankeil
189 * Introduce tags as alternative way to specify which
190 * actions apply to a request. At the moment tags can be
191 * created based on client and server headers.
193 * Revision 1.96 2007/04/12 12:53:58 fabiankeil
194 * Log a warning if the content is compressed, filtering is
195 * enabled and Privoxy was compiled without zlib support.
198 * Revision 1.95 2007/03/25 14:26:40 fabiankeil
199 * - Fix warnings when compiled with glibc.
200 * - Don't use crumble() for cookie crunching.
201 * - Move cookie time parsing into parse_header_time().
202 * - Let parse_header_time() return a jb_err code
203 * instead of a pointer that can only be used to
204 * check for NULL anyway.
206 * Revision 1.94 2007/03/21 12:23:53 fabiankeil
207 * - Add better protection against malicious gzip headers.
208 * - Stop logging the first hundred bytes of decompressed content.
209 * It looks like it's working and there is always debug 16.
210 * - Log the content size after decompression in decompress_iob()
211 * instead of pcrs_filter_response().
213 * Revision 1.93 2007/03/20 15:21:44 fabiankeil
214 * - Use dedicated header filter actions instead of abusing "filter".
215 * Replace "filter-client-headers" and "filter-server-headers"
216 * with "server-header-filter" and "client-header-filter".
217 * - Remove filter_client_header() and filter_server_header(),
218 * filter_header() now checks the shiny new
219 * CSP_FLAG_CLIENT_HEADER_PARSING_DONE flag instead.
221 * Revision 1.92 2007/03/05 13:25:32 fabiankeil
222 * - Cosmetical changes for LOG_LEVEL_RE_FILTER messages.
223 * - Handle "Cookie:" and "Connection:" headers a bit smarter
224 * (don't crunch them just to recreate them later on).
225 * - Add another non-standard time format for the cookie
226 * expiration date detection.
227 * - Fix a valgrind warning.
229 * Revision 1.91 2007/02/24 12:27:32 fabiankeil
230 * Improve cookie expiration date detection.
232 * Revision 1.90 2007/02/08 19:12:35 fabiankeil
233 * Don't run server_content_length() the first time
234 * sed() parses server headers; only adjust the
235 * Content-Length header if the page was modified.
237 * Revision 1.89 2007/02/07 16:52:11 fabiankeil
238 * Fix log messages regarding the cookie time format
239 * (cookie and request URL were mixed up).
241 * Revision 1.88 2007/02/07 11:27:12 fabiankeil
242 * - Let decompress_iob()
243 * - not corrupt the content if decompression fails
244 * early. (the first byte(s) were lost).
245 * - use pointer arithmetics with defined outcome for
247 * - Use a different kludge to remember a failed decompression.
249 * Revision 1.87 2007/01/31 16:21:38 fabiankeil
250 * Search for Max-Forwards headers case-insensitive,
251 * don't generate the "501 unsupported" message for invalid
252 * Max-Forwards values and don't increase negative ones.
254 * Revision 1.86 2007/01/30 13:05:26 fabiankeil
255 * - Let server_set_cookie() check the expiration date
256 * of cookies and don't touch the ones that are already
257 * expired. Fixes problems with low quality web applications
258 * as described in BR 932612.
260 * - Adjust comment in client_max_forwards to reality;
261 * remove invalid Max-Forwards headers.
263 * Revision 1.85 2007/01/26 15:33:46 fabiankeil
264 * Stop filter_header() from unintentionally removing
265 * empty header lines that were enlisted by the continue
268 * Revision 1.84 2007/01/24 12:56:52 fabiankeil
269 * - Repeat the request URL before logging any headers.
270 * Makes reading the log easier in case of simultaneous requests.
271 * - If there are more than one Content-Type headers in one request,
272 * use the first one and remove the others.
273 * - Remove "newval" variable in server_content_type().
274 * It's only used once.
276 * Revision 1.83 2007/01/12 15:03:02 fabiankeil
277 * Correct a cast, check inflateEnd() exit code
278 * to see if we have to, replace sprintf calls
281 * Revision 1.82 2007/01/01 19:36:37 fabiankeil
282 * Integrate a modified version of Wil Mahan's
283 * zlib patch (PR #895531).
285 * Revision 1.81 2006/12/31 22:21:33 fabiankeil
286 * Skip empty filter files in filter_header()
287 * but don't ignore the ones that come afterwards.
288 * Fixes BR 1619208, this time for real.
290 * Revision 1.80 2006/12/29 19:08:22 fabiankeil
291 * Reverted parts of my last commit
292 * to keep error handling working.
294 * Revision 1.79 2006/12/29 18:04:40 fabiankeil
295 * Fixed gcc43 conversion warnings.
297 * Revision 1.78 2006/12/26 17:19:20 fabiankeil
298 * Bringing back the "useless" localtime() call
299 * I removed in revision 1.67. On some platforms
300 * it's necessary to prevent time zone offsets.
302 * Revision 1.77 2006/12/07 18:44:26 fabiankeil
303 * Rebuild request URL in get_destination_from_headers()
304 * to make sure redirect{pcrs command} works as expected
305 * for intercepted requests.
307 * Revision 1.76 2006/12/06 19:52:25 fabiankeil
308 * Added get_destination_from_headers().
310 * Revision 1.75 2006/11/13 19:05:51 fabiankeil
311 * Make pthread mutex locking more generic. Instead of
312 * checking for OSX and OpenBSD, check for FEATURE_PTHREAD
313 * and use mutex locking unless there is an _r function
314 * available. Better safe than sorry.
316 * Fixes "./configure --disable-pthread" and should result
317 * in less threading-related problems on pthread-using platforms,
318 * but it still doesn't fix BR#1122404.
320 * Revision 1.74 2006/10/02 16:59:12 fabiankeil
321 * The special header "X-Filter: No" now disables
322 * header filtering as well.
324 * Revision 1.73 2006/09/23 13:26:38 roro
325 * Replace TABs by spaces in source code.
327 * Revision 1.72 2006/09/23 12:37:21 fabiankeil
328 * Don't print a log message every time filter_headers is
329 * entered or left. It only creates noise without any real
332 * Revision 1.71 2006/09/21 19:55:17 fabiankeil
333 * Fix +hide-if-modified-since{-n}.
335 * Revision 1.70 2006/09/08 12:06:34 fabiankeil
336 * Have hide-if-modified-since interpret the random
337 * range value as minutes instead of hours. Allows
338 * more fine-grained configuration.
340 * Revision 1.69 2006/09/06 16:25:51 fabiankeil
341 * Always have parse_header_time return a pointer
342 * that actually makes sense, even though we currently
343 * only need it to detect problems.
345 * Revision 1.68 2006/09/06 10:43:32 fabiankeil
346 * Added config option enable-remote-http-toggle
347 * to specify if Privoxy should recognize special
348 * headers (currently only X-Filter) to change its
349 * behaviour. Disabled by default.
351 * Revision 1.67 2006/09/04 11:01:26 fabiankeil
352 * After filtering de-chunked instances, remove
353 * "Transfer-Encoding" header entirely instead of changing
354 * it to "Transfer-Encoding: identity", which is invalid.
355 * Thanks Michael Shields <shields@msrl.com>. Fixes PR 1318658.
357 * Don't use localtime in parse_header_time. An empty time struct
358 * is good enough, it gets overwritten by strptime anyway.
360 * Revision 1.66 2006/09/03 19:38:28 fabiankeil
361 * Use gmtime_r if available, fallback to gmtime with mutex
362 * protection for MacOSX and use vanilla gmtime for the rest.
364 * Revision 1.65 2006/08/22 10:55:56 fabiankeil
365 * Changed client_referrer to use the right type (size_t) for
366 * hostlenght and to shorten the temporary referrer string with
367 * '\0' instead of adding a useless line break.
369 * Revision 1.64 2006/08/17 17:15:10 fabiankeil
370 * - Back to timegm() using GnuPG's replacement if necessary.
371 * Using mktime() and localtime() could add a one hour offset if
372 * the randomize factor was big enough to lead to a summer/wintertime
375 * - Removed now-useless Privoxy 3.0.3 compatibility glue.
377 * - Moved randomization code into pick_from_range().
379 * - Changed parse_header_time definition.
380 * time_t isn't guaranteed to be signed and
381 * if it isn't, -1 isn't available as error code.
382 * Changed some variable types in client_if_modified_since()
383 * because of the same reason.
385 * Revision 1.63 2006/08/14 13:18:08 david__schmidt
386 * OS/2 compilation compatibility fixups
388 * Revision 1.62 2006/08/14 08:58:42 fabiankeil
389 * Changed include from strptime.c to strptime.h
391 * Revision 1.61 2006/08/14 08:25:19 fabiankeil
392 * Split filter-headers{} into filter-client-headers{}
393 * and filter-server-headers{}.
394 * Added parse_header_time() to share some code.
395 * Replaced timegm() with mktime().
397 * Revision 1.60 2006/08/12 03:54:37 david__schmidt
398 * Windows service integration
400 * Revision 1.59 2006/08/03 02:46:41 david__schmidt
401 * Incorporate Fabian Keil's patch work:
402 * http://www.fabiankeil.de/sourcecode/privoxy/
403 * Revision 1.58 2006/07/18 14:48:47 david__schmidt
404 * Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch)
405 * with what was really the latest development (the v_3_0_branch branch)
407 * Revision 1.56.2.10 2006/01/21 16:16:08 david__schmidt
408 * Thanks to Edward Carrel for his patch to modernize OSX's
409 * pthreads support. See bug #1409623.
410 * Revision 1.56.2.9 2004/10/03 12:53:45 david__schmidt
411 * Add the ability to check jpeg images for invalid
412 * lengths of comment blocks. Defensive strategy
413 * against the exploit:
414 * Microsoft Security Bulletin MS04-028
415 * Buffer Overrun in JPEG Processing (GDI+) Could
416 * Allow Code Execution (833987)
417 * Enabled with +inspect-jpegs in actions files.
419 * Revision 1.56.2.8 2003/07/11 13:21:25 oes
420 * Excluded text/plain objects from filtering. This fixes a
421 * couple of client-crashing, download corruption and
422 * Privoxy performance issues, whose root cause lies in
423 * web servers labelling content of unknown type as text/plain.
425 * Revision 1.56.2.7 2003/05/06 12:07:26 oes
426 * Fixed bug #729900: Suspicious HOST: headers are now killed and regenerated if necessary
428 * Revision 1.56.2.6 2003/04/14 21:28:30 oes
429 * Completing the previous change
431 * Revision 1.56.2.5 2003/04/14 12:08:16 oes
432 * Added temporary workaround for bug in PHP < 4.2.3
434 * Revision 1.56.2.4 2003/03/07 03:41:05 david__schmidt
435 * Wrapping all *_r functions (the non-_r versions of them) with mutex semaphores for OSX. Hopefully this will take care of all of those pesky crash reports.
437 * Revision 1.56.2.3 2002/11/10 04:20:02 hal9
438 * Fix typo: supressed -> suppressed
440 * Revision 1.56.2.2 2002/09/25 14:59:53 oes
441 * Improved cookie logging
443 * Revision 1.56.2.1 2002/09/25 14:52:45 oes
444 * Added basic support for OPTIONS and TRACE HTTP methods:
445 * - New parser function client_max_forwards which decrements
446 * the Max-Forwards HTTP header field of OPTIONS and TRACE
447 * requests by one before forwarding
448 * - New parser function client_host which extracts the host
449 * and port information from the HTTP header field if the
450 * request URI was not absolute
451 * - Don't crumble and re-add the Host: header, but only generate
452 * and append if missing
454 * Revision 1.56 2002/05/12 15:34:22 jongfoster
455 * Fixing typo in a comment
457 * Revision 1.55 2002/05/08 16:01:07 oes
458 * Optimized add_to_iob:
459 * - Use realloc instead of malloc(), memcpy(), free()
460 * - Expand to powers of two if possible, to get
461 * O(log n) reallocs instead of O(n).
462 * - Moved check for buffer limit here from chat
463 * - Report failure via returncode
465 * Revision 1.54 2002/04/02 15:03:16 oes
466 * Tiny code cosmetics
468 * Revision 1.53 2002/03/26 22:29:55 swa
469 * we have a new homepage!
471 * Revision 1.52 2002/03/24 13:25:43 swa
472 * name change related issues
474 * Revision 1.51 2002/03/13 00:27:05 jongfoster
477 * Revision 1.50 2002/03/12 01:45:35 oes
478 * More verbose logging
480 * Revision 1.49 2002/03/09 20:03:52 jongfoster
481 * - Making various functions return int rather than size_t.
482 * (Undoing a recent change). Since size_t is unsigned on
483 * Windows, functions like read_socket that return -1 on
484 * error cannot return a size_t.
486 * THIS WAS A MAJOR BUG - it caused frequent, unpredictable
487 * crashes, and also frequently caused JB to jump to 100%
488 * CPU and stay there. (Because it thought it had just
489 * read ((unsigned)-1) == 4Gb of data...)
491 * - The signature of write_socket has changed, it now simply
492 * returns success=0/failure=nonzero.
494 * - Trying to get rid of a few warnings --with-debug on
495 * Windows, I've introduced a new type "jb_socket". This is
496 * used for the socket file descriptors. On Windows, this
497 * is SOCKET (a typedef for unsigned). Everywhere else, it's
498 * an int. The error value can't be -1 any more, so it's
499 * now JB_INVALID_SOCKET (which is -1 on UNIX, and in
500 * Windows it maps to the #define INVALID_SOCKET.)
502 * - The signature of bind_port has changed.
504 * Revision 1.48 2002/03/07 03:46:53 oes
505 * Fixed compiler warnings etc
507 * Revision 1.47 2002/02/20 23:15:13 jongfoster
508 * Parsing functions now handle out-of-memory gracefully by returning
511 * Revision 1.46 2002/01/17 21:03:47 jongfoster
512 * Moving all our URL and URL pattern parsing code to urlmatch.c.
514 * Revision 1.45 2002/01/09 14:33:03 oes
515 * Added support for localtime_r.
517 * Revision 1.44 2001/12/14 01:22:54 steudten
518 * Remove 'user:pass@' from 'proto://user:pass@host' for the
519 * new added header 'Host: ..'. (See Req ID 491818)
521 * Revision 1.43 2001/11/23 00:26:38 jongfoster
522 * Fixing two really stupid errors in my previous commit
524 * Revision 1.42 2001/11/22 21:59:30 jongfoster
525 * Adding code to handle +no-cookies-keep
527 * Revision 1.41 2001/11/05 23:43:05 steudten
528 * Add time+date to log files.
530 * Revision 1.40 2001/10/26 20:13:09 jongfoster
531 * ctype.h is needed in Windows, too.
533 * Revision 1.39 2001/10/26 17:40:04 oes
534 * Introduced get_header_value()
535 * Removed http->user_agent, csp->referrer and csp->accept_types
536 * Removed client_accept()
538 * Revision 1.38 2001/10/25 03:40:48 david__schmidt
539 * Change in porting tactics: OS/2's EMX porting layer doesn't allow multiple
540 * threads to call select() simultaneously. So, it's time to do a real, live,
541 * native OS/2 port. See defines for __EMX__ (the porting layer) vs. __OS2__
542 * (native). Both versions will work, but using __OS2__ offers multi-threading.
544 * Revision 1.37 2001/10/23 21:36:02 jongfoster
545 * Documenting sed()'s error behaviour (doc change only)
547 * Revision 1.36 2001/10/13 12:51:51 joergs
548 * Removed client_host, (was only required for the old 2.0.2-11 http://noijb.
549 * force-load), instead crumble Host: and add it (again) in client_host_adder
550 * (in case we get a HTTP/1.0 request without Host: header and forward it to
551 * a HTTP/1.1 server/proxy).
553 * Revision 1.35 2001/10/09 22:39:21 jongfoster
554 * assert.h is also required under Win32, so moving out of #ifndef _WIN32
557 * Revision 1.34 2001/10/07 18:50:55 oes
558 * Added server_content_encoding, renamed server_transfer_encoding
560 * Revision 1.33 2001/10/07 18:04:49 oes
561 * Changed server_http11 to server_http and its pattern to "HTTP".
562 * Additional functionality: it now saves the HTTP status into
563 * csp->http->status and sets CT_TABOO for Status 206 (partial range)
565 * Revision 1.32 2001/10/07 15:43:28 oes
566 * Removed FEATURE_DENY_GZIP and replaced it with client_accept_encoding,
567 * client_te and client_accept_encoding_adder, triggered by the new
568 * +no-compression action. For HTTP/1.1 the Accept-Encoding header is
569 * changed to allow only identity and chunked, and the TE header is
570 * crunched. For HTTP/1.0, Accept-Encoding is crunched.
572 * parse_http_request no longer does anything than parsing. The rewriting
573 * of http->cmd and version mangling are gone. It now also recognizes
574 * the put and delete methods and saves the url in http->url. Removed
577 * renamed content_type and content_length to have the server_ prefix
579 * server_content_type now only works if csp->content_type != CT_TABOO
581 * added server_transfer_encoding, which
582 * - Sets CT_TABOO to prohibit filtering if encoding compresses
583 * - Raises the CSP_FLAG_CHUNKED flag if Encoding is "chunked"
584 * - Change from "chunked" to "identity" if body was chunked
585 * but has been de-chunked for filtering.
587 * added server_content_md5 which crunches any Content-MD5 headers
588 * if the body was modified.
590 * made server_http11 conditional on +downgrade action
592 * Replaced 6 boolean members of csp with one bitmap (csp->flags)
594 * Revision 1.31 2001/10/05 14:25:02 oes
595 * Crumble Keep-Alive from Server
597 * Revision 1.30 2001/09/29 12:56:03 joergs
598 * IJB now changes HTTP/1.1 to HTTP/1.0 in requests and answers.
600 * Revision 1.29 2001/09/24 21:09:24 jongfoster
601 * Fixing 2 memory leaks that Guy spotted, where the parameter to
602 * enlist() was not being free()d.
604 * Revision 1.28 2001/09/22 16:32:28 jongfoster
605 * Removing unused #includes.
607 * Revision 1.27 2001/09/20 15:45:25 steudten
609 * add casting from size_t to int for printf()
610 * remove local variable shadow s2
612 * Revision 1.26 2001/09/16 17:05:14 jongfoster
613 * Removing unused #include showarg.h
615 * Revision 1.25 2001/09/16 13:21:27 jongfoster
616 * Changes to use new list functions.
618 * Revision 1.24 2001/09/13 23:05:50 jongfoster
619 * Changing the string parameter to the header parsers a "const".
621 * Revision 1.23 2001/09/12 18:08:19 steudten
623 * In parse_http_request() header rewriting miss the host value, so
624 * from http://www.mydomain.com the result was just " / " not
625 * http://www.mydomain.com/ in case we forward.
627 * Revision 1.22 2001/09/10 10:58:53 oes
628 * Silenced compiler warnings
630 * Revision 1.21 2001/07/31 14:46:00 oes
631 * - Persistent connections now suppressed
632 * - sed() no longer appends empty header to csp->headers
634 * Revision 1.20 2001/07/30 22:08:36 jongfoster
635 * Tidying up #defines:
636 * - All feature #defines are now of the form FEATURE_xxx
637 * - Permanently turned off WIN_GUI_EDIT
638 * - Permanently turned on WEBDAV and SPLIT_PROXY_ARGS
640 * Revision 1.19 2001/07/25 17:21:54 oes
641 * client_uagent now saves copy of User-Agent: header value
643 * Revision 1.18 2001/07/13 14:02:46 oes
644 * - Included fix to repair broken HTTP requests that
645 * don't contain a path, not even '/'.
646 * - Removed all #ifdef PCRS
647 * - content_type now always inspected and classified as
648 * text, gif or other.
649 * - formatting / comments
651 * Revision 1.17 2001/06/29 21:45:41 oes
652 * Indentation, CRLF->LF, Tab-> Space
654 * Revision 1.16 2001/06/29 13:32:42 oes
656 * - Adapted free_http_request
657 * - Removed logentry from cancelled commit
659 * Revision 1.15 2001/06/03 19:12:38 oes
660 * deleted const struct interceptors
662 * Revision 1.14 2001/06/01 18:49:17 jongfoster
663 * Replaced "list_share" with "list" - the tiny memory gain was not
664 * worth the extra complexity.
666 * Revision 1.13 2001/05/31 21:30:33 jongfoster
667 * Removed list code - it's now in list.[ch]
668 * Renamed "permission" to "action", and changed many features
669 * to use the actions file rather than the global config.
671 * Revision 1.12 2001/05/31 17:33:13 oes
675 * Revision 1.11 2001/05/29 20:11:19 joergs
676 * '/ * inside comment' warning removed.
678 * Revision 1.10 2001/05/29 09:50:24 jongfoster
679 * Unified blocklist/imagelist/permissionslist.
680 * File format is still under discussion, but the internal changes
683 * Also modified interceptor behaviour:
684 * - We now intercept all URLs beginning with one of the following
685 * prefixes (and *only* these prefixes):
687 * * http://ijbswa.sf.net/config/
688 * * http://ijbswa.sourceforge.net/config/
689 * - New interceptors "home page" - go to http://i.j.b/ to see it.
690 * - Internal changes so that intercepted and fast redirect pages
691 * are not replaced with an image.
692 * - Interceptors now have the option to send a binary page direct
693 * to the client. (i.e. ijb-send-banner uses this)
694 * - Implemented show-url-info interceptor. (Which is why I needed
695 * the above interceptors changes - a typical URL is
696 * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
697 * The previous mechanism would not have intercepted that, and
698 * if it had been intercepted then it would have replaced
701 * Revision 1.9 2001/05/28 17:26:33 jongfoster
702 * Fixing segfault if last header was crunched.
703 * Fixing Windows build (snprintf() is _snprintf() under Win32, but we
704 * can use the cross-platform sprintf() instead.)
706 * Revision 1.8 2001/05/27 22:17:04 oes
708 * - re_process_buffer no longer writes the modified buffer
709 * to the client, which was very ugly. It now returns the
710 * buffer, which it is then written by chat.
712 * - content_length now adjusts the Content-Length: header
713 * for modified documents rather than crunch()ing it.
714 * (Length info in csp->content_length, which is 0 for
715 * unmodified documents)
717 * - For this to work, sed() is called twice when filtering.
719 * Revision 1.7 2001/05/27 13:19:06 oes
720 * Patched Joergs solution for the content-length in.
722 * Revision 1.6 2001/05/26 13:39:32 jongfoster
723 * Only crunches Content-Length header if applying RE filtering.
724 * Without this fix, Microsoft Windows Update wouldn't work.
726 * Revision 1.5 2001/05/26 00:28:36 jongfoster
727 * Automatic reloading of config file.
728 * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32).
729 * Most of the global variables have been moved to a new
730 * struct configuration_spec, accessed through csp->config->globalname
731 * Most of the globals remaining are used by the Win32 GUI.
733 * Revision 1.4 2001/05/22 18:46:04 oes
735 * - Enabled filtering banners by size rather than URL
736 * by adding patterns that replace all standard banner
737 * sizes with the "Junkbuster" gif to the re_filterfile
739 * - Enabled filtering WebBugs by providing a pattern
740 * which kills all 1x1 images
742 * - Added support for PCRE_UNGREEDY behaviour to pcrs,
743 * which is selected by the (nonstandard and therefore
744 * capital) letter 'U' in the option string.
745 * It causes the quantifiers to be ungreedy by default.
746 * Appending a ? turns back to greedy (!).
748 * - Added a new interceptor ijb-send-banner, which
749 * sends back the "Junkbuster" gif. Without imagelist or
750 * MSIE detection support, or if tinygif = 1, or the
751 * URL isn't recognized as an imageurl, a lame HTML
752 * explanation is sent instead.
754 * - Added new feature, which permits blocking remote
755 * script redirects and firing back a local redirect
757 * The feature is conditionally compiled, i.e. it
758 * can be disabled with --disable-fast-redirects,
759 * plus it must be activated by a "fast-redirects"
760 * line in the config file, has its own log level
761 * and of course wants to be displayed by show-proxy-args
762 * Note: Boy, all the #ifdefs in 1001 locations and
763 * all the fumbling with configure.in and acconfig.h
764 * were *way* more work than the feature itself :-(
766 * - Because a generic redirect template was needed for
767 * this, tinygif = 3 now uses the same.
769 * - Moved GIFs, and other static HTTP response templates
774 * - Removed some >400 CRs again (Jon, you really worked
777 * Revision 1.3 2001/05/20 01:21:20 jongfoster
778 * Version 2.9.4 checkin.
779 * - Merged popupfile and cookiefile, and added control over PCRS
780 * filtering, in new "permissionsfile".
781 * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration
782 * file error you now get a message box (in the Win32 GUI) rather
783 * than the program exiting with no explanation.
784 * - Made killpopup use the PCRS MIME-type checking and HTTP-header
786 * - Removed tabs from "config"
787 * - Moved duplicated url parsing code in "loaders.c" to a new function.
788 * - Bumped up version number.
790 * Revision 1.2 2001/05/17 23:02:36 oes
791 * - Made referrer option accept 'L' as a substitute for '§'
793 * Revision 1.1.1.1 2001/05/15 13:59:01 oes
794 * Initial import of version 2.9.3 source tree
797 *********************************************************************/
804 #include <sys/types.h>
814 * Convince GNU's libc to provide a strptime prototype.
817 #endif /*__GLIBC__ */
824 #if !defined(_WIN32) && !defined(__OS2__)
830 #ifdef FEATURE_PTHREAD
832 /* jcc.h is for mutex semaphores only */
833 #endif /* def FEATURE_PTHREAD */
839 #include "jbsockets.h"
840 #include "miscutil.h"
845 #ifndef HAVE_STRPTIME
846 #include "strptime.h"
849 const char parsers_h_rcs[] = PARSERS_H_VERSION;
/*
 * Wrappers around the <ctype.h> functions that are safe to call
 * with a plain "char" argument.
 *
 * They were added to fix a problem with Solaris, whose isspace() is
 * a macro which uses its argument directly as an array index. To
 * make sure that high-bit characters generate positive values, the
 * argument is first converted to unsigned char and then widened to
 * the declared parameter type "int". There should be no effect on
 * other platforms.
 *
 * The macro parameter deliberately avoids a leading double
 * underscore: such identifiers are reserved for the implementation.
 */
#define ijb_isupper(c_) isupper((int)(unsigned char)(c_))
#define ijb_tolower(c_) tolower((int)(unsigned char)(c_))
864 static char *get_header_line(struct iob *iob);
865 static jb_err scan_headers(struct client_state *csp);
866 static jb_err header_tagger(struct client_state *csp, char *header);
867 static jb_err parse_header_time(const char *header_time, time_t *result);
869 static jb_err crumble (struct client_state *csp, char **header);
870 static jb_err connection (struct client_state *csp, char **header);
871 static jb_err filter_header (struct client_state *csp, char **header);
872 static jb_err client_referrer (struct client_state *csp, char **header);
873 static jb_err client_uagent (struct client_state *csp, char **header);
874 static jb_err client_ua (struct client_state *csp, char **header);
875 static jb_err client_from (struct client_state *csp, char **header);
876 static jb_err client_send_cookie (struct client_state *csp, char **header);
877 static jb_err client_x_forwarded (struct client_state *csp, char **header);
878 static jb_err client_accept_encoding (struct client_state *csp, char **header);
879 static jb_err client_te (struct client_state *csp, char **header);
880 static jb_err client_max_forwards (struct client_state *csp, char **header);
881 static jb_err client_host (struct client_state *csp, char **header);
882 static jb_err client_if_modified_since (struct client_state *csp, char **header);
883 static jb_err client_accept_language (struct client_state *csp, char **header);
884 static jb_err client_if_none_match (struct client_state *csp, char **header);
885 static jb_err crunch_client_header (struct client_state *csp, char **header);
886 static jb_err client_x_filter (struct client_state *csp, char **header);
887 static jb_err client_range (struct client_state *csp, char **header);
888 static jb_err server_set_cookie (struct client_state *csp, char **header);
889 static jb_err server_content_type (struct client_state *csp, char **header);
890 static jb_err server_content_length (struct client_state *csp, char **header);
891 static jb_err server_content_md5 (struct client_state *csp, char **header);
892 static jb_err server_content_encoding (struct client_state *csp, char **header);
893 static jb_err server_transfer_coding (struct client_state *csp, char **header);
894 static jb_err server_http (struct client_state *csp, char **header);
895 static jb_err crunch_server_header (struct client_state *csp, char **header);
896 static jb_err server_last_modified (struct client_state *csp, char **header);
897 static jb_err server_content_disposition(struct client_state *csp, char **header);
899 static jb_err client_host_adder (struct client_state *csp);
900 static jb_err client_xtra_adder (struct client_state *csp);
901 static jb_err connection_close_adder (struct client_state *csp);
903 static jb_err create_forged_referrer(char **header, const char *hostport);
904 static jb_err create_fake_referrer(char **header, const char *fake_referrer);
905 static jb_err handle_conditional_hide_referrer_parameter(char **header,
906 const char *host, const int parameter_conditional_block);
909 * List of functions to run on a list of headers.
913 /** The header prefix to match */
916 /** The length of the prefix to match */
919 /** The function to apply to this line */
920 const parser_func_ptr parser;
923 static const struct parsers client_patterns[] = {
924 { "referer:", 8, client_referrer },
925 { "user-agent:", 11, client_uagent },
926 { "ua-", 3, client_ua },
927 { "from:", 5, client_from },
928 { "cookie:", 7, client_send_cookie },
929 { "x-forwarded-for:", 16, client_x_forwarded },
930 { "Accept-Encoding:", 16, client_accept_encoding },
931 { "TE:", 3, client_te },
932 { "Host:", 5, client_host },
933 { "if-modified-since:", 18, client_if_modified_since },
934 { "Keep-Alive:", 11, crumble },
935 { "connection:", 11, connection },
936 { "proxy-connection:", 17, crumble },
937 { "max-forwards:", 13, client_max_forwards },
938 { "Accept-Language:", 16, client_accept_language },
939 { "if-none-match:", 14, client_if_none_match },
940 { "Range:", 6, client_range },
941 { "Request-Range:", 14, client_range },
942 { "If-Range:", 9, client_range },
943 { "X-Filter:", 9, client_x_filter },
944 { "*", 0, crunch_client_header },
945 { "*", 0, filter_header },
949 static const struct parsers server_patterns[] = {
950 { "HTTP/", 5, server_http },
951 { "set-cookie:", 11, server_set_cookie },
952 { "connection:", 11, connection },
953 { "Content-Type:", 13, server_content_type },
954 { "Content-MD5:", 12, server_content_md5 },
955 { "Content-Encoding:", 17, server_content_encoding },
956 { "Transfer-Encoding:", 18, server_transfer_coding },
957 { "Keep-Alive:", 11, crumble },
958 { "content-disposition:", 20, server_content_disposition },
959 { "Last-Modified:", 14, server_last_modified },
960 { "*", 0, crunch_server_header },
961 { "*", 0, filter_header },
965 static const add_header_func_ptr add_client_headers[] = {
968 /* Temporarily disabled: client_accept_encoding_adder, */
969 connection_close_adder,
973 static const add_header_func_ptr add_server_headers[] = {
974 connection_close_adder,
978 /*********************************************************************
980 * Function : flush_socket
982 * Description : Write any pending "buffered" content.
985 * 1 : fd = file descriptor of the socket to read
986 * 2 : iob = The I/O buffer to flush, usually csp->iob.
988 * Returns : On success, the number of bytes written are returned (zero
989 * indicates nothing was written). On error, -1 is returned,
990 * and errno is set appropriately. If count is zero and the
991 * file descriptor refers to a regular file, 0 will be
992 * returned without causing any other effect. For a special
993 * file, the results are not portable.
995 *********************************************************************/
996 int flush_socket(jb_socket fd, struct iob *iob)
998 int len = iob->eod - iob->cur;
1005 if (write_socket(fd, iob->cur, (size_t)len))
1009 iob->eod = iob->cur = iob->buf;
1015 /*********************************************************************
1017 * Function : add_to_iob
1019 * Description : Add content to the buffered page, expanding the
1020 * buffer if necessary.
1023 * 1 : csp = Current client state (buffers, headers, etc...)
1024 * 2 : buf = holds the content to be added to the page
1025 * 3 : n = number of bytes to be added
1027 * Returns : JB_ERR_OK on success, JB_ERR_MEMORY if out-of-memory
1028 * or buffer limit reached.
1030 *********************************************************************/
1031 jb_err add_to_iob(struct client_state *csp, char *buf, int n)
1033 struct iob *iob = csp->iob;
1034 size_t used, offset, need, want;
1037 if (n <= 0) return JB_ERR_OK;
1039 used = (size_t)(iob->eod - iob->buf);
1040 offset = (size_t)(iob->cur - iob->buf);
1041 need = used + (size_t)n + 1;
1044 * If the buffer can't hold the new data, extend it first.
1045 * Use the next power of two if possible, else use the actual need.
1047 if (need > csp->config->buffer_limit)
1049 log_error(LOG_LEVEL_INFO, "Buffer limit reached while extending the buffer (iob)");
1050 return JB_ERR_MEMORY;
1053 if (need > iob->size)
1055 for (want = csp->iob->size ? csp->iob->size : 512; want <= need;) want *= 2;
1057 if (want <= csp->config->buffer_limit && NULL != (p = (char *)realloc(iob->buf, want)))
1061 else if (NULL != (p = (char *)realloc(iob->buf, need)))
1067 log_error(LOG_LEVEL_ERROR, "Extending the buffer (iob) failed: %E");
1068 return JB_ERR_MEMORY;
1071 /* Update the iob pointers */
1072 iob->cur = p + offset;
1073 iob->eod = p + used;
1077 /* copy the new data into the iob buffer */
1078 memcpy(iob->eod, buf, (size_t)n);
1080 /* point to the end of the data */
1083 /* null terminate == cheap insurance */
#ifdef FEATURE_ZLIB
/*********************************************************************
 *
 * Function    :  decompress_iob
 *
 * Description :  Decompress buffered page, expanding the
 *                buffer as necessary. csp->iob->cur
 *                should point to the beginning of the
 *                compressed data block.
 *
 *                The iob is only touched after the decompression
 *                succeeded. On every error path before that point
 *                the iob is left as it was and the temporary output
 *                buffer and the zlib stream are released.
 *
 * Parameters  :
 *          1  :  csp = Current client state (buffers, headers, etc...)
 *
 * Returns     :  JB_ERR_OK on success,
 *                JB_ERR_MEMORY if out-of-memory limit reached, and
 *                JB_ERR_COMPRESS if error decompressing buffer.
 *
 *********************************************************************/
jb_err decompress_iob(struct client_state *csp)
{
   char  *buf;       /* new, uncompressed buffer */
   char  *cur;       /* Current iob position (to keep the original
                      * iob->cur unmodified if we return early) */
   char  *end;       /* End of the compressed input (to keep the
                      * original iob->eod unmodified as well) */
   size_t bufsize;   /* allocated size of the new buffer */
   size_t old_size;  /* Content size before decompression */
   size_t skip_size; /* Number of bytes at the beginning of the iob
                        that we should NOT decompress. */
   int status;       /* return status of the inflate() call */
   z_stream zstr;    /* used by calls to zlib */

   assert(csp->iob->cur - csp->iob->buf > 0);
   assert(csp->iob->eod - csp->iob->cur > 0);

   bufsize = csp->iob->size;
   skip_size = (size_t)(csp->iob->cur - csp->iob->buf);
   old_size = (size_t)(csp->iob->eod - csp->iob->cur);

   cur = csp->iob->cur;
   end = csp->iob->eod;

   if (bufsize < 10)
   {
      /*
       * This is to protect the parsing of gzipped data,
       * but it should(?) be valid for deflated data also.
       */
      log_error(LOG_LEVEL_ERROR, "Buffer too small decompressing iob");
      return JB_ERR_COMPRESS;
   }

   if (csp->content_type & CT_GZIP)
   {
      /*
       * Our task is slightly complicated by the facts that data
       * compressed by gzip does not include a zlib header, and
       * that there is no easily accessible interface in zlib to
       * handle a gzip header. We strip off the gzip header by
       * hand, and later inform zlib not to expect a header.
       *
       * Please see RFC 1952 for more explanation of the
       * appropriate fields.
       */
      int flags;

      /*
       * The fixed part of the gzip header is ten bytes long
       * (two magic bytes, method, flags, and six bytes that
       * are skipped below). Refuse shorter input so the header
       * parsing can't run past the buffered data.
       */
      if (old_size < 10)
      {
         log_error(LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
         return JB_ERR_COMPRESS;
      }

      if ((*cur++ != (char)0x1f)
       || (*cur++ != (char)0x8b)
       || (*cur++ != Z_DEFLATED))
      {
         log_error(LOG_LEVEL_ERROR, "Invalid gzip header when decompressing");
         return JB_ERR_COMPRESS;
      }

      /* Read the flag byte as unsigned so the bit tests are reliable. */
      flags = (unsigned char)*cur++;

      /*
       * XXX: These magic numbers should be replaced
       * with macros to give a better idea what they do.
       */
      if (flags & 0xe0)
      {
         /* The gzip header has reserved bits set; bail out. */
         log_error(LOG_LEVEL_ERROR, "Invalid gzip header flags when decompressing");
         return JB_ERR_COMPRESS;
      }

      /* Skip the six remaining bytes of the fixed header. */
      cur += 6;

      /* Skip extra fields if necessary. */
      if (flags & 0x04)
      {
         /*
          * Skip a given number of bytes, specified
          * as a 16-bit little-endian value.
          *
          * The two length bytes are read in separate statements
          * (the order of evaluation within a single expression
          * would be undefined) and as unsigned char, so that
          * high-bit bytes don't turn the length negative.
          */
         int skip_bytes;

         if ((end - cur) < 2)
         {
            log_error(LOG_LEVEL_ERROR,
               "Malformed gzip header detected. Aborting decompression.");
            return JB_ERR_COMPRESS;
         }

         skip_bytes  = (unsigned char)*cur++;
         skip_bytes += (unsigned char)*cur++ << 8;

         /*
          * We'd like to stay in the buffer.
          */
         if (skip_bytes >= (end - cur))
         {
            log_error(LOG_LEVEL_ERROR,
               "Unreasonable amount of bytes to skip (%d). Stopping decompression",
               skip_bytes);
            return JB_ERR_COMPRESS;
         }
         log_error(LOG_LEVEL_INFO,
            "Skipping %d bytes for gzip compression. Does this sound right?",
            skip_bytes);
         cur += skip_bytes;
      }

      /* Skip the filename if necessary. */
      if (flags & 0x08)
      {
         /* A null-terminated string is supposed to follow. */
         while ((cur < end) && *cur++);
      }

      /* Skip the comment if necessary. */
      if (flags & 0x10)
      {
         /* A null-terminated string is supposed to follow. */
         while ((cur < end) && *cur++);
      }

      /* Skip the CRC if necessary. */
      if (flags & 0x02)
      {
         cur += 2;
      }

      if (cur >= end)
      {
         /*
          * If the current position pointer reached or passed
          * the buffer end, we were obviously tricked to skip
          * too much.
          */
         log_error(LOG_LEVEL_ERROR,
            "Malformed gzip header detected. Aborting decompression.");
         return JB_ERR_COMPRESS;
      }
   }
   else if (csp->content_type & CT_DEFLATE)
   {
      /*
       * XXX: The debug level should be lowered
       * before the next stable release.
       */
      log_error(LOG_LEVEL_INFO, "Decompressing deflated iob: %d", *cur);
      /*
       * In theory (that is, according to RFC 1950), deflate-compressed
       * data should begin with a two-byte zlib header and have an
       * adler32 checksum at the end. It seems that in practice only
       * the raw compressed data is sent. Note that this means that
       * we are not RFC 1950-compliant here, but the advantage is that
       * this actually works. :)
       *
       * We add a dummy null byte to tell zlib where the data ends,
       * and later inform it not to expect a header.
       *
       * Fortunately, add_to_iob() has thoughtfully null-terminated
       * the buffer; we can just extend the input by one byte to
       * include the dummy byte. Only the local end pointer is moved,
       * csp->iob->eod stays valid in case decompression fails.
       */
      end++;
   }
   else
   {
      log_error(LOG_LEVEL_ERROR,
         "Unable to determine compression format for decompression");
      return JB_ERR_COMPRESS;
   }

   /* Set up the fields required by zlib. */
   zstr.next_in  = (Bytef *)cur;
   zstr.avail_in = (unsigned int)(end - cur);
   zstr.zalloc   = Z_NULL;
   zstr.zfree    = Z_NULL;
   zstr.opaque   = Z_NULL;

   /*
    * Passing -MAX_WBITS to inflateInit2 tells the library
    * that there is no zlib header.
    */
   if (inflateInit2(&zstr, -MAX_WBITS) != Z_OK)
   {
      log_error(LOG_LEVEL_ERROR, "Error initializing decompression");
      return JB_ERR_COMPRESS;
   }

   /*
    * Next, we allocate new storage for the inflated data.
    * We don't modify the existing iob yet, so in case there
    * is error in decompression we can recover gracefully.
    */
   buf = zalloc(bufsize);
   if (NULL == buf)
   {
      log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob");
      inflateEnd(&zstr);
      return JB_ERR_MEMORY;
   }

   /* Keep the part in front of the compressed data as it is. */
   assert(bufsize >= skip_size);
   memcpy(buf, csp->iob->buf, skip_size);
   zstr.avail_out = (unsigned int)(bufsize - skip_size);
   zstr.next_out  = (Bytef *)buf + skip_size;

   /* Try to decompress the whole stream in one shot. */
   while (Z_BUF_ERROR == (status = inflate(&zstr, Z_FINISH)))
   {
      /* We need to allocate more memory for the output buffer. */

      char *tmpbuf;                /* used for realloc'ing the buffer */
      size_t oldbufsize = bufsize; /* keep track of the old bufsize */
      /* Output position, remembered before buf may be moved. */
      const size_t out_offset = (size_t)((char *)zstr.next_out - buf);

      /*
       * If zlib wants more data then there's a problem, because
       * the complete compressed file should have been buffered.
       */
      if (0 == zstr.avail_in)
      {
         log_error(LOG_LEVEL_ERROR, "Unexpected end of compressed iob");
         inflateEnd(&zstr);
         freez(buf);
         return JB_ERR_COMPRESS;
      }

      /*
       * If we tried the limit and still didn't have enough
       * memory, just give up.
       */
      if (bufsize >= csp->config->buffer_limit)
      {
         log_error(LOG_LEVEL_ERROR, "Buffer limit reached while decompressing iob");
         inflateEnd(&zstr);
         freez(buf);
         return JB_ERR_MEMORY;
      }

      /* Try doubling the buffer size each time. */
      bufsize *= 2;

      /* Don't exceed the buffer limit. */
      if (bufsize > csp->config->buffer_limit)
      {
         bufsize = csp->config->buffer_limit;
      }

      /* Try to allocate the new buffer. */
      tmpbuf = realloc(buf, bufsize);
      if (NULL == tmpbuf)
      {
         log_error(LOG_LEVEL_ERROR, "Out of memory decompressing iob");
         inflateEnd(&zstr);
         freez(buf);
         return JB_ERR_MEMORY;
      }

      /*
       * Update the fields for inflate() to use the new
       * buffer, which may be in a location different from
       * the old one.
       */
      zstr.avail_out += (unsigned int)(bufsize - oldbufsize);
      zstr.next_out   = (Bytef *)tmpbuf + out_offset;

      assert((size_t)zstr.avail_out == bufsize - out_offset);
      assert(zstr.avail_out > 0);

      buf = tmpbuf;
   }

   if (Z_STREAM_ERROR == inflateEnd(&zstr))
   {
      log_error(LOG_LEVEL_ERROR,
         "Inconsistent stream state after decompression: %s", zstr.msg);
      /*
       * XXX: Intentionally no return.
       *
       * According to zlib.h, Z_STREAM_ERROR is returned
       * "if the stream state was inconsistent".
       *
       * I assume in this case inflate()'s status
       * would also be something different than Z_STREAM_END
       * so this check should be redundant, but lets see.
       */
   }

   if (status != Z_STREAM_END)
   {
      /* We failed to decompress the stream. */
      log_error(LOG_LEVEL_ERROR,
         "Error in decompressing to the buffer (iob): %s", zstr.msg);
      freez(buf);
      return JB_ERR_COMPRESS;
   }

   /*
    * Finally, we can actually update the iob, since the
    * decompression was successful. First, free the old
    * buffer.
    */
   freez(csp->iob->buf);

   /* Now, update the iob to use the new buffer. */
   csp->iob->buf  = buf;
   csp->iob->cur  = csp->iob->buf + skip_size;
   csp->iob->eod  = (char *)zstr.next_out;
   csp->iob->size = bufsize;

   /*
    * Make sure the new uncompressed iob obeys some minimal
    * consistency conditions.
    */
   if ((csp->iob->buf <  csp->iob->cur)
    && (csp->iob->cur <= csp->iob->eod)
    && (csp->iob->eod <= csp->iob->buf + csp->iob->size))
   {
      const size_t new_size = (size_t)(csp->iob->eod - csp->iob->cur);

      if (new_size > 0)
      {
         /* The sizes are cast because they are logged with %d. */
         log_error(LOG_LEVEL_RE_FILTER,
            "Decompression successful. Old size: %d, new size: %d.",
            (int)old_size, (int)new_size);
      }
      else
      {
         /* zlib thinks this is OK, so lets do the same. */
         log_error(LOG_LEVEL_INFO, "Decompression didn't result in any content.");
      }
   }
   else
   {
      /*
       * It seems that zlib did something weird.
       *
       * The positions are logged as offsets from the start of
       * the buffer; pointers must not be passed for %d.
       */
      log_error(LOG_LEVEL_ERROR,
         "Unexpected error decompressing the buffer (iob): %d==%d, %d>%d, %d<%d",
         (int)(csp->iob->cur - csp->iob->buf), (int)skip_size,
         (int)(csp->iob->eod - csp->iob->buf), 0,
         (int)(csp->iob->eod - csp->iob->buf), (int)csp->iob->size);
      return JB_ERR_COMPRESS;
   }

   return JB_ERR_OK;

}
#endif /* defined(FEATURE_ZLIB) */
/*********************************************************************
 *
 * Function    :  string_move
 *
 * Description :  memmove wrapper to move the last part of a string
 *                towards the beginning, overwriting the part in
 *                the middle. strlcpy() can't be used here as the
 *                strings overlap.
 *
 * Parameters  :
 *          1  :  dst = Destination to overwrite
 *          2  :  src = Source to move. Has to be located behind dst.
 *
 * Returns     :  N/A
 *
 *********************************************************************/
static void string_move(char *dst, char *src)
{
   size_t bytes_to_move;

   assert(dst < src);

   /* The terminating nul has to be moved as well, hence the +1. */
   bytes_to_move = strlen(src) + 1;

   memmove(dst, src, bytes_to_move);
}
1490 /*********************************************************************
1492 * Function : normalize_lws
1494 * Description : Reduces unquoted linear white space in headers
1495 * to a single space in accordance with RFC 2616 2.2.
1496 * This simplifies parsing and filtering later on.
1498 * XXX: Remove log messages before
1499 * the next stable release?
1502 * 1 : header = A header with linear white space to reduce.
1506 *********************************************************************/
1507 static void normalize_lws(char *header)
1513 if (ijb_isspace(*p) && ijb_isspace(*(p+1)))
1517 while (ijb_isspace(*q))
1521 log_error(LOG_LEVEL_HEADER, "Reducing white space in '%s'", header);
1522 string_move(p+1, q);
1527 log_error(LOG_LEVEL_HEADER,
1528 "Converting tab to space in '%s'", header);
1533 char *end_of_token = strstr(p+1, "\"");
1535 if (NULL != end_of_token)
1537 /* Don't mess with quoted text. */
1542 log_error(LOG_LEVEL_HEADER,
1543 "Ignoring single quote in '%s'", header);
1549 p = strchr(header, ':');
1550 if ((p != NULL) && (p != header) && ijb_isspace(*(p-1)))
1553 * There's still space before the colon.
1556 string_move(p-1, p);
1561 /*********************************************************************
1563 * Function : get_header
1565 * Description : This (odd) routine will parse the csp->iob
1566 * to get the next complete header.
1569 * 1 : iob = The I/O buffer to parse, usually csp->iob.
1571 * Returns : Any one of the following:
1573 * 1) a pointer to a dynamically allocated string that contains a header line
1574 * 2) NULL indicating that the end of the header was reached
1575 * 3) "" indicating that the end of the iob was reached before finding
1576 * a complete header line.
1578 *********************************************************************/
1579 char *get_header(struct iob *iob)
1583 header = get_header_line(iob);
1585 if ((header == NULL) || (*header == '\0'))
1588 * No complete header read yet, tell the client.
1593 while ((iob->cur[0] == ' ') || (iob->cur[0] == '\t'))
1596 * Header spans multiple lines, append the next one.
1598 char *continued_header;
1600 continued_header = get_header_line(iob);
1601 if ((continued_header == NULL) || (*continued_header == '\0'))
1604 * No complete header read yet, return what we got.
1605 * XXX: Should "unread" header instead.
1607 log_error(LOG_LEVEL_INFO,
1608 "Failed to read a multi-line header properly: '%s'",
1613 if (JB_ERR_OK != string_join(&header, continued_header))
1615 log_error(LOG_LEVEL_FATAL,
1616 "Out of memory while appending multiple headers.");
1620 /* XXX: remove before next stable release. */
1621 log_error(LOG_LEVEL_HEADER,
1622 "Merged multiple header lines to: '%s'",
1627 normalize_lws(header);
1634 /*********************************************************************
1636 * Function : get_header_line
1638 * Description : This (odd) routine will parse the csp->iob
1639 * to get the next header line.
1642 * 1 : iob = The I/O buffer to parse, usually csp->iob.
1644 * Returns : Any one of the following:
1646 * 1) a pointer to a dynamically allocated string that contains a header line
1647 * 2) NULL indicating that the end of the header was reached
1648 * 3) "" indicating that the end of the iob was reached before finding
1649 * a complete header line.
1651 *********************************************************************/
1652 static char *get_header_line(struct iob *iob)
1656 if ((iob->cur == NULL)
1657 || ((p = strchr(iob->cur, '\n')) == NULL))
1659 return(""); /* couldn't find a complete header */
1664 ret = strdup(iob->cur);
1667 /* FIXME No way to handle error properly */
1668 log_error(LOG_LEVEL_FATAL, "Out of memory in get_header_line()");
1673 if ((q = strchr(ret, '\r')) != NULL) *q = '\0';
1675 /* is this a blank line (i.e. the end of the header) ? */
1687 /*********************************************************************
1689 * Function : get_header_value
1691 * Description : Get the value of a given header from a chained list
1692 * of header lines or return NULL if no such header is
1693 * present in the list.
1696 * 1 : header_list = pointer to list
1697 * 2 : header_name = string with name of header to look for.
1698 * Trailing colon required, capitalization
1701 * Returns : NULL if not found, else value of header
1703 *********************************************************************/
1704 char *get_header_value(const struct list *header_list, const char *header_name)
1706 struct list_entry *cur_entry;
1710 assert(header_list);
1711 assert(header_name);
1712 length = strlen(header_name);
1714 for (cur_entry = header_list->first; cur_entry ; cur_entry = cur_entry->next)
1718 if (!strncmpic(cur_entry->str, header_name, length))
1721 * Found: return pointer to start of value
1723 ret = (char *) (cur_entry->str + length);
1724 while (*ret && ijb_isspace(*ret)) ret++;
1738 /*********************************************************************
1740 * Function : scan_headers
1742 * Description : Scans headers, applies tags and updates action bits.
1745 * 1 : csp = Current client state (buffers, headers, etc...)
1747 * Returns : JB_ERR_OK
1749 *********************************************************************/
1750 static jb_err scan_headers(struct client_state *csp)
1752 struct list_entry *h; /* Header */
1753 jb_err err = JB_ERR_OK;
1755 for (h = csp->headers->first; (err == JB_ERR_OK) && (h != NULL) ; h = h->next)
1757 /* Header crunch()ed in previous run? -> ignore */
1758 if (h->str == NULL) continue;
1759 log_error(LOG_LEVEL_HEADER, "scan: %s", h->str);
1760 err = header_tagger(csp, h->str);
1767 /*********************************************************************
1771 * Description : add, delete or modify lines in the HTTP header streams.
1772 * On entry, it receives a linked list of headers space
1773 * that was allocated dynamically (both the list nodes
1774 * and the header contents).
1776 * As a side effect it frees the space used by the original
1780 * 1 : csp = Current client state (buffers, headers, etc...)
1781 * 2 : filter_server_headers = Boolean to switch between
1782 * server and header filtering.
1784 * Returns : JB_ERR_OK in case off success, or
1785 * JB_ERR_MEMORY on out-of-memory error.
1787 *********************************************************************/
1788 jb_err sed(struct client_state *csp, int filter_server_headers)
1790 /* XXX: use more descriptive names. */
1791 struct list_entry *p;
1792 const struct parsers *v;
1793 const add_header_func_ptr *f;
1794 jb_err err = JB_ERR_OK;
1796 if (filter_server_headers)
1798 v = server_patterns;
1799 f = add_server_headers;
1803 v = client_patterns;
1804 f = add_client_headers;
1809 while ((err == JB_ERR_OK) && (v->str != NULL))
1811 for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next)
1813 /* Header crunch()ed in previous run? -> ignore */
1814 if (p->str == NULL) continue;
1816 /* Does the current parser handle this header? */
1817 if ((strncmpic(p->str, v->str, v->len) == 0) ||
1818 (v->len == CHECK_EVERY_HEADER_REMAINING))
1820 err = v->parser(csp, &(p->str));
1826 /* place additional headers on the csp->headers list */
1827 while ((err == JB_ERR_OK) && (*f))
1837 /*********************************************************************
1839 * Function : update_server_headers
1841 * Description : Updates server headers after the body has been modified.
1844 * 1 : csp = Current client state (buffers, headers, etc...)
1846 * Returns : JB_ERR_OK in case off success, or
1847 * JB_ERR_MEMORY on out-of-memory error.
1849 *********************************************************************/
1850 jb_err update_server_headers(struct client_state *csp)
1852 jb_err err = JB_ERR_OK;
1854 static const struct parsers server_patterns_light[] = {
1855 { "Content-Length:", 15, server_content_length },
1856 { "Transfer-Encoding:", 18, server_transfer_coding },
1858 { "Content-Encoding:", 17, server_content_encoding },
1859 #endif /* def FEATURE_ZLIB */
1863 if (strncmpic(csp->http->cmd, "HEAD", 4))
1865 const struct parsers *v;
1866 struct list_entry *p;
1868 for (v = server_patterns_light; (err == JB_ERR_OK) && (v->str != NULL); v++)
1870 for (p = csp->headers->first; (err == JB_ERR_OK) && (p != NULL); p = p->next)
1872 /* Header crunch()ed in previous run? -> ignore */
1873 if (p->str == NULL) continue;
1875 /* Does the current parser handle this header? */
1876 if (strncmpic(p->str, v->str, v->len) == 0)
1878 err = v->parser(csp, (char **)&(p->str));
1888 /*********************************************************************
1890 * Function : header_tagger
1892 * Description : Executes all text substitutions from applying
1893 * tag actions and saves the result as tag.
1895 * XXX: Shares enough code with filter_header() and
1896 * pcrs_filter_response() to warrant some helper functions.
1899 * 1 : csp = Current client state (buffers, headers, etc...)
1900 * 2 : header = Header that is used as tagger input
1902 * Returns : JB_ERR_OK on success and always succeeds
1904 *********************************************************************/
1905 static jb_err header_tagger(struct client_state *csp, char *header)
1907 int wanted_filter_type;
1908 int multi_action_index;
1912 struct file_list *fl;
1913 struct re_filterfile_spec *b;
1914 struct list_entry *tag_name;
1916 int found_filters = 0;
1917 const size_t header_length = strlen(header);
1919 if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
1921 wanted_filter_type = FT_SERVER_HEADER_TAGGER;
1922 multi_action_index = ACTION_MULTI_SERVER_HEADER_TAGGER;
1926 wanted_filter_type = FT_CLIENT_HEADER_TAGGER;
1927 multi_action_index = ACTION_MULTI_CLIENT_HEADER_TAGGER;
1930 /* Check if there are any filters */
1931 for (i = 0; i < MAX_AF_FILES; i++)
1944 if (0 == found_filters)
1946 log_error(LOG_LEVEL_ERROR, "Inconsistent configuration: "
1947 "tagging enabled, but no taggers available.");
1951 for (i = 0; i < MAX_AF_FILES; i++)
1954 if ((NULL == fl) || (NULL == fl->f))
1957 * Either there are no filter files
1958 * left, or this filter file just
1959 * contains no valid filters.
1961 * Continue to be sure we don't miss
1962 * valid filter files that are chained
1963 * after empty or invalid ones.
1968 /* For all filters, */
1969 for (b = fl->f; b; b = b->next)
1971 if (b->type != wanted_filter_type)
1973 /* skip the ones we don't care about, */
1976 /* leaving only taggers that could apply, of which we use the ones, */
1977 for (tag_name = csp->action->multi[multi_action_index]->first;
1978 NULL != tag_name; tag_name = tag_name->next)
1980 /* that do apply, and */
1981 if (strcmp(b->name, tag_name->str) == 0)
1983 char *modified_tag = NULL;
1985 size_t size = header_length;
1986 pcrs_job *joblist = b->joblist;
1988 if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
1990 if (NULL == joblist)
1992 log_error(LOG_LEVEL_RE_FILTER,
1993 "Tagger %s has empty joblist. Nothing to do.", b->name);
1997 /* execute their pcrs_joblist on the header. */
1998 for (job = joblist; NULL != job; job = job->next)
2000 const int hits = pcrs_execute(job, tag, size, &modified_tag, &size);
2004 /* Success, continue with the modified version. */
2013 /* Tagger doesn't match */
2016 /* Regex failure, log it but continue anyway. */
2017 log_error(LOG_LEVEL_ERROR,
2018 "Problems with tagger \'%s\' and header \'%s\': %s",
2019 b->name, *header, pcrs_strerror(hits));
2021 freez(modified_tag);
2025 if (b->dynamic) pcrs_free_joblist(joblist);
2027 /* If this tagger matched */
2033 * There is to technical limitation which makes
2034 * it impossible to use empty tags, but I assume
2035 * no one would do it intentionally.
2038 log_error(LOG_LEVEL_INFO,
2039 "Tagger \'%s\' created an empty tag. Ignored.",
2044 if (!list_contains_item(csp->tags, tag))
2046 if (JB_ERR_OK != enlist(csp->tags, tag))
2048 log_error(LOG_LEVEL_ERROR,
2049 "Insufficient memory to add tag \'%s\', "
2050 "based on tagger \'%s\' and header \'%s\'",
2051 tag, b->name, *header);
2055 char *action_message;
2057 * update the action bits right away, to make
2058 * tagging based on tags set by earlier taggers
2059 * of the same kind possible.
2061 if (update_action_bits_for_tag(csp, tag))
2063 action_message = "Action bits updated accordingly.";
2067 action_message = "No action bits update necessary.";
2070 log_error(LOG_LEVEL_HEADER,
2071 "Tagger \'%s\' added tag \'%s\'. %s",
2072 b->name, tag, action_message);
2077 /* XXX: Is this log-worthy? */
2078 log_error(LOG_LEVEL_HEADER,
2079 "Tagger \'%s\' didn't add tag \'%s\'. "
2080 "Tag already present", b->name, tag);
2083 } /* if the tagger matched */
2084 } /* if the tagger applies */
2085 } /* for every tagger that could apply */
2086 } /* for all filters */
2087 } /* for all filter files */
2092 /* here begins the family of parser functions that reformat header lines */
2094 /*********************************************************************
2096 * Function : filter_header
2098 * Description : Executes all text substitutions from all applying
2099 * +(server|client)-header-filter actions on the header.
2100 * Most of the code was copied from pcrs_filter_response,
2101 * including the rather short variable names
2104 * 1 : csp = Current client state (buffers, headers, etc...)
2105 * 2 : header = On input, pointer to header to modify.
2106 * On output, pointer to the modified header, or NULL
2107 * to remove the header. This function frees the
2108 * original string if necessary.
2110 * Returns : JB_ERR_OK on success and always succeeds
2112 *********************************************************************/
2113 static jb_err filter_header(struct client_state *csp, char **header)
2117 size_t size = strlen(*header);
2119 char *newheader = NULL;
2122 struct file_list *fl;
2123 struct re_filterfile_spec *b;
2124 struct list_entry *filtername;
2126 int i, found_filters = 0;
2127 int wanted_filter_type;
2128 int multi_action_index;
2130 if (csp->flags & CSP_FLAG_NO_FILTERING)
2135 if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
2137 wanted_filter_type = FT_SERVER_HEADER_FILTER;
2138 multi_action_index = ACTION_MULTI_SERVER_HEADER_FILTER;
2142 wanted_filter_type = FT_CLIENT_HEADER_FILTER;
2143 multi_action_index = ACTION_MULTI_CLIENT_HEADER_FILTER;
2147 * Need to check the set of re_filterfiles...
2149 for (i = 0; i < MAX_AF_FILES; i++)
2162 if (0 == found_filters)
2164 log_error(LOG_LEVEL_ERROR, "Inconsistent configuration: "
2165 "header filtering enabled, but no matching filters available.");
2169 for (i = 0; i < MAX_AF_FILES; i++)
2172 if ((NULL == fl) || (NULL == fl->f))
2175 * Either there are no filter files
2176 * left, or this filter file just
2177 * contains no valid filters.
2179 * Continue to be sure we don't miss
2180 * valid filter files that are chained
2181 * after empty or invalid ones.
2186 * For all applying +filter actions, look if a filter by that
2187 * name exists and if yes, execute its pcrs_joblist on the
2190 for (b = fl->f; b; b = b->next)
2192 if (b->type != wanted_filter_type)
2194 /* Skip other filter types */
2198 for (filtername = csp->action->multi[multi_action_index]->first;
2199 filtername ; filtername = filtername->next)
2201 if (strcmp(b->name, filtername->str) == 0)
2203 int current_hits = 0;
2204 pcrs_job *joblist = b->joblist;
2206 if (b->dynamic) joblist = compile_dynamic_pcrs_job_list(csp, b);
2208 if (NULL == joblist)
2210 log_error(LOG_LEVEL_RE_FILTER, "Filter %s has empty joblist. Nothing to do.", b->name);
2214 log_error(LOG_LEVEL_RE_FILTER, "filtering \'%s\' (size %d) with \'%s\' ...",
2215 *header, size, b->name);
2217 /* Apply all jobs from the joblist */
2218 for (job = joblist; NULL != job; job = job->next)
2220 matches = pcrs_execute(job, *header, size, &newheader, &size);
2223 current_hits += matches;
2224 log_error(LOG_LEVEL_HEADER, "Transforming \"%s\" to \"%s\"", *header, newheader);
2226 *header = newheader;
2228 else if ( 0 == matches )
2230 /* Filter doesn't change header */
2236 log_error(LOG_LEVEL_ERROR, "Filtering \'%s\' with \'%s\' didn't work out: %s",
2237 *header, b->name, pcrs_strerror(matches));
2238 if (newheader != NULL)
2240 log_error(LOG_LEVEL_ERROR, "Freeing what's left: %s", newheader);
2246 if (b->dynamic) pcrs_free_joblist(joblist);
2248 log_error(LOG_LEVEL_RE_FILTER, "... produced %d hits (new size %d).", current_hits, size);
2249 hits += current_hits;
2256 * Additionally checking for hits is important because if
2257 * the continue hack is triggered, server headers can
2258 * arrive empty to separate multiple heads from each other.
2260 if ((0 == size) && hits)
2262 log_error(LOG_LEVEL_HEADER, "Removing empty header %s", *header);
2270 /*********************************************************************
2272 * Function : connection
2274 * Description : Makes sure that the value of the Connection: header
2275 * is "close" and signals connection_close_adder
2279 * 1 : csp = Current client state (buffers, headers, etc...)
2280 * 2 : header = On input, pointer to header to modify.
2281 * On output, pointer to the modified header, or NULL
2282 * to remove the header. This function frees the
2283 * original string if necessary.
2285 * Returns : JB_ERR_OK on success, or
2286 * JB_ERR_MEMORY on out-of-memory error.
2288 *********************************************************************/
2289 static jb_err connection(struct client_state *csp, char **header)
2291 char *old_header = *header;
2293 /* Do we have a 'Connection: close' header? */
2294 if (strcmpic(*header, "Connection: close"))
2296 /* No, create one */
2297 *header = strdup("Connection: close");
2300 return JB_ERR_MEMORY;
2302 log_error(LOG_LEVEL_HEADER, "Replaced: \'%s\' with \'%s\'", old_header, *header);
2306 /* Signal connection_close_adder() to return early. */
2307 if (csp->flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
2309 csp->flags |= CSP_FLAG_SERVER_CONNECTION_CLOSE_SET;
2313 csp->flags |= CSP_FLAG_CLIENT_CONNECTION_CLOSE_SET;
2320 /*********************************************************************
2322 * Function : crumble
2324 * Description : This is called if a header matches a pattern to "crunch"
2327 * 1 : csp = Current client state (buffers, headers, etc...)
2328 * 2 : header = On input, pointer to header to modify.
2329 * On output, pointer to the modified header, or NULL
2330 * to remove the header. This function frees the
2331 * original string if necessary.
2333 * Returns : JB_ERR_OK on success, or
2334 * JB_ERR_MEMORY on out-of-memory error.
2336 *********************************************************************/
2337 static jb_err crumble(struct client_state *csp, char **header)
2339 log_error(LOG_LEVEL_HEADER, "crumble crunched: %s!", *header);
2345 /*********************************************************************
2347 * Function : crunch_server_header
2349 * Description : Crunch server header if it matches a string supplied by the
2350 * user. Called from `sed'.
2353 * 1 : csp = Current client state (buffers, headers, etc...)
2354 * 2 : header = On input, pointer to header to modify.
2355 * On output, pointer to the modified header, or NULL
2356 * to remove the header. This function frees the
2357 * original string if necessary.
2359 * Returns : JB_ERR_OK on success and always succeeds
2361 *********************************************************************/
2362 static jb_err crunch_server_header(struct client_state *csp, char **header)
2364 const char *crunch_pattern;
2366 /* Do we feel like crunching? */
2367 if ((csp->action->flags & ACTION_CRUNCH_SERVER_HEADER))
2369 crunch_pattern = csp->action->string[ACTION_STRING_SERVER_HEADER];
2371 /* Is the current header the lucky one? */
2372 if (strstr(*header, crunch_pattern))
2374 log_error(LOG_LEVEL_HEADER, "Crunching server header: %s (contains: %s)", *header, crunch_pattern);
2383 /*********************************************************************
2385 * Function : server_content_type
2387 * Description : Set the content-type for filterable types (text/.*,
2388 * .*xml.*, javascript and image/gif) unless filtering has been
2389 * forbidden (CT_TABOO) while parsing earlier headers.
2390 * NOTE: Since text/plain is commonly used by web servers
2391 * for files whose correct type is unknown, we don't
2392 * set CT_TEXT for it.
2395 * 1 : csp = Current client state (buffers, headers, etc...)
2396 * 2 : header = On input, pointer to header to modify.
2397 * On output, pointer to the modified header, or NULL
2398 * to remove the header. This function frees the
2399 * original string if necessary.
2401 * Returns : JB_ERR_OK on success, or
2402 * JB_ERR_MEMORY on out-of-memory error.
2404 *********************************************************************/
2405 static jb_err server_content_type(struct client_state *csp, char **header)
2407 /* Remove header if it isn't the first Content-Type header */
2408 if ((csp->content_type & CT_DECLARED))
2411 * Another, slightly slower, way to see if
2412 * we already parsed another Content-Type header.
2414 assert(NULL != get_header_value(csp->headers, "Content-Type:"));
2416 log_error(LOG_LEVEL_ERROR,
2417 "Multiple Content-Type headers. Removing and ignoring: \'%s\'",
2425 * Signal that the Content-Type has been set.
2427 csp->content_type |= CT_DECLARED;
2429 if (!(csp->content_type & CT_TABOO))
2432 * XXX: The assumption that text/plain is a sign of
2433 * binary data seems to be somewhat unreasonable nowadays
2434 * and should be dropped after 3.0.8 is out.
2436 if ((strstr(*header, "text/") && !strstr(*header, "plain"))
2437 || strstr(*header, "xml")
2438 || strstr(*header, "application/x-javascript"))
2440 csp->content_type |= CT_TEXT;
2442 else if (strstr(*header, "image/gif"))
2444 csp->content_type |= CT_GIF;
2449 * Are we messing with the content type?
2451 if (csp->action->flags & ACTION_CONTENT_TYPE_OVERWRITE)
2454 * Make sure the user doesn't accidently
2455 * change the content type of binary documents.
2457 if ((csp->content_type & CT_TEXT) || (csp->action->flags & ACTION_FORCE_TEXT_MODE))
2460 *header = strdup("Content-Type: ");
2461 string_append(header, csp->action->string[ACTION_STRING_CONTENT_TYPE]);
2465 log_error(LOG_LEVEL_HEADER, "Insufficient memory to replace Content-Type!");
2466 return JB_ERR_MEMORY;
2468 log_error(LOG_LEVEL_HEADER, "Modified: %s!", *header);
2472 log_error(LOG_LEVEL_HEADER, "%s not replaced. "
2473 "It doesn't look like a content type that should be filtered. "
2474 "Enable force-text-mode if you know what you're doing.", *header);
2482 /*********************************************************************
2484 * Function : server_transfer_coding
2486 * Description : - Prohibit filtering (CT_TABOO) if transfer coding compresses
2487 * - Raise the CSP_FLAG_CHUNKED flag if coding is "chunked"
2488 * - Remove header if body was chunked but has been
2489 * de-chunked for filtering.
2492 * 1 : csp = Current client state (buffers, headers, etc...)
2493 * 2 : header = On input, pointer to header to modify.
2494 * On output, pointer to the modified header, or NULL
2495 * to remove the header. This function frees the
2496 * original string if necessary.
2498 * Returns : JB_ERR_OK on success, or
2499 * JB_ERR_MEMORY on out-of-memory error.
2501 *********************************************************************/
2502 static jb_err server_transfer_coding(struct client_state *csp, char **header)
2505 * Turn off pcrs and gif filtering if body compressed
2507 if (strstr(*header, "gzip") || strstr(*header, "compress") || strstr(*header, "deflate"))
2511 * XXX: Added to test if we could use CT_GZIP and CT_DEFLATE here.
2513 log_error(LOG_LEVEL_INFO, "Marking content type for %s as CT_TABOO because of %s.",
2514 csp->http->cmd, *header);
2515 #endif /* def FEATURE_ZLIB */
2516 csp->content_type = CT_TABOO;
2520 * Raise flag if body chunked
2522 if (strstr(*header, "chunked"))
2524 csp->flags |= CSP_FLAG_CHUNKED;
2527 * If the body was modified, it has been de-chunked first
2528 * and the header must be removed.
2530 * FIXME: If there is more than one transfer encoding,
2531 * only the "chunked" part should be removed here.
2533 if (csp->flags & CSP_FLAG_MODIFIED)
2535 log_error(LOG_LEVEL_HEADER, "Removing: %s", *header);
2544 /*********************************************************************
2546 * Function : server_content_encoding
2548 * Description : This function is run twice for each request,
2549 * unless FEATURE_ZLIB and filtering are disabled.
2551 * The first run is used to check if the content
2552 * is compressed, if FEATURE_ZLIB is disabled
2553 * filtering is then disabled as well, if FEATURE_ZLIB
2554 * is enabled the content is marked for decompression.
2556 * The second run is used to remove the Content-Encoding
2557 * header if the decompression was successful.
2560 * 1 : csp = Current client state (buffers, headers, etc...)
2561 * 2 : header = On input, pointer to header to modify.
2562 * On output, pointer to the modified header, or NULL
2563 * to remove the header. This function frees the
2564 * original string if necessary.
2566 * Returns : JB_ERR_OK on success, or
2567 * JB_ERR_MEMORY on out-of-memory error.
2569 *********************************************************************/
2570 static jb_err server_content_encoding(struct client_state *csp, char **header)
2573 if ((csp->flags & CSP_FLAG_MODIFIED)
2574 && (csp->content_type & (CT_GZIP | CT_DEFLATE)))
2577 * We successfully decompressed the content,
2578 * and have to clean the header now, so the
2579 * client no longer expects compressed data..
2581 * XXX: There is a difference between cleaning
2582 * and removing it completely.
2584 log_error(LOG_LEVEL_HEADER, "Crunching: %s", *header);
2587 else if (strstr(*header, "gzip"))
2589 /* Mark for gzip decompression */
2590 csp->content_type |= CT_GZIP;
2592 else if (strstr(*header, "deflate"))
2594 /* Mark for zlib decompression */
2595 csp->content_type |= CT_DEFLATE;
2597 else if (strstr(*header, "compress"))
2600 * We can't decompress this; therefore we can't filter
2603 csp->content_type |= CT_TABOO;
2605 #else /* !defined(FEATURE_ZLIB) */
2606 if (strstr(*header, "gzip") || strstr(*header, "compress") || strstr(*header, "deflate"))
2609 * Body is compressed, turn off pcrs and gif filtering.
2611 csp->content_type |= CT_TABOO;
2614 * Log a warning if the user expects the content to be filtered.
2616 if ((csp->rlist != NULL) &&
2617 (!list_is_empty(csp->action->multi[ACTION_MULTI_FILTER])))
2619 log_error(LOG_LEVEL_INFO,
2620 "Compressed content detected, content filtering disabled. "
2621 "Consider recompiling Privoxy with zlib support or "
2622 "enable the prevent-compression action.");
2625 #endif /* defined(FEATURE_ZLIB) */
2632 /*********************************************************************
2634 * Function : server_content_length
2636 * Description : Adjust Content-Length header if we modified
2640 * 1 : csp = Current client state (buffers, headers, etc...)
2641 * 2 : header = On input, pointer to header to modify.
2642 * On output, pointer to the modified header, or NULL
2643 * to remove the header. This function frees the
2644 * original string if necessary.
2646 * Returns : JB_ERR_OK on success, or
2647 * JB_ERR_MEMORY on out-of-memory error.
2649 *********************************************************************/
2650 static jb_err server_content_length(struct client_state *csp, char **header)
2652 const size_t max_header_length = 80;
2654 /* Regenerate header if the content was modified. */
2655 if (csp->flags & CSP_FLAG_MODIFIED)
2658 *header = (char *) zalloc(max_header_length);
2659 if (*header == NULL)
2661 return JB_ERR_MEMORY;
2664 snprintf(*header, max_header_length, "Content-Length: %d",
2665 (int)csp->content_length);
2666 log_error(LOG_LEVEL_HEADER, "Adjusted Content-Length to %d",
2667 (int)csp->content_length);
2674 /*********************************************************************
2676 * Function : server_content_md5
2678 * Description : Crumble any Content-MD5 headers if the document was
2679 * modified. FIXME: Should we re-compute instead?
2682 * 1 : csp = Current client state (buffers, headers, etc...)
2683 * 2 : header = On input, pointer to header to modify.
2684 * On output, pointer to the modified header, or NULL
2685 * to remove the header. This function frees the
2686 * original string if necessary.
2688 * Returns : JB_ERR_OK on success, or
2689 * JB_ERR_MEMORY on out-of-memory error.
2691 *********************************************************************/
2692 static jb_err server_content_md5(struct client_state *csp, char **header)
2694 if (csp->flags & CSP_FLAG_MODIFIED)
2696 log_error(LOG_LEVEL_HEADER, "Crunching Content-MD5");
2704 /*********************************************************************
2706 * Function : server_content_disposition
2708 * Description : If enabled, blocks or modifies the "Content-Disposition" header.
2709 * Called from `sed'.
2712 * 1 : csp = Current client state (buffers, headers, etc...)
2713 * 2 : header = On input, pointer to header to modify.
2714 * On output, pointer to the modified header, or NULL
2715 * to remove the header. This function frees the
2716 * original string if necessary.
2718 * Returns : JB_ERR_OK on success, or
2719 * JB_ERR_MEMORY on out-of-memory error.
2721 *********************************************************************/
2722 static jb_err server_content_disposition(struct client_state *csp, char **header)
2727 * Are we messing with the Content-Disposition header?
2729 if ((csp->action->flags & ACTION_HIDE_CONTENT_DISPOSITION) == 0)
2735 newval = csp->action->string[ACTION_STRING_CONTENT_DISPOSITION];
2737 if ((newval == NULL) || (0 == strcmpic(newval, "block")))
2740 * Blocking content-disposition header
2742 log_error(LOG_LEVEL_HEADER, "Crunching %s!", *header);
2749 * Replacing Content-Disposition header
2752 *header = strdup("Content-Disposition: ");
2753 string_append(header, newval);
2755 if (*header != NULL)
2757 log_error(LOG_LEVEL_HEADER,
2758 "Content-Disposition header crunched and replaced with: %s", *header);
2761 return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
2765 /*********************************************************************
2767 * Function : server_last_modified
2769 * Description : Changes Last-Modified header to the actual date
2770 * to help hide-if-modified-since.
2771 * Called from `sed'.
2774 * 1 : csp = Current client state (buffers, headers, etc...)
2775 * 2 : header = On input, pointer to header to modify.
2776 * On output, pointer to the modified header, or NULL
2777 * to remove the header. This function frees the
2778 * original string if necessary.
2780 * Returns : JB_ERR_OK on success, or
2781 * JB_ERR_MEMORY on out-of-memory error.
2783 *********************************************************************/
2784 static jb_err server_last_modified(struct client_state *csp, char **header)
2787 char buf[BUFFER_SIZE];
2790 #ifdef HAVE_GMTIME_R
2793 struct tm *timeptr = NULL;
2794 time_t now, last_modified;
2796 long int days, hours, minutes, seconds;
2799 * Are we messing with the Last-Modified header?
2801 if ((csp->action->flags & ACTION_OVERWRITE_LAST_MODIFIED) == 0)
2807 newval = csp->action->string[ACTION_STRING_LAST_MODIFIED];
2809 if (0 == strcmpic(newval, "block") )
2812 * Blocking Last-Modified header. Useless but why not.
2814 log_error(LOG_LEVEL_HEADER, "Crunching %s!", *header);
2818 else if (0 == strcmpic(newval, "reset-to-request-time"))
2821 * Setting Last-Modified Header to now.
2823 get_http_time(0, buf, sizeof(buf));
2825 *header = strdup("Last-Modified: ");
2826 string_append(header, buf);
2828 if (*header == NULL)
2830 log_error(LOG_LEVEL_HEADER, "Insufficent memory. Last-Modified header got lost, boohoo.");
2834 log_error(LOG_LEVEL_HEADER, "Reset to present time: %s", *header);
2837 else if (0 == strcmpic(newval, "randomize"))
2839 const char *header_time = *header + sizeof("Last-Modified:");
2841 log_error(LOG_LEVEL_HEADER, "Randomizing: %s", *header);
2843 #ifdef HAVE_GMTIME_R
2844 timeptr = gmtime_r(&now, &gmt);
2845 #elif FEATURE_PTHREAD
2846 pthread_mutex_lock(&gmtime_mutex);
2847 timeptr = gmtime(&now);
2848 pthread_mutex_unlock(&gmtime_mutex);
2850 timeptr = gmtime(&now);
2852 if (JB_ERR_OK != parse_header_time(header_time, &last_modified))
2854 log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header);
2859 rtime = (long int)difftime(now, last_modified);
2868 log_error(LOG_LEVEL_HEADER, "Server time in the future.");
2870 rtime = pick_from_range(rtime);
2871 if (negative) rtime *= -1;
2872 last_modified += rtime;
2873 #ifdef HAVE_GMTIME_R
2874 timeptr = gmtime_r(&last_modified, &gmt);
2875 #elif FEATURE_PTHREAD
2876 pthread_mutex_lock(&gmtime_mutex);
2877 timeptr = gmtime(&last_modified);
2878 pthread_mutex_unlock(&gmtime_mutex);
2880 timeptr = gmtime(&last_modified);
2882 strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr);
2884 *header = strdup("Last-Modified: ");
2885 string_append(header, newheader);
2887 if (*header == NULL)
2889 log_error(LOG_LEVEL_ERROR, "Insufficent memory, header crunched without replacement.");
2890 return JB_ERR_MEMORY;
2893 if (LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */
2895 days = rtime / (3600 * 24);
2896 hours = rtime / 3600 % 24;
2897 minutes = rtime / 60 % 60;
2898 seconds = rtime % 60;
2900 log_error(LOG_LEVEL_HEADER, "Randomized: %s (added %d da%s %d hou%s %d minut%s %d second%s",
2901 *header, days, (days == 1) ? "y" : "ys", hours, (hours == 1) ? "r" : "rs",
2902 minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)");
2907 log_error(LOG_LEVEL_HEADER, "Randomized ... or not. No time difference to work with.");
2916 /*********************************************************************
2918 * Function : client_accept_encoding
2920 * Description : Rewrite the client's Accept-Encoding header so that
2921 * if doesn't allow compression, if the action applies.
2922 * Note: For HTTP/1.0 the absence of the header is enough.
2925 * 1 : csp = Current client state (buffers, headers, etc...)
2926 * 2 : header = On input, pointer to header to modify.
2927 * On output, pointer to the modified header, or NULL
2928 * to remove the header. This function frees the
2929 * original string if necessary.
2931 * Returns : JB_ERR_OK on success, or
2932 * JB_ERR_MEMORY on out-of-memory error.
2934 *********************************************************************/
2935 static jb_err client_accept_encoding(struct client_state *csp, char **header)
2937 if ((csp->action->flags & ACTION_NO_COMPRESSION) != 0)
2939 log_error(LOG_LEVEL_HEADER, "Suppressed offer to compress content");
2943 /* Temporarily disable the correct behaviour to
2944 * work around a PHP bug.
2946 * if (!strcmpic(csp->http->ver, "HTTP/1.1"))
2948 * *header = strdup("Accept-Encoding: identity;q=1.0, *;q=0");
2949 * if (*header == NULL)
2951 * return JB_ERR_MEMORY;
2962 /*********************************************************************
2964 * Function : client_te
2966 * Description : Rewrite the client's TE header so that
2967 * if doesn't allow compression, if the action applies.
2970 * 1 : csp = Current client state (buffers, headers, etc...)
2971 * 2 : header = On input, pointer to header to modify.
2972 * On output, pointer to the modified header, or NULL
2973 * to remove the header. This function frees the
2974 * original string if necessary.
2976 * Returns : JB_ERR_OK on success, or
2977 * JB_ERR_MEMORY on out-of-memory error.
2979 *********************************************************************/
2980 static jb_err client_te(struct client_state *csp, char **header)
2982 if ((csp->action->flags & ACTION_NO_COMPRESSION) != 0)
2985 log_error(LOG_LEVEL_HEADER, "Suppressed offer to compress transfer");
2992 /*********************************************************************
2994 * Function : client_referrer
2996 * Description : Handle the "referer" config setting properly.
2997 * Called from `sed'.
3000 * 1 : csp = Current client state (buffers, headers, etc...)
3001 * 2 : header = On input, pointer to header to modify.
3002 * On output, pointer to the modified header, or NULL
3003 * to remove the header. This function frees the
3004 * original string if necessary.
3006 * Returns : JB_ERR_OK on success, or
3007 * JB_ERR_MEMORY on out-of-memory error.
3009 *********************************************************************/
3010 static jb_err client_referrer(struct client_state *csp, char **header)
3012 const char *parameter;
3013 /* booleans for parameters we have to check multiple times */
3014 int parameter_conditional_block;
3015 int parameter_conditional_forge;
3017 #ifdef FEATURE_FORCE_LOAD
3019 * Since the referrer can include the prefix even
3020 * if the request itself is non-forced, we must
3021 * clean it unconditionally.
3023 * XXX: strclean is too broad
3025 strclean(*header, FORCE_PREFIX);
3026 #endif /* def FEATURE_FORCE_LOAD */
3028 if ((csp->action->flags & ACTION_HIDE_REFERER) == 0)
3030 /* Nothing left to do */
3034 parameter = csp->action->string[ACTION_STRING_REFERER];
3035 assert(parameter != NULL);
3036 parameter_conditional_block = (0 == strcmpic(parameter, "conditional-block"));
3037 parameter_conditional_forge = (0 == strcmpic(parameter, "conditional-forge"));
3039 if (!parameter_conditional_block && !parameter_conditional_forge)
3042 * As conditional-block and conditional-forge are the only
3043 * parameters that rely on the original referrer, we can
3044 * remove it now for all the others.
3049 if (0 == strcmpic(parameter, "block"))
3051 log_error(LOG_LEVEL_HEADER, "Referer crunched!");
3054 else if (parameter_conditional_block || parameter_conditional_forge)
3056 return handle_conditional_hide_referrer_parameter(header,
3057 csp->http->hostport, parameter_conditional_block);
3059 else if (0 == strcmpic(parameter, "forge"))
3061 return create_forged_referrer(header, csp->http->hostport);
3065 /* interpret parameter as user-supplied referer to fake */
3066 return create_fake_referrer(header, parameter);
3071 /*********************************************************************
3073 * Function : client_accept_language
3075 * Description : Handle the "Accept-Language" config setting properly.
3076 * Called from `sed'.
3079 * 1 : csp = Current client state (buffers, headers, etc...)
3080 * 2 : header = On input, pointer to header to modify.
3081 * On output, pointer to the modified header, or NULL
3082 * to remove the header. This function frees the
3083 * original string if necessary.
3085 * Returns : JB_ERR_OK on success, or
3086 * JB_ERR_MEMORY on out-of-memory error.
3088 *********************************************************************/
3089 static jb_err client_accept_language(struct client_state *csp, char **header)
3094 * Are we messing with the Accept-Language?
3096 if ((csp->action->flags & ACTION_HIDE_ACCEPT_LANGUAGE) == 0)
3098 /*I don't think so*/
3102 newval = csp->action->string[ACTION_STRING_LANGUAGE];
3104 if ((newval == NULL) || (0 == strcmpic(newval, "block")) )
3107 * Blocking Accept-Language header
3109 log_error(LOG_LEVEL_HEADER, "Crunching Accept-Language!");
3116 * Replacing Accept-Language header
3119 *header = strdup("Accept-Language: ");
3120 string_append(header, newval);
3122 if (*header == NULL)
3124 log_error(LOG_LEVEL_ERROR,
3125 "Insufficent memory. Accept-Language header crunched without replacement.");
3129 log_error(LOG_LEVEL_HEADER,
3130 "Accept-Language header crunched and replaced with: %s", *header);
3133 return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
3137 /*********************************************************************
3139 * Function : crunch_client_header
3141 * Description : Crunch client header if it matches a string supplied by the
3142 * user. Called from `sed'.
3145 * 1 : csp = Current client state (buffers, headers, etc...)
3146 * 2 : header = On input, pointer to header to modify.
3147 * On output, pointer to the modified header, or NULL
3148 * to remove the header. This function frees the
3149 * original string if necessary.
3151 * Returns : JB_ERR_OK on success and always succeeds
3153 *********************************************************************/
3154 static jb_err crunch_client_header(struct client_state *csp, char **header)
3156 const char *crunch_pattern;
3158 /* Do we feel like crunching? */
3159 if ((csp->action->flags & ACTION_CRUNCH_CLIENT_HEADER))
3161 crunch_pattern = csp->action->string[ACTION_STRING_CLIENT_HEADER];
3163 /* Is the current header the lucky one? */
3164 if (strstr(*header, crunch_pattern))
3166 log_error(LOG_LEVEL_HEADER, "Crunching client header: %s (contains: %s)", *header, crunch_pattern);
3174 /*********************************************************************
3176 * Function : client_uagent
3178 * Description : Handle the "user-agent" config setting properly
3179 * and remember its original value to enable browser
3180 * bug workarounds. Called from `sed'.
3183 * 1 : csp = Current client state (buffers, headers, etc...)
3184 * 2 : header = On input, pointer to header to modify.
3185 * On output, pointer to the modified header, or NULL
3186 * to remove the header. This function frees the
3187 * original string if necessary.
3189 * Returns : JB_ERR_OK on success, or
3190 * JB_ERR_MEMORY on out-of-memory error.
3192 *********************************************************************/
3193 static jb_err client_uagent(struct client_state *csp, char **header)
3197 if ((csp->action->flags & ACTION_HIDE_USER_AGENT) == 0)
3202 newval = csp->action->string[ACTION_STRING_USER_AGENT];
3209 *header = strdup("User-Agent: ");
3210 string_append(header, newval);
3212 log_error(LOG_LEVEL_HEADER, "Modified: %s", *header);
3214 return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
3218 /*********************************************************************
3220 * Function : client_ua
3222 * Description : Handle "ua-" headers properly. Called from `sed'.
3225 * 1 : csp = Current client state (buffers, headers, etc...)
3226 * 2 : header = On input, pointer to header to modify.
3227 * On output, pointer to the modified header, or NULL
3228 * to remove the header. This function frees the
3229 * original string if necessary.
3231 * Returns : JB_ERR_OK on success, or
3232 * JB_ERR_MEMORY on out-of-memory error.
3234 *********************************************************************/
3235 static jb_err client_ua(struct client_state *csp, char **header)
3237 if ((csp->action->flags & ACTION_HIDE_USER_AGENT) != 0)
3239 log_error(LOG_LEVEL_HEADER, "crunched User-Agent!");
3247 /*********************************************************************
3249 * Function : client_from
3251 * Description : Handle the "from" config setting properly.
3252 * Called from `sed'.
3255 * 1 : csp = Current client state (buffers, headers, etc...)
3256 * 2 : header = On input, pointer to header to modify.
3257 * On output, pointer to the modified header, or NULL
3258 * to remove the header. This function frees the
3259 * original string if necessary.
3261 * Returns : JB_ERR_OK on success, or
3262 * JB_ERR_MEMORY on out-of-memory error.
3264 *********************************************************************/
3265 static jb_err client_from(struct client_state *csp, char **header)
3269 if ((csp->action->flags & ACTION_HIDE_FROM) == 0)
3276 newval = csp->action->string[ACTION_STRING_FROM];
3279 * Are we blocking the e-mail address?
3281 if ((newval == NULL) || (0 == strcmpic(newval, "block")) )
3283 log_error(LOG_LEVEL_HEADER, "crunched From!");
3287 log_error(LOG_LEVEL_HEADER, " modified");
3289 *header = strdup("From: ");
3290 string_append(header, newval);
3292 return (*header == NULL) ? JB_ERR_MEMORY : JB_ERR_OK;
3296 /*********************************************************************
3298 * Function : client_send_cookie
3300 * Description : Crunches the "cookie" header if necessary.
3301 * Called from `sed'.
3303 * XXX: Stupid name, doesn't send squat.
3306 * 1 : csp = Current client state (buffers, headers, etc...)
3307 * 2 : header = On input, pointer to header to modify.
3308 * On output, pointer to the modified header, or NULL
3309 * to remove the header. This function frees the
3310 * original string if necessary.
3312 * Returns : JB_ERR_OK on success, or
3313 * JB_ERR_MEMORY on out-of-memory error.
3315 *********************************************************************/
3316 static jb_err client_send_cookie(struct client_state *csp, char **header)
3318 if (csp->action->flags & ACTION_NO_COOKIE_READ)
3320 log_error(LOG_LEVEL_HEADER, "Crunched outgoing cookie: %s", *header);
3328 /*********************************************************************
3330 * Function : client_x_forwarded
3332 * Description : Handle the "x-forwarded-for" config setting properly,
3333 * also used in the add_client_headers list. Called from `sed'.
3336 * 1 : csp = Current client state (buffers, headers, etc...)
3337 * 2 : header = On input, pointer to header to modify.
3338 * On output, pointer to the modified header, or NULL
3339 * to remove the header. This function frees the
3340 * original string if necessary.
3342 * Returns : JB_ERR_OK on success, or
3343 * JB_ERR_MEMORY on out-of-memory error.
3345 *********************************************************************/
3346 jb_err client_x_forwarded(struct client_state *csp, char **header)
3348 if ((csp->action->flags & ACTION_HIDE_FORWARDED) != 0)
3351 log_error(LOG_LEVEL_HEADER, "crunched x-forwarded-for!");
3358 /*********************************************************************
3360 * Function : client_max_forwards
3362 * Description : If the HTTP method is OPTIONS or TRACE, subtract one
3363 * from the value of the Max-Forwards header field.
3366 * 1 : csp = Current client state (buffers, headers, etc...)
3367 * 2 : header = On input, pointer to header to modify.
3368 * On output, pointer to the modified header, or NULL
3369 * to remove the header. This function frees the
3370 * original string if necessary.
3372 * Returns : JB_ERR_OK on success, or
3373 * JB_ERR_MEMORY on out-of-memory error.
3375 *********************************************************************/
3376 static jb_err client_max_forwards(struct client_state *csp, char **header)
3380 if ((0 == strcmpic(csp->http->gpc, "trace")) ||
3381 (0 == strcmpic(csp->http->gpc, "options")))
3383 assert(*(*header+12) == ':');
3384 if (1 == sscanf(*header+12, ": %u", &max_forwards))
3386 if (max_forwards > 0)
3388 snprintf(*header, strlen(*header)+1, "Max-Forwards: %u", --max_forwards);
3389 log_error(LOG_LEVEL_HEADER, "Max-Forwards value for %s request reduced to %u.",
3390 csp->http->gpc, max_forwards);
3392 else if (max_forwards < 0)
3394 log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
3400 log_error(LOG_LEVEL_ERROR, "Crunching invalid header: %s", *header);
3409 /*********************************************************************
3411 * Function : client_host
3413 * Description : If the request URI did not contain host and
3414 * port information, parse and evaluate the Host
3417 * Also, kill ill-formed HOST: headers as sent by
3418 * Apple's iTunes software when used with a proxy.
3421 * 1 : csp = Current client state (buffers, headers, etc...)
3422 * 2 : header = On input, pointer to header to modify.
3423 * On output, pointer to the modified header, or NULL
3424 * to remove the header. This function frees the
3425 * original string if necessary.
3427 * Returns : JB_ERR_OK on success, or
3428 * JB_ERR_MEMORY on out-of-memory error.
3430 *********************************************************************/
3431 static jb_err client_host(struct client_state *csp, char **header)
3436 * If the header field name is all upper-case, chances are that it's
3437 * an ill-formed one from iTunes. BTW, killing innocent headers here is
3438 * not a problem -- they are regenerated later.
3440 if ((*header)[1] == 'O')
3442 log_error(LOG_LEVEL_HEADER, "Killed all-caps Host header line: %s", *header);
3447 if (!csp->http->hostport || (*csp->http->hostport == '*') ||
3448 *csp->http->hostport == ' ' || *csp->http->hostport == '\0')
3451 if (NULL == (p = strdup((*header)+6)))
3453 return JB_ERR_MEMORY;
3456 if (NULL == (q = strdup(p)))
3459 return JB_ERR_MEMORY;
3462 freez(csp->http->hostport);
3463 csp->http->hostport = p;
3464 freez(csp->http->host);
3465 csp->http->host = q;
3466 q = strchr(csp->http->host, ':');
3469 /* Terminate hostname and evaluate port string */
3471 csp->http->port = atoi(q);
3475 csp->http->port = csp->http->ssl ? 443 : 80;
3478 log_error(LOG_LEVEL_HEADER, "New host and port from Host field: %s = %s:%d",
3479 csp->http->hostport, csp->http->host, csp->http->port);
3482 /* Signal client_host_adder() to return right away */
3483 csp->flags |= CSP_FLAG_HOST_HEADER_IS_SET;
3489 /*********************************************************************
3491 * Function : client_if_modified_since
3493 * Description : Remove or modify the If-Modified-Since header.
3496 * 1 : csp = Current client state (buffers, headers, etc...)
3497 * 2 : header = On input, pointer to header to modify.
3498 * On output, pointer to the modified header, or NULL
3499 * to remove the header. This function frees the
3500 * original string if necessary.
3502 * Returns : JB_ERR_OK on success, or
3503 * JB_ERR_MEMORY on out-of-memory error.
3505 *********************************************************************/
3506 static jb_err client_if_modified_since(struct client_state *csp, char **header)
3509 #ifdef HAVE_GMTIME_R
3512 struct tm *timeptr = NULL;
3516 long int hours, minutes, seconds;
3520 if ( 0 == strcmpic(*header, "If-Modified-Since: Wed, 08 Jun 1955 12:00:00 GMT"))
3523 * The client got an error message because of a temporary problem,
3524 * the problem is gone and the client now tries to revalidate our
3525 * error message on the real server. The revalidation would always
3526 * end with the transmission of the whole document and there is
3527 * no need to expose the bogus If-Modified-Since header.
3529 log_error(LOG_LEVEL_HEADER, "Crunching useless If-Modified-Since header.");
3532 else if (csp->action->flags & ACTION_HIDE_IF_MODIFIED_SINCE)
3534 newval = csp->action->string[ACTION_STRING_IF_MODIFIED_SINCE];
3536 if ((0 == strcmpic(newval, "block")))
3538 log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
3541 else /* add random value */
3543 const char *header_time = *header + sizeof("If-Modified-Since:");
3545 if (JB_ERR_OK != parse_header_time(header_time, &tm))
3547 log_error(LOG_LEVEL_HEADER, "Couldn't parse: %s in %s (crunching!)", header_time, *header);
3552 rtime = strtol(newval, &endptr, 0);
3555 log_error(LOG_LEVEL_HEADER, "Randomizing: %s (random range: %d minut%s)",
3556 *header, rtime, (rtime == 1 || rtime == -1) ? "e": "es");
3563 rtime = pick_from_range(rtime);
3567 log_error(LOG_LEVEL_ERROR, "Random range is 0. Assuming time transformation test.",
3570 tm += rtime * (negative ? -1 : 1);
3571 #ifdef HAVE_GMTIME_R
3572 timeptr = gmtime_r(&tm, &gmt);
3573 #elif FEATURE_PTHREAD
3574 pthread_mutex_lock(&gmtime_mutex);
3575 timeptr = gmtime(&tm);
3576 pthread_mutex_unlock(&gmtime_mutex);
3578 timeptr = gmtime(&tm);
3580 strftime(newheader, sizeof(newheader), "%a, %d %b %Y %H:%M:%S GMT", timeptr);
3583 *header = strdup("If-Modified-Since: ");
3584 string_append(header, newheader);
3586 if (*header == NULL)
3588 log_error(LOG_LEVEL_HEADER, "Insufficent memory, header crunched without replacement.");
3589 return JB_ERR_MEMORY;
3592 if (LOG_LEVEL_HEADER & debug) /* Save cycles if the user isn't interested. */
3594 hours = rtime / 3600;
3595 minutes = rtime / 60 % 60;
3596 seconds = rtime % 60;
3598 log_error(LOG_LEVEL_HEADER, "Randomized: %s (%s %d hou%s %d minut%s %d second%s",
3599 *header, (negative) ? "subtracted" : "added", hours, (hours == 1) ? "r" : "rs",
3600 minutes, (minutes == 1) ? "e" : "es", seconds, (seconds == 1) ? ")" : "s)");
3610 /*********************************************************************
3612 * Function : client_if_none_match
3614 * Description : Remove the If-None-Match header.
3617 * 1 : csp = Current client state (buffers, headers, etc...)
3618 * 2 : header = On input, pointer to header to modify.
3619 * On output, pointer to the modified header, or NULL
3620 * to remove the header. This function frees the
3621 * original string if necessary.
3623 * Returns : JB_ERR_OK on success, or
3624 * JB_ERR_MEMORY on out-of-memory error.
3626 *********************************************************************/
3627 static jb_err client_if_none_match(struct client_state *csp, char **header)
3629 if (csp->action->flags & ACTION_CRUNCH_IF_NONE_MATCH)
3631 log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
3639 /*********************************************************************
3641 * Function : client_x_filter
3643 * Description : Disables filtering if the client set "X-Filter: No".
3644 * Called from `sed'.
3647 * 1 : csp = Current client state (buffers, headers, etc...)
3648 * 2 : header = On input, pointer to header to modify.
3649 * On output, pointer to the modified header, or NULL
3650 * to remove the header. This function frees the
3651 * original string if necessary.
3653 * Returns : JB_ERR_OK on success
3655 *********************************************************************/
3656 jb_err client_x_filter(struct client_state *csp, char **header)
3658 if ( 0 == strcmpic(*header, "X-Filter: No"))
3660 if (!(csp->config->feature_flags & RUNTIME_FEATURE_HTTP_TOGGLE))
3662 log_error(LOG_LEVEL_INFO, "Ignored the client's request to fetch without filtering.");
3666 if (csp->action->flags & ACTION_FORCE_TEXT_MODE)
3668 log_error(LOG_LEVEL_HEADER,
3669 "force-text-mode overruled the client's request to fetch without filtering!");
3673 csp->content_type = CT_TABOO; /* XXX: This hack shouldn't be necessary */
3674 csp->flags |= CSP_FLAG_NO_FILTERING;
3675 log_error(LOG_LEVEL_HEADER, "Accepted the client's request to fetch without filtering.");
3677 log_error(LOG_LEVEL_HEADER, "Crunching %s", *header);
3685 /*********************************************************************
3687 * Function : client_range
3689 * Description : Removes Range, Request-Range and If-Range headers if
3690 * content filtering is enabled. If the client's version
3691 * of the document has been altered by Privoxy, the server
3692 * could interpret the range differently than the client
3693 * intended in which case the user could end up with
3694 * corrupted content.
3697 * 1 : csp = Current client state (buffers, headers, etc...)
3698 * 2 : header = On input, pointer to header to modify.
3699 * On output, pointer to the modified header, or NULL
3700 * to remove the header. This function frees the
3701 * original string if necessary.
3703 * Returns : JB_ERR_OK
3705 *********************************************************************/
3706 static jb_err client_range(struct client_state *csp, char **header)
3708 if (content_filters_enabled(csp->action))
3710 log_error(LOG_LEVEL_HEADER, "Content filtering is enabled."
3711 " Crunching: \'%s\' to prevent range-mismatch problems.", *header);
3718 /* the following functions add headers directly to the header list */
3720 /*********************************************************************
3722 * Function : client_host_adder
3724 * Description : Adds the Host: header field if it is missing.
3725 * Called from `sed'.
3728 * 1 : csp = Current client state (buffers, headers, etc...)
3730 * Returns : JB_ERR_OK on success, or
3731 * JB_ERR_MEMORY on out-of-memory error.
3733 *********************************************************************/
3734 static jb_err client_host_adder(struct client_state *csp)
3739 if (csp->flags & CSP_FLAG_HOST_HEADER_IS_SET)
3741 /* Header already set by the client, nothing to do. */
3745 if ( !csp->http->hostport || !*(csp->http->hostport))
3747 /* XXX: When does this happen and why is it OK? */
3748 log_error(LOG_LEVEL_INFO, "Weirdness in client_host_adder detected and ignored.");
3753 * remove 'user:pass@' from 'proto://user:pass@host'
3755 if ( (p = strchr( csp->http->hostport, '@')) != NULL )
3761 p = csp->http->hostport;
3764 /* XXX: Just add it, we already made sure that it will be unique */
3765 log_error(LOG_LEVEL_HEADER, "addh-unique: Host: %s", p);
3766 err = enlist_unique_header(csp->headers, "Host", p);
3773 /*********************************************************************
3775 * Function : client_accept_encoding_adder
3777 * Description : Add an Accept-Encoding header to the client's request
3778 * that disables compression if the action applies, and
3779 * the header is not already there. Called from `sed'.
3780 * Note: For HTTP/1.0, the absence of the header is enough.
3783 * 1 : csp = Current client state (buffers, headers, etc...)
3785 * Returns : JB_ERR_OK on success, or
3786 * JB_ERR_MEMORY on out-of-memory error.
3788 *********************************************************************/
3789 static jb_err client_accept_encoding_adder(struct client_state *csp)
3791 if ( ((csp->action->flags & ACTION_NO_COMPRESSION) != 0)
3792 && (!strcmpic(csp->http->ver, "HTTP/1.1")) )
3794 return enlist_unique(csp->headers, "Accept-Encoding: identity;q=1.0, *;q=0", 16);
3802 /*********************************************************************
3804 * Function : client_xtra_adder
3806 * Description : Used in the add_client_headers list. Called from `sed'.
3809 * 1 : csp = Current client state (buffers, headers, etc...)
3811 * Returns : JB_ERR_OK on success, or
3812 * JB_ERR_MEMORY on out-of-memory error.
3814 *********************************************************************/
3815 static jb_err client_xtra_adder(struct client_state *csp)
3817 struct list_entry *lst;
3820 for (lst = csp->action->multi[ACTION_MULTI_ADD_HEADER]->first;
3821 lst ; lst = lst->next)
3823 log_error(LOG_LEVEL_HEADER, "addh: %s", lst->str);
3824 err = enlist(csp->headers, lst->str);
3836 /*********************************************************************
3838 * Function : connection_close_adder
3840 * Description : "Temporary" fix for the needed but missing HTTP/1.1
3841 * support. Adds a "Connection: close" header to csp->headers
3842 * unless the header was already present. Called from `sed'.
3844 * FIXME: This whole function shouldn't be neccessary!
3847 * 1 : csp = Current client state (buffers, headers, etc...)
3849 * Returns : JB_ERR_OK on success, or
3850 * JB_ERR_MEMORY on out-of-memory error.
3852 *********************************************************************/
3853 static jb_err connection_close_adder(struct client_state *csp)
3855 const unsigned int flags = csp->flags;
3858 * Return right away if
3860 * - we're parsing server headers and the server header
3861 * "Connection: close" is already set, or if
3863 * - we're parsing client headers and the client header
3864 * "Connection: close" is already set.
3866 if ((flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE
3867 && flags & CSP_FLAG_SERVER_CONNECTION_CLOSE_SET)
3868 ||(!(flags & CSP_FLAG_CLIENT_HEADER_PARSING_DONE)
3869 && flags & CSP_FLAG_CLIENT_CONNECTION_CLOSE_SET))
3874 log_error(LOG_LEVEL_HEADER, "Adding: Connection: close");
3876 return enlist(csp->headers, "Connection: close");
3880 /*********************************************************************
3882 * Function : server_http
3884 * Description : - Save the HTTP Status into csp->http->status
3885 * - Set CT_TABOO to prevent filtering if the answer
3886 * is a partial range (HTTP status 206)
3887 * - Rewrite HTTP/1.1 answers to HTTP/1.0 if +downgrade
3891 * 1 : csp = Current client state (buffers, headers, etc...)
3892 * 2 : header = On input, pointer to header to modify.
3893 * On output, pointer to the modified header, or NULL
3894 * to remove the header. This function frees the
3895 * original string if necessary.
3897 * Returns : JB_ERR_OK on success, or
3898 * JB_ERR_MEMORY on out-of-memory error.
3900 *********************************************************************/
3901 static jb_err server_http(struct client_state *csp, char **header)
3903 sscanf(*header, "HTTP/%*d.%*d %d", &(csp->http->status));
3904 if (csp->http->status == 206)
3906 csp->content_type = CT_TABOO;
3909 if ((csp->action->flags & ACTION_DOWNGRADE) != 0)
3911 /* XXX: Should we do a real validity check here? */
3912 if (strlen(*header) > 8)
3915 log_error(LOG_LEVEL_HEADER, "Downgraded answer to HTTP/1.0");
3920 * XXX: Should we block the request or
3921 * enlist a valid status code line here?
3923 log_error(LOG_LEVEL_INFO, "Malformed server response detected. "
3924 "Downgrading to HTTP/1.0 impossible.");
3932 /*********************************************************************
3934 * Function : server_set_cookie
3936 * Description : Handle the server "cookie" header properly.
3937 * Log cookie to the jar file. Then "crunch",
3938 * accept or rewrite it to a session cookie.
3939 * Called from `sed'.
3941 * TODO: Allow the user to specify a new expiration
3942 * time to cause the cookie to expire even before the
3943 * browser is closed.
3946 * 1 : csp = Current client state (buffers, headers, etc...)
3947 * 2 : header = On input, pointer to header to modify.
3948 * On output, pointer to the modified header, or NULL
3949 * to remove the header. This function frees the
3950 * original string if necessary.
3952 * Returns : JB_ERR_OK on success, or
3953 * JB_ERR_MEMORY on out-of-memory error.
3955 *********************************************************************/
3956 static jb_err server_set_cookie(struct client_state *csp, char **header)
3963 #ifdef FEATURE_COOKIE_JAR
3964 if (csp->config->jar)
3967 * Write timestamp into outbuf.
3969 * Complex because not all OSs have tm_gmtoff or
3970 * the %z field in strftime()
3972 char tempbuf[ BUFFER_SIZE ];
3974 #ifdef HAVE_LOCALTIME_R
3975 tm_now = *localtime_r(&now, &tm_now);
3976 #elif FEATURE_PTHREAD
3977 pthread_mutex_lock(&localtime_mutex);
3978 tm_now = *localtime (&now);
3979 pthread_mutex_unlock(&localtime_mutex);
3981 tm_now = *localtime (&now);
3983 strftime(tempbuf, BUFFER_SIZE-6, "%b %d %H:%M:%S ", &tm_now);
3985 /* strlen("set-cookie: ") = 12 */
3986 fprintf(csp->config->jar, "%s %s\t%s\n", tempbuf, csp->http->host, *header + 12);
3988 #endif /* def FEATURE_COOKIE_JAR */
3990 if ((csp->action->flags & ACTION_NO_COOKIE_SET) != 0)
3992 log_error(LOG_LEVEL_HEADER, "Crunching incoming cookie: %s", *header);
3995 else if ((csp->action->flags & ACTION_NO_COOKIE_KEEP) != 0)
3997 /* Flag whether or not to log a message */
4000 /* A variable to store the tag we're working on */
4003 /* Skip "Set-Cookie:" (11 characters) in header */
4004 cur_tag = *header + 11;
4006 /* skip whitespace between "Set-Cookie:" and value */
4007 while (*cur_tag && ijb_isspace(*cur_tag))
4012 /* Loop through each tag in the cookie */
4016 char *next_tag = strchr(cur_tag, ';');
4017 if (next_tag != NULL)
4019 /* Skip the ';' character itself */
4022 /* skip whitespace ";" and start of tag */
4023 while (*next_tag && ijb_isspace(*next_tag))
4030 /* "Next tag" is the end of the string */
4031 next_tag = cur_tag + strlen(cur_tag);
4035 * Check the expiration date to see
4036 * if the cookie is still valid, if yes,
4037 * rewrite it to a session cookie.
4039 if ((strncmpic(cur_tag, "expires=", 8) == 0) && *(cur_tag + 8))
4041 char *expiration_date = cur_tag + 8; /* Skip "[Ee]xpires=" */
4043 /* Did we detect the date properly? */
4044 if (JB_ERR_OK != parse_header_time(expiration_date, &cookie_time))
4047 * Nope, treat it as if it was still valid.
4049 * XXX: Should we remove the whole cookie instead?
4051 log_error(LOG_LEVEL_ERROR,
4052 "Can't parse \'%s\', send by %s. Unsupported time format?", cur_tag, csp->http->url);
4053 string_move(cur_tag, next_tag);
4059 * Yes. Check if the cookie is still valid.
4061 * If the cookie is already expired it's probably
4062 * a delete cookie and even if it isn't, the browser
4063 * will discard it anyway.
4067 * XXX: timegm() isn't available on some AmigaOS
4068 * versions and our replacement doesn't work.
4070 * Our options are to either:
4072 * - disable session-cookies-only completely if timegm
4075 * - to simply remove all expired tags, like it has
4076 * been done until Privoxy 3.0.6 and to live with
4077 * the consequence that it can cause login/logout
4078 * problems on servers that don't validate their
4079 * input properly, or
4081 * - to replace it with mktime in which
4082 * case there is a slight chance of valid cookies
4083 * passing as already expired.
4085 * This is the way it's currently done and it's not
4086 * as bad as it sounds. If the missing GMT offset is
4087 * enough to change the result of the expiration check
4088 * the cookie will be only valid for a few hours
4089 * anyway, which in many cases will be shorter
4090 * than a browser session.
4092 if (cookie_time - now < 0)
4094 log_error(LOG_LEVEL_HEADER,
4095 "Cookie \'%s\' is already expired and can pass unmodified.", *header);
4096 /* Just in case some clown sets more then one expiration date */
4102 * Still valid, delete expiration date by copying
4103 * the rest of the string over it.
4105 string_move(cur_tag, next_tag);
4107 /* That changed the header, need to issue a log message */
4111 * Note that the next tag has now been moved to *cur_tag,
4112 * so we do not need to update the cur_tag pointer.
4120 /* Move on to next cookie tag */
4127 assert(NULL != *header);
4128 log_error(LOG_LEVEL_HEADER, "Cookie rewritten to a temporary one: %s",
4137 #ifdef FEATURE_FORCE_LOAD
/*********************************************************************
 *
 * Function    :  strclean
 *
 * Description :  In-Situ-Eliminate all occurrences of substring in
 *                string. After each removal the search starts over
 *                at the beginning, so occurrences that only come
 *                into being by joining the text around a removed
 *                one are eliminated (and counted) as well.
 *
 * Parameters  :
 *          1  :  string = string to clean. Modified in place, so it
 *                must point to writable memory even though the
 *                parameter is declared const.
 *          2  :  substring = substring to eliminate. An empty
 *                substring matches nothing.
 *
 * Returns     :  Number of eliminations
 *
 *********************************************************************/
int strclean(const char *string, const char *substring)
{
   int hits = 0;
   const size_t len = strlen(substring);
   char *pos;

   /*
    * strstr() finds an empty needle at offset 0 every time and
    * removing zero characters never changes that, so without this
    * check the loop below would never terminate.
    */
   if (len == 0)
   {
      return 0;
   }

   while ((pos = strstr(string, substring)) != NULL)
   {
      /* Close the gap; the "+ 1" moves the terminating '\0' along. */
      memmove(pos, pos + len, strlen(pos + len) + 1);
      hits++;
   }

   return hits;
}
4174 #endif /* def FEATURE_FORCE_LOAD */
4177 /*********************************************************************
4179 * Function : parse_header_time
4181 * Description : Parses time formats used in HTTP header strings
4182 * to get the numerical respresentation.
4185 * 1 : header_time = HTTP header time as string.
4186 * 2 : result = storage for header_time in seconds
4188 * Returns : JB_ERR_OK if the time format was recognized, or
4189 * JB_ERR_PARSE otherwise.
4191 *********************************************************************/
4192 static jb_err parse_header_time(const char *header_time, time_t *result)
4197 * Zero out gmt to prevent time zone offsets.
4199 * While this is only necessary on some platforms
4200 * (mingw32 for example), I don't know how to
4201 * detect these automatically and doing it everywhere
4204 memset(&gmt, 0, sizeof(gmt));
4206 /* Tue, 02 Jun 2037 20:00:00 */
4207 if ((NULL == strptime(header_time, "%a, %d %b %Y %H:%M:%S", &gmt))
4208 /* Tue, 02-Jun-2037 20:00:00 */
4209 && (NULL == strptime(header_time, "%a, %d-%b-%Y %H:%M:%S", &gmt))
4210 /* Tue, 02-Jun-37 20:00:00 */
4211 && (NULL == strptime(header_time, "%a, %d-%b-%y %H:%M:%S", &gmt))
4212 /* Tuesday, 02-Jun-2037 20:00:00 */
4213 && (NULL == strptime(header_time, "%A, %d-%b-%Y %H:%M:%S", &gmt))
4214 /* Tuesday Jun 02 20:00:00 2037 */
4215 && (NULL == strptime(header_time, "%A %b %d %H:%M:%S %Y", &gmt)))
4217 return JB_ERR_PARSE;
4220 *result = timegm(&gmt);
4227 /*********************************************************************
4229 * Function : get_destination_from_headers
4231 * Description : Parse the "Host:" header to get the request's destination.
4232 * Only needed if the client's request was forcefully
4233 * redirected into Privoxy.
4235 * Code mainly copied from client_host() which is currently
4236 * run too late for this purpose.
4239 * 1 : headers = List of headers (one of them hopefully being
4240 * the "Host:" header)
4241 * 2 : http = storage for the result (host, port and hostport).
4243 * Returns : JB_ERR_MEMORY in case of memory problems,
4244 * JB_ERR_PARSE if the host header couldn't be found,
4245 * JB_ERR_OK otherwise.
4247 *********************************************************************/
4248 jb_err get_destination_from_headers(const struct list *headers, struct http_request *http)
4254 host = get_header_value(headers, "Host:");
4258 log_error(LOG_LEVEL_ERROR, "No \"Host:\" header found.");
4259 return JB_ERR_PARSE;
4262 if (NULL == (p = strdup((host))))
4264 log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
4265 return JB_ERR_MEMORY;
4268 if (NULL == (q = strdup(p)))
4271 log_error(LOG_LEVEL_ERROR, "Out of memory while parsing \"Host:\" header");
4272 return JB_ERR_MEMORY;
4275 freez(http->hostport);
4279 q = strchr(http->host, ':');
4282 /* Terminate hostname and evaluate port string */
4284 http->port = atoi(q);
4288 http->port = http->ssl ? 443 : 80;
4291 /* Rebuild request URL */
4293 http->url = strdup(http->ssl ? "https://" : "http://");
4294 string_append(&http->url, http->hostport);
4295 string_append(&http->url, http->path);
4296 if (http->url == NULL)
4298 return JB_ERR_MEMORY;
4301 log_error(LOG_LEVEL_HEADER, "Destination extracted from \"Host:\" header. New request URL: %s",
4309 /*********************************************************************
4311 * Function : create_forged_referrer
4313 * Description : Helper for client_referrer to forge a referer as
4314 * 'http://[hostname:port/' to fool stupid
4315 * checks for in-site links
4318 * 1 : header = Pointer to header pointer
4319 * 2 : hostport = Host and optionally port as string
4321 * Returns : JB_ERR_OK in case of success, or
4322 * JB_ERR_MEMORY in case of memory problems.
4324 *********************************************************************/
4325 static jb_err create_forged_referrer(char **header, const char *hostport)
4327 assert(NULL == *header);
4329 *header = strdup("Referer: http://");
4330 string_append(header, hostport);
4331 string_append(header, "/");
4333 if (NULL == *header)
4335 return JB_ERR_MEMORY;
4338 log_error(LOG_LEVEL_HEADER, "Referer forged to: %s", *header);
4345 /*********************************************************************
4347 * Function : create_fake_referrer
4349 * Description : Helper for client_referrer to create a fake referrer
4350 * based on a string supplied by the user.
4353 * 1 : header = Pointer to header pointer
4354 * 2 : hosthost = Referrer to fake
4356 * Returns : JB_ERR_OK in case of success, or
4357 * JB_ERR_MEMORY in case of memory problems.
4359 *********************************************************************/
4360 static jb_err create_fake_referrer(char **header, const char *fake_referrer)
4362 assert(NULL == *header);
4364 if ((0 != strncmpic(fake_referrer, "http://", 7)) && (0 != strncmpic(fake_referrer, "https://", 8)))
4366 log_error(LOG_LEVEL_HEADER,
4367 "Parameter: +hide-referrer{%s} is a bad idea, but I don't care.", fake_referrer);
4369 *header = strdup("Referer: ");
4370 string_append(header, fake_referrer);
4372 if (NULL == *header)
4374 return JB_ERR_MEMORY;
4377 log_error(LOG_LEVEL_HEADER, "Referer replaced with: %s", *header);
4384 /*********************************************************************
4386 * Function : handle_conditional_hide_referrer_parameter
4388 * Description : Helper for client_referrer to crunch or forge
4389 * the referrer header if the host has changed.
4392 * 1 : header = Pointer to header pointer
4393 * 2 : host = The target host (may include the port)
4394 * 3 : parameter_conditional_block = Boolean to signal
4395 * if we're in conditional-block mode. If not set,
4396 * we're in conditional-forge mode.
4398 * Returns : JB_ERR_OK in case of success, or
4399 * JB_ERR_MEMORY in case of memory problems.
4401 *********************************************************************/
4402 static jb_err handle_conditional_hide_referrer_parameter(char **header,
4403 const char *host, const int parameter_conditional_block)
4405 char *referer = strdup(*header);
4406 const size_t hostlenght = strlen(host);
4408 if (NULL == referer)
4411 return JB_ERR_MEMORY;
4414 /* referer begins with 'Referer: http[s]://' */
4415 if (hostlenght < (strlen(referer)-17))
4418 * Shorten referer to make sure the referer is blocked
4419 * if www.example.org/www.example.com-shall-see-the-referer/
4420 * links to www.example.com/
4422 referer[hostlenght+17] = '\0';
4424 if (NULL == strstr(referer, host))
4426 /* Host has changed */
4427 if (parameter_conditional_block)
4429 log_error(LOG_LEVEL_HEADER, "New host is: %s. Crunching %s!", host, *header);
4436 return create_forged_referrer(header, host);