3 #define PROJECT_H_VERSION "$Id: project.h,v 1.18 2001/06/09 10:57:39 jongfoster Exp $"
4 /*********************************************************************
6 * File : $Source: /cvsroot/ijbswa/current/project.h,v $
8 * Purpose : Defines data structures which are widely used in the
9 * project. Does not define any variables or functions
10 * (though it does declare some macros).
12 * Copyright : Written by and Copyright (C) 2001 the SourceForge
13 * IJBSWA team. http://ijbswa.sourceforge.net
15 * Based on the Internet Junkbuster originally written
16 * by and Copyright (C) 1997 Anonymous Coders and
17 * Junkbusters Corporation. http://www.junkbusters.com
19 * This program is free software; you can redistribute it
20 * and/or modify it under the terms of the GNU General
21 * Public License as published by the Free Software
22 * Foundation; either version 2 of the License, or (at
23 * your option) any later version.
25 * This program is distributed in the hope that it will
26 * be useful, but WITHOUT ANY WARRANTY; without even the
27 * implied warranty of MERCHANTABILITY or FITNESS FOR A
28 * PARTICULAR PURPOSE. See the GNU General Public
29 * License for more details.
31 * The GNU General Public License should be included with
32 * this file. If not, you can view it at
33 * http://www.gnu.org/copyleft/gpl.html
34 * or write to the Free Software Foundation, Inc., 59
35 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
39 * Revision 1.18 2001/06/09 10:57:39 jongfoster
40 * Adding definition of BUFFER_SIZE.
41 * Changing struct cgi_dispatcher to use "const" strings.
43 * Revision 1.17 2001/06/07 23:15:09 jongfoster
44 * Merging ACL and forward files into config file.
45 * Moving struct gateway members into struct forward_spec
46 * Removing config->proxy_args_gateways
47 * Cosmetic: Adding a few comments
49 * Revision 1.16 2001/06/04 18:31:58 swa
50 * files are now prefixed with either `confdir' or `logdir'.
51 * `make redhat-dist' replaces both entries confdir and logdir
54 * Revision 1.15 2001/06/04 11:28:53 swa
55 * redirect did not work due to missing /
57 * Revision 1.14 2001/06/03 11:03:48 oes
59 * added struct http_response,
60 * changed struct interceptors to struct cgi_dispatcher,
61 * moved HTML stuff to cgi.h
63 * Revision 1.13 2001/06/01 20:05:36 jongfoster
64 * Support for +image-blocker{}: added ACTION_IMAGE_BLOCKER
65 * constant, and removed csp->tinygif.
67 * Revision 1.12 2001/06/01 18:49:17 jongfoster
68 * Replaced "list_share" with "list" - the tiny memory gain was not
69 * worth the extra complexity.
71 * Revision 1.11 2001/06/01 10:32:47 oes
72 * Added constants for anchoring selection bitmap
74 * Revision 1.10 2001/05/31 21:33:53 jongfoster
75 * Changes for new actions file, replacing permissionsfile
76 * and parts of the config file. Also added support for
79 * Revision 1.9 2001/05/31 17:32:31 oes
81 * - Enhanced domain part globbing with infix and prefix asterisk
82 * matching and optional unanchored operation
84 * Revision 1.8 2001/05/29 20:09:15 joergs
85 * HTTP_REDIRECT_TEMPLATE fixed.
87 * Revision 1.7 2001/05/29 09:50:24 jongfoster
88 * Unified blocklist/imagelist/actionslist.
89 * File format is still under discussion, but the internal changes
92 * Also modified interceptor behaviour:
93 * - We now intercept all URLs beginning with one of the following
94 * prefixes (and *only* these prefixes):
96 * * http://ijbswa.sf.net/config/
97 * * http://ijbswa.sourceforge.net/config/
98 * - New interceptors "home page" - go to http://i.j.b/ to see it.
99 * - Internal changes so that intercepted and fast redirect pages
100 * are not replaced with an image.
101 * - Interceptors now have the option to send a binary page direct
102 * to the client. (i.e. ijb-send-banner uses this)
103 * - Implemented show-url-info interceptor. (Which is why I needed
104 * the above interceptors changes - a typical URL is
105 * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
106 * The previous mechanism would not have intercepted that, and
107 * if it had been intercepted then it would have replaced
110 * Revision 1.6 2001/05/27 22:17:04 oes
112 * - re_process_buffer no longer writes the modified buffer
113 * to the client, which was very ugly. It now returns the
114 * buffer, which is then written by chat.
116 * - content_length now adjusts the Content-Length: header
117 * for modified documents rather than crunch()ing it.
118 * (Length info in csp->content_length, which is 0 for
119 * unmodified documents)
121 * - For this to work, sed() is called twice when filtering.
123 * Revision 1.5 2001/05/26 00:28:36 jongfoster
124 * Automatic reloading of config file.
125 * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32).
126 * Most of the global variables have been moved to a new
127 * struct configuration_spec, accessed through csp->config->globalname
128 * Most of the globals remaining are used by the Win32 GUI.
130 * Revision 1.4 2001/05/22 18:46:04 oes
132 * - Enabled filtering banners by size rather than URL
133 * by adding patterns that replace all standard banner
134 * sizes with the "Junkbuster" gif to the re_filterfile
136 * - Enabled filtering WebBugs by providing a pattern
137 * which kills all 1x1 images
139 * - Added support for PCRE_UNGREEDY behaviour to pcrs,
140 * which is selected by the (nonstandard and therefore
141 * capital) letter 'U' in the option string.
142 * It causes the quantifiers to be ungreedy by default.
143 * Appending a ? turns back to greedy (!).
145 * - Added a new interceptor ijb-send-banner, which
146 * sends back the "Junkbuster" gif. Without imagelist or
147 * MSIE detection support, or if tinygif = 1, or the
148 * URL isn't recognized as an imageurl, a lame HTML
149 * explanation is sent instead.
151 * - Added new feature, which permits blocking remote
152 * script redirects and firing back a local redirect
154 * The feature is conditionally compiled, i.e. it
155 * can be disabled with --disable-fast-redirects,
156 * plus it must be activated by a "fast-redirects"
157 * line in the config file, has its own log level
158 * and of course wants to be displayed by show-proxy-args
159 * Note: Boy, all the #ifdefs in 1001 locations and
160 * all the fumbling with configure.in and acconfig.h
161 * were *way* more work than the feature itself :-(
163 * - Because a generic redirect template was needed for
164 * this, tinygif = 3 now uses the same.
166 * - Moved GIFs, and other static HTTP response templates
171 * - Removed some >400 CRs again (Jon, you really worked
174 * Revision 1.3 2001/05/20 01:21:20 jongfoster
175 * Version 2.9.4 checkin.
176 * - Merged popupfile and cookiefile, and added control over PCRS
177 * filtering, in new "actionsfile".
178 * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration
179 * file error you now get a message box (in the Win32 GUI) rather
180 * than the program exiting with no explanation.
181 * - Made killpopup use the PCRS MIME-type checking and HTTP-header
183 * - Removed tabs from "config"
184 * - Moved duplicated url parsing code in "loaders.c" to a new function.
185 * - Bumped up version number.
187 * Revision 1.2 2001/05/17 23:01:01 oes
188 * - Cleaned CRLF's from the sources and related files
190 * Revision 1.1.1.1 2001/05/15 13:59:03 oes
191 * Initial import of version 2.9.3 source tree
194 *********************************************************************/
197 /* Declare struct FILE for vars and funcs. */
200 /* Need time_t for file_list */
204 * Include appropriate regular expression libraries.
206 * PCRS ==> Include pcre
207 * REGEX && PCRE ==> Include pcre and pcreposix
208 * REGEX && !PCRE ==> Include gnu_regex
210 * STATIC ==> Use #include "pcre.h" (compiling at same time)
211 * !STATIC ==> Use #include <pcre.h> (System library)
214 #if (defined(REGEX) && defined(PCRE)) || defined(PCRS)
220 #endif /* (defined(REGEX) && defined(PCRE)) || defined(PCRS) */
222 #if defined(REGEX) && defined(PCRE)
224 # include "pcreposix.h"
226 # include <pcreposix.h>
228 #endif /* defined(REGEX) && defined(PCRE) */
230 #if defined(REGEX) && !defined(PCRE)
231 # include "gnu_regex.h"
236 #endif /* def PCRS */
240 #endif /* def AMIGA */
/*
 * NOTE(review): neither the unit nor the users of BUFFER_SIZE are visible in
 * this header; presumably a byte count for fixed-size buffers - confirm
 * against the call sites before changing it.
 */
246 #define BUFFER_SIZE 5000
\r
250 /* Default IP and port to listen on */
/*
 * HADDR_DEFAULT is a string literal, HADDR_PORT an integer constant;
 * together they describe the default listen address 127.0.0.1:8000.
 */
251 #define HADDR_DEFAULT "127.0.0.1"
252 #define HADDR_PORT 8000
255 /* Need this for struct client_state */
256 struct configuration_spec;
258 /* Generic linked list of strings */
259 struct list /* FIXME: Why not separate entries and header? */
261 char * str; /* valid in an entry */
262 struct list *last; /* valid in header */
278 char *host_ip_addr_str; /* NULL before connect_to() */
282 char *hostport; /* "host[:port]" */
286 /* Response generated by CGI, blocker, or error handler */
289 char *status; /* HTTP status (string)*/
290 struct list headers[1]; /* List of header lines */
291 char *head; /* Formatted http response head */
292 int head_length; /* Length of http response head */
293 char *body; /* HTTP document body */
294 int content_length; /* Length of body, REQUIRED if binary body*/
300 char *spec; /* The string which was parsed to produce this */
301 /* url_spec. Used for debugging or display only. */
303 /* Hostname matching: */
304 char *domain; /* Fully qualified domain name (FQDN) pattern. */
305 /* May contain "*". */
306 char *dbuf; /* Buffer with '\0'-delimited fqdn */
307 char **dvec; /* Domain ptr vector into dbuf */
308 int dcnt; /* How many domains in fqdn? */
309 int unanchored; /* Bitmap - flags are ANCHOR_LEFT and ANCHOR_RIGHT */
312 int port; /* The port number, or 0 to match all ports. */
315 char *path; /* The path prefix (if not using regex), or source */
317 int pathlen; /* ==strlen(path). Needed for prefix matching. */
319 regex_t *preg; /* Regex for matching path part */
/*
 * Bit flags for the "unanchored" bitmap of a url_spec.  The two values are
 * distinct single bits, so they can be OR-ed together and tested with "&".
 */
323 #define ANCHOR_LEFT 1
324 #define ANCHOR_RIGHT 2
/*
 * IOB_PEEK(CSP)
 *
 * Yields the distance from CSP->iob->cur up to CSP->iob->eod (presumably the
 * number of buffered bytes not yet consumed), or 0 when cur has reached or
 * passed eod.
 *
 * The previous definition tested "cur > eod", so it produced 0 for every
 * buffer in a normal state and a negative distance otherwise.
 *
 * CSP is parenthesized so that any pointer expression (e.g. &state) can be
 * passed.  It is evaluated several times - do not pass an expression with
 * side effects.
 */
#define IOB_PEEK(CSP) \
   ((((CSP)->iob->cur) < ((CSP)->iob->eod)) \
      ? (((CSP)->iob->eod) - ((CSP)->iob->cur)) \
      : 0)
/*
 * IOB_RESET(CSP)
 *
 * Frees CSP->iob->buf and then zero-fills the iob structure itself.
 *
 * - Wrapped in do { ... } while (0) so the macro is ONE statement.  The old
 *   two-statement form ran the memset() unconditionally when written as
 *   "if (cond) IOB_RESET(csp);" and could not be used before an "else".
 *   Call sites must supply the terminating semicolon themselves.
 * - free(NULL) is a no-op, so no guard is needed.
 * - sizeof(*(CSP)->iob) is the size of one iob element, which is correct
 *   whether "iob" is a one-element array or a plain pointer.
 * - CSP is parenthesized and evaluated more than once - do not pass an
 *   expression with side effects.
 *
 * Needs free() and memset() to be declared at the point of use.
 */
#define IOB_RESET(CSP) \
   do { \
      free((CSP)->iob->buf); \
      memset((CSP)->iob, '\0', sizeof(*(CSP)->iob)); \
   } while (0)
/*
 * Action bit flags.  Each ACTION_xxx below (other than the two special
 * values) is a distinct single bit, so flags combine with "|" and are tested
 * with "&".  They are held in the unsigned "flags" / "mask" / "add" members
 * of the action structures in this header.
 */

/* All bits set.  With the "mask" convention used in this header (a bit set
 * to 0 removes an action) this value removes nothing. */
342 #define ACTION_MASK_ALL (~0U)
/* No flag bits set. */
344 #define ACTION_MOST_COMPATIBLE 0x0000U
346 #define ACTION_BLOCK 0x0001U
347 #define ACTION_FAST_REDIRECTS 0x0002U
348 #define ACTION_FILTER 0x0004U
349 #define ACTION_HIDE_FORWARDED 0x0008U
350 #define ACTION_HIDE_FROM 0x0010U
351 #define ACTION_HIDE_REFERER 0x0020U /* sic - follow HTTP, not English */
352 #define ACTION_HIDE_USER_AGENT 0x0040U
353 #define ACTION_IMAGE 0x0080U
354 #define ACTION_IMAGE_BLOCKER 0x0100U
355 #define ACTION_NO_COOKIE_READ 0x0200U
356 #define ACTION_NO_COOKIE_SET 0x0400U
357 #define ACTION_NO_POPUPS 0x0800U
358 #define ACTION_VANILLA_WAFER 0x1000U
/*
 * Indices into the "string[ACTION_STRING_COUNT]" parameter arrays of the
 * action structures.  ACTION_STRING_COUNT is the array length and must stay
 * one greater than the largest index.
 */
360 #define ACTION_STRING_FROM 0
361 #define ACTION_STRING_IMAGE_BLOCKER 1
362 #define ACTION_STRING_REFERER 2
363 #define ACTION_STRING_USER_AGENT 3
364 #define ACTION_STRING_COUNT 4
/*
 * Indices into the per-action string-list arrays dimensioned by
 * ACTION_MULTI_COUNT (multi, multi_add, multi_remove, multi_remove_all).
 * ACTION_MULTI_COUNT is the array length.
 */
366 #define ACTION_MULTI_ADD_HEADER 0
367 #define ACTION_MULTI_WAFER 1
368 #define ACTION_MULTI_COUNT 2
371 * This structure contains a list of actions to apply to a URL.
372 * It only contains positive instructions - no "-" options.
373 * It is not used to store the actions list itself, only for
374 * url_actions() to return the current values.
376 struct current_action_spec
378 unsigned flags; /* a bit set to "1" = add action */
380 /* For those actions that require parameters: */
382 /* each entry is valid if & only if corresponding entry in "add" set. */
383 char * string[ACTION_STRING_COUNT];
386 struct list multi[ACTION_MULTI_COUNT][1];
391 * This structure contains a set of changes to actions.
392 * It can contain both positive and negative instructions.
393 * It is used to store an entry in the actions list.
397 unsigned mask; /* a bit set to "0" = remove action */
398 unsigned add; /* a bit set to "1" = add action */
400 /* For those actions that require parameters: */
402 /* each entry is valid if & only if corresponding entry in "add" set. */
403 char * string[ACTION_STRING_COUNT];
405 /* Strings to remove. */
406 struct list multi_remove[ACTION_MULTI_COUNT][1];
408 /* If nonzero, remove *all* strings. */
409 int multi_remove_all[ACTION_MULTI_COUNT];
412 struct list multi_add[ACTION_MULTI_COUNT][1];
416 * This structure is used to store the actions list.
418 * It contains a URL pattern, and the changes to the actions.
419 * It is a linked list.
423 struct url_spec url[1];
425 struct action_spec action[1];
427 struct url_actions * next;
431 /* Constants defining bitmask for csp->accept_types */
433 #ifdef DETECT_MSIE_IMAGES
435 /* MSIE detected by user-agent string */
436 #define ACCEPT_TYPE_IS_MSIE 0x0001
439 * *If* this is MSIE, it wants an image. (Or this is a shift-reload, or
440 * it's got an image from this URL before... yuck!)
441 * Only meaningful if ACCEPT_TYPE_IS_MSIE set
443 #define ACCEPT_TYPE_MSIE_IMAGE 0x0002
446 * *If* this is MSIE, it wants a HTML document.
447 * Only meaningful if ACCEPT_TYPE_IS_MSIE set
449 #define ACCEPT_TYPE_MSIE_HTML 0x0004
451 #endif /* def DETECT_MSIE_IMAGES */
456 /* The proxy's configuration */
457 struct configuration_spec * config;
459 /* The actions to perform on the current request */
460 struct current_action_spec action[1];
462 /* socket to talk to client (web browser) */
465 /* socket to talk to server (web server or proxy) */
470 /* 1 if this URL was rejected, 0 otherwise. Allows actual stats inc to
471 * occur in main thread only for thread-safety.
474 #endif /* def STATISTICS */
478 #endif /* def FORCE_LOAD */
482 #endif /* def TOGGLE */
485 * Client PC's IP address, as reported by the accept() function.
486 * Both as string and number
492 /* Our IP address and hostname, i.e. the IP address that
493 the client used to reach us, and the associated hostname,
496 char *my_ip_addr_str;
500 /* The referer in this request, if one was specified. */
502 #endif /* def TRUST_FILES */
504 #if defined(DETECT_MSIE_IMAGES)
505 /* Types the client will accept.
506 * Bitmask - see ACCEPT_TYPE_XXX constants.
509 #endif /* defined(DETECT_MSIE_IMAGES) */
511 /* The URL that was requested */
512 struct http_request http[1];
514 /* An I/O buffer used for buffering data read from the client */
517 /* List of all headers for this request */
518 struct list headers[1];
520 /* List of all cookies for this request */
521 struct list cookie_list[1];
523 #if defined(PCRS) || defined(KILLPOPUPS)
524 /* Nonzero if this has a text MIME type */
526 #endif /* defined(PCRS) || defined(KILLPOPUPS) */
528 /* The "X-Forwarded-For:" header sent by the client */
532 * Nonzero if this client is processing data.
533 * Set to zero when the thread associated with this structure dies.
537 /* files associated with this client */
538 struct file_list *actions_list;
541 struct file_list *rlist; /* Perl re_filterfile */
542 size_t content_length; /* Length after processing */
543 #endif /* def PCRS */
546 struct file_list *tlist; /* trustfile */
547 #endif /* def TRUST_FILES */
549 struct client_state *next;
557 char *(*parser)(const struct parsers *, char *, struct client_state *);
560 struct cgi_dispatcher
564 int (*handler)(struct client_state *csp, struct http_response *rsp, struct map *parameters);
565 const char *description;
571 * this is a pointer to the data structures associated with the file.
572 * Read-only once the structure has been created.
576 /* Normally NULL. When we are finished with file (i.e. when we have
577 * loaded a new one), set to a pointer to an unloader function.
578 * Unloader will be called by sweep() (called from main loop) when
579 * all clients using this file are done. This prevents threading
582 void (*unloader)(void *);
584 /* Used internally by sweep(). Do not access from elsewhere. */
587 #ifndef SPLIT_PROXY_ARGS
588 /* String to be displayed as part of show-proxy-args display.
589 * Read-only once the structure has been created.
592 #endif /* ndef SPLIT_PROXY_ARGS */
594 /* Following variables allow us to check if file has been changed.
595 * Read-only once the structure has been created.
600 /* Pointer to next entry in the linked list of all "file_list"s.
601 * This linked list is so that sweep() can navigate it.
602 * Since sweep() can remove items from the list, we must be careful
603 * to only access this value from main thread (when we know sweep
606 struct file_list *next;
613 struct url_spec url[1];
615 struct block_spec *next;
617 #endif /* def TRUST_FILES */
/*
 * Connection-type codes for a forwarding rule; the connection type stored
 * with a forward_spec must be one of these SOCKS_xxx values.
 */
620 #define SOCKS_NONE 0 /* Don't use a SOCKS server */
621 #define SOCKS_4 40 /* original SOCKS 4 protocol */
622 #define SOCKS_4A 41 /* as modified for hosts w/o external DNS */
626 struct url_spec url[1];
628 /* Connection type - must be a SOCKS_xxx constant */
635 /* Parent HTTP proxy */
639 /* For the linked list */
640 struct forward_spec *next;
645 struct re_filterfile_spec
647 struct list patterns[1];
650 #endif /* def PCRS */
/*
 * Verdict codes for access-control entries.
 * NOTE(review): the member that stores these is not visible in this view -
 * presumably a field of struct access_control_list; confirm.
 */
654 #define ACL_PERMIT 1 /* accept connection request */
655 #define ACL_DENY 2 /* reject connection request */
657 struct access_control_addr
664 struct access_control_list
666 struct access_control_addr src[1];
667 struct access_control_addr dst[1];
670 struct access_control_list *next;
672 #endif /* def ACL_FILES */
675 /* Maximum number of loaders (actions, re_filter, ...) */
679 * Data loaded from the configuration file.
681 * (Anomaly: toggle is still handled through a global, not this structure)
683 struct configuration_spec
692 const char *actions_file;
694 /* The administrator's email address */
697 /* A URL with info on this proxy */
698 char *proxy_info_url;
701 const char *re_filterfile;
702 #endif /* def PCRS */
705 const char * jarfile;
707 #endif /* def JAR_FILES */
710 * Port and IP to bind to.
711 * Defaults to HADDR_DEFAULT:HADDR_PORT == 127.0.0.1:8000
716 #ifndef SPLIT_PROXY_ARGS
717 const char *suppress_message;
718 #endif /* ndef SPLIT_PROXY_ARGS */
720 #ifndef SPLIT_PROXY_ARGS
721 /* suppress listing config files */
722 int suppress_blocklists;
723 #endif /* ndef SPLIT_PROXY_ARGS */
726 const char * trustfile;
728 struct list trust_info[1];
729 struct url_spec *trust_list[64];
730 #endif /* def TRUST_FILES */
733 struct access_control_list *acl;
734 #endif /* def ACL_FILES */
736 struct forward_spec *forward;
738 /* All options from the config file, HTML-formatted */
741 /* the configuration file object. */
742 struct file_list *config_file_list;
744 /* List of loaders */
745 int (*loaders[NLOADERS])(struct client_state *);
747 /* bool, nonzero if we need to bind() to the new port */
/*
 * SZ(X): number of elements in the array X, computed at compile time as
 * the size of the whole array divided by the size of its first element.
 *
 * X must be a real array, not a pointer (for a pointer the result is
 * sizeof(pointer) / sizeof(element), which is meaningless).  The argument
 * is parenthesized in the element expression so that any array-valued
 * expression can be passed safely.
 */
#define SZ(X) (sizeof(X) / sizeof((X)[0]))
/*
 * Path prefix string, compiled in only when FORCE_LOAD is defined.
 * NOTE(review): presumably a request whose path starts with this prefix
 * bypasses blocking - the code that checks it is not in this header; confirm.
 */
755 #define FORCE_PREFIX "/IJB-FORCE-LOAD"
756 #endif /* def FORCE_LOAD */
/* Project home page, without a trailing slash. */
759 #define HOME_PAGE_URL "http://ijbswa.sourceforge.net"
/*
 * Built by string-literal concatenation:
 *   HOME_PAGE_URL "/redirect.php?v=" VERSION "&to="
 * VERSION must therefore be a string-literal macro that is defined before
 * REDIRECT_URL is used; it is not defined in the visible part of this header.
 */
760 #define REDIRECT_URL HOME_PAGE_URL "/redirect.php?v=" VERSION "&to="
/* Short host name "i.j.b". NOTE(review): presumably the host part of the
 * proxy's built-in pages (http://i.j.b/) - confirm against the CGI code. */
761 #define CGI_PREFIX_HOST "i.j.b"
/*
 * Canned HTTP/1.0 response text: a "200 Connection established" status line
 * followed by a Proxy-Agent header that embeds VERSION, ended by an empty
 * line.  Lines are terminated with "\n" only.
 *
 * Because this is a static definition inside a header, every translation
 * unit that includes the header gets its own private copy.
 */
764 static const char CSUCCEED[] =
765 "HTTP/1.0 200 Connection established\n"
766 "Proxy-Agent: IJ/" VERSION "\n\n";
/*
 * Canned HTTP/1.0 response text: a "400 Invalid header received from
 * browser" status line followed by an empty line ("\n" terminators only).
 *
 * Because this is a static definition inside a header, every translation
 * unit that includes the header gets its own private copy.
 */
768 static const char CHEADER[] =
769 "HTTP/1.0 400 Invalid header received from browser\n\n";
775 #endif /* ndef _PROJECT_H */