/* Revision-control identification string for this source file (the CVS Id keyword). */
1 const char filters_rcs[] = "$Id: filters.c,v 1.10 2001/05/29 09:50:24 jongfoster Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/filters.c,v $
6 * Purpose : Declares functions to parse/crunch headers and pages.
7 * Functions declared include:
8 * `acl_addr', `add_stats', `block_acl', `block_imageurl',
9 * `block_url', `url_permissions', `domaincmp', `dsplit',
10 * `filter_popups', `forward_url', 'redirect_url',
11 * `ij_untrusted_url', `intercept_url', `re_process_buffer',
12 * `show_proxy_args', 'ijb_send_banner', and `trust_url'
14 * Copyright : Written by and Copyright (C) 2001 the SourceForge
15 * IJBSWA team. http://ijbswa.sourceforge.net
17 * Based on the Internet Junkbuster originally written
18 * by and Copyright (C) 1997 Anonymous Coders and
19 * Junkbusters Corporation. http://www.junkbusters.com
21 * This program is free software; you can redistribute it
22 * and/or modify it under the terms of the GNU General
23 * Public License as published by the Free Software
24 * Foundation; either version 2 of the License, or (at
25 * your option) any later version.
27 * This program is distributed in the hope that it will
28 * be useful, but WITHOUT ANY WARRANTY; without even the
29 * implied warranty of MERCHANTABILITY or FITNESS FOR A
30 * PARTICULAR PURPOSE. See the GNU General Public
31 * License for more details.
33 * The GNU General Public License should be included with
34 * this file. If not, you can view it at
35 * http://www.gnu.org/copyleft/gpl.html
36 * or write to the Free Software Foundation, Inc., 59
37 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41 * Revision 1.10 2001/05/29 09:50:24 jongfoster
42 * Unified blocklist/imagelist/permissionslist.
43 * File format is still under discussion, but the internal changes
46 * Also modified interceptor behaviour:
47 * - We now intercept all URLs beginning with one of the following
48 * prefixes (and *only* these prefixes):
50 * * http://ijbswa.sf.net/config/
51 * * http://ijbswa.sourceforge.net/config/
52 * - New interceptors "home page" - go to http://i.j.b/ to see it.
53 * - Internal changes so that intercepted and fast redirect pages
54 * are not replaced with an image.
55 * - Interceptors now have the option to send a binary page direct
56 * to the client. (i.e. ijb-send-banner uses this)
57 * - Implemented show-url-info interceptor. (Which is why I needed
58 * the above interceptors changes - a typical URL is
59 * "http://i.j.b/show-url-info?url=www.somesite.com/banner.gif".
60 * The previous mechanism would not have intercepted that, and
61 * if it had been intercepted then it would have replaced
64 * Revision 1.9 2001/05/27 22:17:04 oes
66 * - re_process_buffer no longer writes the modified buffer
67 * to the client, which was very ugly. It now returns the
68 * buffer, which is then written by chat.
70 * - content_length now adjusts the Content-Length: header
71 * for modified documents rather than crunch()ing it.
72 * (Length info in csp->content_length, which is 0 for
73 * unmodified documents)
75 * - For this to work, sed() is called twice when filtering.
77 * Revision 1.8 2001/05/26 17:13:28 jongfoster
78 * Filled in a function comment.
80 * Revision 1.7 2001/05/26 15:26:15 jongfoster
81 * ACL feature now provides more security by immediately dropping
82 * connections from untrusted hosts.
84 * Revision 1.6 2001/05/26 00:28:36 jongfoster
85 * Automatic reloading of config file.
86 * Removed obsolete SIGHUP support (Unix) and Reload menu option (Win32).
87 * Most of the global variables have been moved to a new
88 * struct configuration_spec, accessed through csp->config->globalname
89 * Most of the globals remaining are used by the Win32 GUI.
91 * Revision 1.5 2001/05/25 22:34:30 jongfoster
94 * Revision 1.4 2001/05/22 18:46:04 oes
96 * - Enabled filtering banners by size rather than URL
97 * by adding patterns that replace all standard banner
98 * sizes with the "Junkbuster" gif to the re_filterfile
100 * - Enabled filtering WebBugs by providing a pattern
101 * which kills all 1x1 images
103 * - Added support for PCRE_UNGREEDY behaviour to pcrs,
104 * which is selected by the (nonstandard and therefore
105 * capital) letter 'U' in the option string.
106 * It causes the quantifiers to be ungreedy by default.
107 * Appending a ? turns back to greedy (!).
109 * - Added a new interceptor ijb-send-banner, which
110 * sends back the "Junkbuster" gif. Without imagelist or
111 * MSIE detection support, or if tinygif = 1, or the
112 * URL isn't recognized as an imageurl, a lame HTML
113 * explanation is sent instead.
115 * - Added new feature, which permits blocking remote
116 * script redirects and firing back a local redirect
118 * The feature is conditionally compiled, i.e. it
119 * can be disabled with --disable-fast-redirects,
120 * plus it must be activated by a "fast-redirects"
121 * line in the config file, has its own log level
122 * and of course wants to be displayed by show-proxy-args
123 * Note: Boy, all the #ifdefs in 1001 locations and
124 * all the fumbling with configure.in and acconfig.h
125 * were *way* more work than the feature itself :-(
127 * - Because a generic redirect template was needed for
128 * this, tinygif = 3 now uses the same.
130 * - Moved GIFs, and other static HTTP response templates
135 * - Removed some >400 CRs again (Jon, you really worked
138 * Revision 1.3 2001/05/20 16:44:47 jongfoster
139 * Removing last hardcoded JunkBusters.com URLs.
141 * Revision 1.2 2001/05/20 01:21:20 jongfoster
142 * Version 2.9.4 checkin.
143 * - Merged popupfile and cookiefile, and added control over PCRS
144 * filtering, in new "permissionsfile".
145 * - Implemented LOG_LEVEL_FATAL, so that if there is a configuration
146 * file error you now get a message box (in the Win32 GUI) rather
147 * than the program exiting with no explanation.
148 * - Made killpopup use the PCRS MIME-type checking and HTTP-header
150 * - Removed tabs from "config"
151 * - Moved duplicated url parsing code in "loaders.c" to a new function.
152 * - Bumped up version number.
154 * Revision 1.1.1.1 2001/05/15 13:58:52 oes
155 * Initial import of version 2.9.3 source tree
158 *********************************************************************/
164 #include <sys/types.h>
171 #include <netinet/in.h>
173 #include <winsock2.h>
180 #include "showargs.h"
184 #include "jbsockets.h"
186 #include "jbsockets.h"
187 #include "miscutil.h"
/* Version string of the companion header, supplied by the FILTERS_H_VERSION macro (defined outside this file - presumably in filters.h; TODO confirm). */
193 const char filters_h_rcs[] = FILTERS_H_VERSION;
195 /* Fix a problem with Solaris. There should be no effect on other
197 * Solaris's isspace() is a macro which uses its argument directly
198 * as an array index. Therefore we need to make sure that high-bit
199 * characters generate +ve values, and ideally we also want to make
200 * the argument match the declared parameter type of "int".
* The macro below gives isdigit() the same treatment: the argument is
* first narrowed to unsigned char and then widened to int.
202 #define ijb_isdigit(__X) isdigit((int)(unsigned char)(__X))
205 static const char CBLOCK[] =
207 "HTTP/1.0 403 Request for blocked URL\n"
208 #else /* ifndef AMIGA */
209 "HTTP/1.0 202 Request for blocked URL\n"
210 #endif /* ndef AMIGA */
/* Response template for a blocked request; block_url() uses it as the sprintf() format.
 * Going by the #else annotation above, the 202 status line is the non-AMIGA one.
 * Each %s%s pair below receives http->hostport followed by http->path: one pair for the
 * displayed URL, one for the show-url-info link, and a third for the FORCE_PREFIX link
 * that closes with the FORCE_LOAD #endif. */
212 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
213 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
214 "Content-Type: text/html\n\n"
217 "<title>Internet Junkbuster: Request for blocked URL</title>\n"
223 "<p align=center>Your request for <b>%s%s</b>\n"
224 "was blocked.<br><a href=\"http://i.j.b/show-url-info?url=%s%s\">See why</a>"
226 " or <a href=\"http://%s" FORCE_PREFIX "%s\">"
227 "go there anyway.</a>"
228 #endif /* def FORCE_LOAD */
234 static const char CTRUST[] =
/* Response template for an untrusted request; trust_url() uses it as the sprintf() format.
 * The three %s conversions in the ij-untrusted-url link receive, in order, the
 * url_encode()d hostport, path and referrer. */
236 "HTTP/1.0 403 Request for untrusted URL\n"
237 #else /* ifndef AMIGA */
238 "HTTP/1.0 202 Request for untrusted URL\n"
239 #endif /* ndef AMIGA */
241 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
242 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
243 "Content-Type: text/html\n\n"
246 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
250 "<a href=http://i.j.b/ij-untrusted-url?%s+%s+%s>"
256 #endif /* def TRUST_FILES */
260 /*********************************************************************
262 * Function : block_acl
264 * Description : Block this request?
265 * Decide yes or no based on ACL file.
268 * 1 : dst = The proxy or gateway address this is going to.
269 * Or NULL to check all possible targets.
270 * 2 : csp = Current client state (buffers, headers, etc...)
271 * Also includes the client IP address.
273 * Returns : 0 = FALSE (don't block) and 1 = TRUE (do block)
275 *********************************************************************/
276 int block_acl(struct access_control_addr *dst,
277 struct client_state *csp)
279 struct file_list *fl;
280 struct access_control_list *a, *acl;
282 /* if not using an access control list, then permit the connection */
283 if (((fl = csp->alist) == NULL) ||
284 ((acl = (struct access_control_list *) fl->f) == NULL))
289 /* search the list */
/* The walk starts at acl->next, so the first node of the list is never tested itself
 * (presumably it is a placeholder head - TODO confirm). A rule is considered only when
 * the client address, masked with the rule's source mask, equals the rule's source address. */
290 for (a = acl->next ; a ; a = a->next)
292 if ((csp->ip_addr_long & a->src->mask) == a->src->addr)
296 /* Just want to check if they have any access */
297 if (a->action == ACL_PERMIT)
/* Destination test: the masked destination address must equal the rule's address, and the
 * port must equal the rule's port unless the rule's port is 0, which matches every port. */
302 else if ( ((dst->addr & a->dst->mask) == a->dst->addr)
303 && ((dst->port == a->dst->port) || (a->dst->port == 0)))
305 if (a->action == ACL_PERMIT)
322 /*********************************************************************
324 * Function : acl_addr
326 * Description : Called from `load_aclfile' to parse an ACL address.
329 * 1 : aspec = String specifying ACL address.
330 * 2 : aca = struct access_control_addr to fill in.
332 * Returns : 0 => Ok, everything else is an error.
334 *********************************************************************/
335 int acl_addr(char *aspec, struct access_control_addr *aca)
337 int i, masklength, port;
/* An optional mask length after a slash is handled first: the character tested by
 * ijb_isdigit() must be a digit, and the converted value must lie in 0..32.
 * NOTE(review): atoi() reports no conversion errors; only the first character is checked. */
343 if ((p = strchr(aspec, '/')))
347 if (ijb_isdigit(*p) == 0)
351 masklength = atoi(p);
354 if ((masklength < 0) || (masklength > 32))
/* An optional port after a colon gets the same leading-digit check. */
359 if ((p = strchr(aspec, ':')))
363 if (ijb_isdigit(*p) == 0)
/* The host part is resolved and passed through ntohl(); the netmask built below is
 * applied to that value. */
372 aca->addr = ntohl(resolve_hostname_to_ip(aspec));
376 log_error(LOG_LEVEL_ERROR, "can't resolve address for %s", aspec);
380 /* build the netmask */
/* Sets the top masklength bits, one per iteration, starting with bit 31.
 * NOTE(review): for i == 1 this evaluates 1 << 31 on a signed int, which overflows where
 * int is 32 bits wide; an unsigned constant (1U) would avoid that. */
382 for (i=1; i <= masklength ; i++)
384 aca->mask |= (1 << (32 - i));
387 /* now mask off the host portion of the ip address
388 * (i.e. save only the network portion of the address).
390 aca->addr = aca->addr & aca->mask;
395 #endif /* def ACL_FILES */
398 /*********************************************************************
400 * Function : block_url
402 * Description : Called from `chat'. Check to see if we need to block this.
405 * 1 : http = http_request request to "check" for blocked
406 * 2 : csp = Current client state (buffers, headers, etc...)
408 * Returns : NULL => unblocked, else string to HTML block description.
410 *********************************************************************/
411 char *block_url(struct http_request *http, struct client_state *csp)
/* The decision is taken from csp->permissions alone: when PERMIT_BLOCK is clear the
 * request is not blocked (the header comment says NULL is returned in that case). */
417 if ((csp->permissions & PERMIT_BLOCK) == 0)
425 #endif /* def FORCE_LOAD */
/* The buffer size grows by factor times the length of each string, because CBLOCK prints
 * hostport and path several times (two pairs, three with FORCE_LOAD). */
428 n += factor * strlen(http->hostport);
429 n += factor * strlen(http->path);
/* NOTE(review): the malloc() result is handed to sprintf(); no NULL check is visible here.
 * NOTE(review): hostport and path are formatted into the HTML as received, whereas
 * trust_url() url_encode()s its values first - confirm that this is intended. */
431 p = (char *)malloc(n);
434 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path,
435 http->hostport, http->path);
437 sprintf(p, CBLOCK, http->hostport, http->path, http->hostport, http->path);
438 #endif /* def FORCE_LOAD */
445 #ifdef IMAGE_BLOCKING
446 /*********************************************************************
448 * Function : block_imageurl
450 * Description : Given a URL which is blocked, decide whether to
451 * send the "blocked" image or HTML.
454 * 1 : http = URL to check.
455 * 2 : csp = Current client state (buffers, headers, etc...)
457 * Returns : True (nonzero) if URL is in image list, false (0)
460 *********************************************************************/
461 int block_imageurl(struct http_request *http, struct client_state *csp)
463 #ifdef DETECT_MSIE_IMAGES
/* Two special cases are tested on csp->accept_types: IS_MSIE together with MSIE_IMAGE
 * (and not MSIE_HTML), then IS_MSIE together with MSIE_HTML (and not MSIE_IMAGE).
 * Presumably the first answers "image" and the second "HTML" - TODO confirm. */
464 if ((csp->accept_types
465 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
466 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE))
470 else if ((csp->accept_types
471 & (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_IMAGE|ACCEPT_TYPE_MSIE_HTML))
472 == (ACCEPT_TYPE_IS_MSIE|ACCEPT_TYPE_MSIE_HTML))
/* General case: the answer is the PERMIT_IMAGE bit of csp->permissions, normalised to 0 or 1. */
478 return ((csp->permissions & PERMIT_IMAGE) != 0);
480 #endif /* def IMAGE_BLOCKING */
484 /*********************************************************************
486 * Function : re_process_buffer
488 * Description : Apply all jobs from the joblist (aka. Perl regexp's) to
489 * the text buffer that's been accumulated in csp->iob->buf
490 * and set csp->content_length to the modified size.
493 * 1 : csp = Current client state (buffers, headers, etc...)
495 * Returns : a pointer to the (newly allocated) modified buffer.
498 *********************************************************************/
499 char *re_process_buffer(struct client_state *csp)
/* size is the number of buffered bytes between csp->iob->cur and csp->iob->eod. */
502 int size = csp->iob->eod - csp->iob->cur;
503 char *old=csp->iob->cur, *new = NULL;
504 pcrs_job *job, *joblist;
506 struct file_list *fl;
507 struct re_filterfile_spec *b;
509 /* Sanity first ;-) */
/* The job list comes from the filter file attached to csp->rlist; without one, an error
 * is logged. */
515 if ( ( NULL == (fl = csp->rlist) ) || ( NULL == (b = fl->f) ) )
517 log_error(LOG_LEVEL_ERROR, "Unable to get current state of regexp filtering.");
521 joblist = b->joblist;
524 log_error(LOG_LEVEL_RE_FILTER, "re_filtering %s%s (size %d) ...",
525 csp->http->hostport, csp->http->path, size);
527 /* Apply all jobs from the joblist */
/* Each job reads old and writes a fresh buffer through &new, updating size in place.
 * The previous input is freed unless it is still the original csp->iob->cur buffer.
 * Presumably old is then advanced to new so the jobs chain - TODO confirm.
 * NOTE(review): the return value is added to hits unconditionally; confirm that
 * pcrs_exec_substitution() cannot return a negative error code here. */
528 for (job = joblist; NULL != job; job = job->next)
530 hits += pcrs_exec_substitution(job, old, size, &new, &size);
531 if (old != csp->iob->cur) free(old);
535 log_error(LOG_LEVEL_RE_FILTER, " produced %d hits (new size %d).", hits, size);
/* The final size is published so the Content-Length header can be adjusted by the caller side. */
537 csp->content_length = size;
539 /* fwiw, reset the iob */
544 #endif /* def PCRS */
548 /*********************************************************************
550 * Function : trust_url
552 * Description : Should we "trust" this URL? See "trustfile" line in config.
555 * 1 : http = http_request request for requested URL
556 * 2 : csp = Current client state (buffers, headers, etc...)
558 * Returns : NULL => trusted, else string to HTML "untrusted" description.
560 *********************************************************************/
561 char *trust_url(struct http_request *http, struct client_state *csp)
563 struct file_list *fl;
564 struct block_spec *b;
565 struct url_spec url[1], **tl, *t;
567 char *hostport, *path, *refer;
568 struct http_request rhttp[1];
/* Without a loaded trust list (csp->tlist or its payload is NULL) no check can be made. */
571 if (((fl = csp->tlist) == NULL) || ((b = fl->f) == NULL))
576 *url = dsplit(http->host);
578 /* if splitting the domain fails, punt */
579 if (url->dbuf == NULL) return(NULL);
581 memset(rhttp, '\0', sizeof(*rhttp));
/* Pass 1: match the requested URL against the trust list entries, skipping the first
 * node. An entry matches on port (0 means any), then domain (empty means any), then
 * path. The regexec() and strncmp() lines are alternatives, presumably selected at
 * compile time - TODO confirm. */
583 for (b = b->next; b ; b = b->next)
585 if ((b->url->port == 0) || (b->url->port == http->port))
587 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
589 if ((b->url->path == NULL) ||
591 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
593 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
/* A matching entry with reject == 0 means the URL is trusted; otherwise the untrusted
 * page is built from the url_encode()d hostport, path and referrer.
 * NOTE(review): the same page-building sequence appears again after the
 * trust_url_not_trusted label; a shared helper would remove the duplication.
 * NOTE(review): no NULL check on the malloc() result is visible before sprintf(). */
600 if (b->reject == 0) return(NULL);
602 hostport = url_encode(http->hostport);
603 path = url_encode(http->path);
607 refer = url_encode(csp->referrer);
611 refer = url_encode("undefined");
615 n += strlen(hostport);
619 p = (char *)malloc(n);
621 sprintf(p, CTRUST, hostport, path, refer);
/* Pass 2: nothing in the list matched, so fall back to the referrer. A header of nine
 * characters or fewer carries nothing after the nine-character header name prefix that
 * is skipped further down. */
636 if ((csp->referrer == NULL)|| (strlen(csp->referrer) <= 9))
638 /* no referrer was supplied */
639 goto trust_url_not_trusted;
642 /* forge a URL from the referrer so we can use
643 * convert_url() to parse it into its components.
647 p = strsav(p, "GET ");
648 p = strsav(p, csp->referrer + 9); /* skip over "Referer: " */
649 p = strsav(p, " HTTP/1.0");
/* The forged request line is parsed into rhttp; a NULL cmd afterwards is treated as a
 * parse failure. */
651 parse_http_request(p, rhttp, csp);
653 if (rhttp->cmd == NULL)
656 goto trust_url_not_trusted;
661 *url = dsplit(rhttp->host);
663 /* if splitting the domain fails, punt */
664 if (url->dbuf == NULL) goto trust_url_not_trusted;
/* csp->config->trust_list is walked as a NULL-terminated array of url_spec pointers,
 * using the same port / domain / path tests as pass 1, this time against the referrer. */
666 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
668 if ((t->port == 0) || (t->port == rhttp->port))
670 if ((t->domain[0] == '\0') || domaincmp(t, url) == 0)
672 if ((t->path == NULL) ||
674 (regexec(t->preg, rhttp->path, 0, NULL, 0) == 0)
676 (strncmp(t->path, rhttp->path, t->pathlen) == 0)
680 /* if the URL's referrer is from a trusted referrer, then
681 * add the target spec to the trustfile as an unblocked
682 * domain and return NULL (which means it's OK).
690 if ((fp = fopen(csp->config->trustfile, "a")))
695 h = strsav(h, http->hostport);
701 /* since this path points into a user's home space
702 * be sure to include this spec in the trustfile.
704 if ((p = strchr(p, '/')))
707 h = strsav(h, http->path);
712 free_http_request(rhttp);
714 fprintf(fp, "%s\n", h);
724 trust_url_not_trusted:
725 free_http_request(rhttp);
727 hostport = url_encode(http->hostport);
728 path = url_encode(http->path);
732 refer = url_encode(csp->referrer);
736 refer = url_encode("undefined");
740 n += strlen(hostport);
744 p = (char *)malloc(n);
745 sprintf(p, CTRUST, hostport, path, refer);
754 #endif /* def TRUST_FILES */
757 static const char C_HOME_PAGE[] =
/* Built-in home page that intercept_url() writes to the client socket. It links to the
 * project web site and to the two interceptors, show-proxy-args and show-url-info.
 * The proxy-configuration link must spell the interceptor name in full: intercept_url()
 * compares the path with strncmp() over each interceptor's own length, so a truncated
 * name cannot match. */
760 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
761 "Content-Type: text/html\n\n"
764 "<title>Internet Junkbuster: Information</title>\n"
770 "<p><a href=\"" HOME_PAGE_URL "\">JunkBuster web site</a></p>\n"
771 "<p><a href=\"http://i.j.b/show-proxy-args\">Proxy configuration</a></p>\n"
772 "<p><a href=\"http://i.j.b/show-url-info\">Look up a URL</a></p>\n"
777 /*********************************************************************
779 * Function : intercept_url
781 * Description : checks the URL `basename' against a list of URLs to
782 * snarf. If it matches, it calls the associated function
783 * which returns an HTML page to send back to the client.
784 * Right now, we snarf:
785 * "show-proxy-args", and
786 * "ij-untrusted-url" (optional w/TRUST_FILES)
789 * 1 : http = http_request request, check `basename's of blocklist
790 * 2 : csp = Current client state (buffers, headers, etc...)
792 * Returns : 1 if it intercepts & handles the request.
794 *********************************************************************/
795 int intercept_url(struct http_request *http, struct client_state *csp)
797 char *basename = NULL;
798 const struct interceptors *v;
/* A request qualifies in one of two ways: the host is i.j.b (any path), or the host is
 * one of the two project hosts and the path starts with /config. Host comparisons are
 * case-insensitive. */
800 if (0 == strcmpic(http->host,"i.j.b"))
803 * Catch http://i.j.b/...
805 basename = http->path;
/* The /config prefix is compared case-insensitively over 7 characters and must be
 * followed by a slash or the end of the path, so a path such as /configure is left alone. */
807 else if ( ( (0 == strcmpic(http->host,"ijbswa.sourceforge.net"))
808 || (0 == strcmpic(http->host,"ijbswa.sf.net")) )
809 && (0 == strncmpic(http->path,"/config", 7))
810 && ((http->path[7] == '/') || (http->path[7] == '\0')))
813 * Catch http://ijbswa.sourceforge.net/config/...
814 * and http://ijbswa.sf.net/config/...
816 basename = http->path + 7;
821 /* Don't want to intercept */
825 /* We have intercepted it. */
827 /* remove any leading slash */
828 if (*basename == '/')
833 log_error(LOG_LEVEL_GPC, "%s%s intercepted!", http->hostport, http->path);
/* NOTE(review): the common-log-format entry always records status 200 and size 3,
 * whatever is actually sent - confirm whether that is intentional. */
834 log_error(LOG_LEVEL_CLF, "%s - - [%T] \"%s\" 200 3",
835 csp->ip_addr_str, http->cmd);
/* Interceptors are matched by prefix: strncmp() runs over v->len characters only, so
 * anything after the interceptor name (for example a query string) is ignored here. */
837 for (v = intercept_patterns; v->str; v++)
839 if (strncmp(basename, v->str, v->len) == 0)
841 char * p = ((v->interceptor)(http, csp));
845 /* Send HTML redirection result */
846 write_socket(csp->cfd, p, strlen(p));
/* Presumably reached only when no interceptor matched - TODO confirm: the built-in home
 * page is sent instead. */
854 write_socket(csp->cfd, C_HOME_PAGE, strlen(C_HOME_PAGE));
859 #ifdef FAST_REDIRECTS
860 /*********************************************************************
862 * Function : redirect_url
864 * Description : Checks for redirection URLs and returns a HTTP redirect
865 * to the destination URL.
868 * 1 : http = http_request request (the path examined is csp->http->path)
869 * 2 : csp = Current client state (buffers, headers, etc...)
871 * Returns : NULL if URL was clean, HTTP redirect otherwise.
873 *********************************************************************/
874 char *redirect_url(struct http_request *http, struct client_state *csp)
/* Both cursors start at the beginning of the request path. */
878 p = q = csp->http->path;
879 log_error(LOG_LEVEL_REDIRECTS, "checking path: %s", p);
881 /* find the last URL encoded in the request */
/* Only the lower-case http scheme prefix is searched for. Presumably the loop body moves
 * q to each match and steps p past it, so q ends on the last occurrence - TODO confirm. */
882 while (p = strstr(p, "http://"))
887 /* if there was any, generate and return a HTTP redirect */
/* q still equal to the path start means no embedded URL was found. */
888 if (q != csp->http->path)
890 log_error(LOG_LEVEL_REDIRECTS, "redirecting to: %s", q);
/* NOTE(review): the size leaves room for the terminator only because q replaces a
 * two-character conversion in the template; confirm that HTTP_REDIRECT_TEMPLATE contains
 * exactly one such conversion. No NULL check on the malloc() result is visible. */
892 p = (char *)malloc(strlen(HTTP_REDIRECT_TEMPLATE) + strlen(q));
893 sprintf(p, HTTP_REDIRECT_TEMPLATE, q);
902 #endif /* def FAST_REDIRECTS */
904 /*********************************************************************
906 * Function : url_permissions
908 * Description : Gets the permissions for this URL.
911 * 1 : http = http_request request for blocked URLs
912 * 2 : csp = Current client state (buffers, headers, etc...)
914 * Returns : permissions bitmask specifying what this URL can do.
915 * If not on list, will be default_permissions.
917 *********************************************************************/
918 int url_permissions(struct http_request *http, struct client_state *csp)
920 struct file_list *fl;
921 struct permissions_spec *b;
922 struct url_spec url[1];
/* Start from the configured defaults; matching list entries then adjust this value. */
923 int permissions = csp->config->default_permissions;
925 if (((fl = csp->permissions_list) == NULL) || ((b = fl->f) == NULL))
/* NOTE(review): dsplit() allocates url->dbuf and url->dvec; confirm that both are
 * released before this function returns. */
930 *url = dsplit(http->host);
932 /* if splitting the domain fails, punt */
933 if (url->dbuf == NULL)
/* The first list node is skipped. An entry applies when its port (0 means any), its
 * domain (empty means any) and its path all match; the regexec() and strncmp() lines are
 * alternatives, presumably selected at compile time - TODO confirm. */
938 for (b = b->next; NULL != b; b = b->next)
940 if ((b->url->port == 0) || (b->url->port == http->port))
942 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
944 if ((b->url->path == NULL) ||
946 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
948 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
/* Bits outside the entry's mask are cleared first, then the entry's add bits are set.
 * No early exit is visible here, so later matching entries override earlier ones. */
952 permissions &= b->mask;
953 permissions |= b->add;
966 /*********************************************************************
968 * Function : forward_url
970 * Description : Should we forward this to another proxy?
973 * 1 : http = http_request request for current URL
974 * 2 : csp = Current client state (buffers, headers, etc...)
976 * Returns : Return gw_default for no forward match,
977 * else a gateway pointer to a specific forwarding proxy.
979 *********************************************************************/
980 const struct gateway *forward_url(struct http_request *http, struct client_state *csp)
982 struct file_list *fl;
983 struct forward_spec *b;
984 struct url_spec url[1];
/* Without a loaded forward list (csp->flist or its payload is NULL) there is nothing to match. */
986 if (((fl = csp->flist) == NULL) || ((b = fl->f) == NULL))
/* NOTE(review): dsplit() allocates url->dbuf and url->dvec; confirm that both are
 * released on every return path. */
991 *url = dsplit(http->host);
993 /* if splitting the domain fails, punt */
994 if (url->dbuf == NULL) return(gw_default);
/* The first list node is skipped. An entry matches on port (0 means any), then domain
 * (empty means any), then path; the regexec() and strncmp() lines are alternatives,
 * presumably selected at compile time - TODO confirm. */
996 for (b = b->next; b ; b = b->next)
998 if ((b->url->port == 0) || (b->url->port == http->port))
1000 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1002 if ((b->url->path == NULL) ||
1004 (regexec(b->url->preg, http->path, 0, NULL, 0) == 0)
1006 (strncmp(b->url->path, http->path, b->url->pathlen) == 0)
1025 /*********************************************************************
1029 * Description : Takes a domain and returns (by value) a url_spec
1030 * structure populated with dbuf, dcnt and dvec. The
1031 * other fields in the structure that is returned are zero.
1034 * 1 : domain = a URL address
1036 * Returns : url_spec structure populated with dbuf, dcnt and dvec.
1038 *********************************************************************/
1039 struct url_spec dsplit(char *domain)
/* ret is a one-element array so that it can be used with pointer syntax below. */
1041 struct url_spec ret[1];
1046 memset(ret, '\0', sizeof(*ret));
1048 if ((p = strrchr(domain, '.')))
/* All further work is done on a private copy, so the caller's string is not modified
 * by the lower-casing or by ssplit().
 * NOTE(review): the strdup() result is walked by the loop below; no NULL check is visible.
 * NOTE(review): tolower() receives a plain char; casting through unsigned char, as
 * ijb_isdigit() does, would keep high-bit characters non-negative. */
1056 ret->dbuf = strdup(domain);
1058 /* map to lower case */
1059 for (p = ret->dbuf; *p ; p++) *p = tolower(*p);
1061 /* split the domain name into components */
1062 ret->dcnt = ssplit(ret->dbuf, ".", v, SZ(v), 1, 1);
/* Because ret is an array of one element, sizeof(ret) here equals sizeof(*ret) above. */
1066 memset(ret, '\0', sizeof(ret));
1070 /* save a copy of the pointers in dvec */
/* The dcnt pointers stored in v are copied into a heap vector owned by the result. */
1071 size = ret->dcnt * sizeof(*ret->dvec);
1073 if ((ret->dvec = (char **)malloc(size)))
1075 memcpy(ret->dvec, v, size);
1083 /*********************************************************************
1085 * Function : domaincmp
1087 * Description : Compare domain names.
1088 * domaincmp("a.b.c" , "a.b.c") => 0 (MATCH)
1089 * domaincmp("a*.b.c", "a.b.c") => 0 (MATCH)
1090 * domaincmp("b.c" , "a.b.c") => 0 (MATCH)
1091 * domaincmp("" , "a.b.c") => 0 (MATCH)
1094 * 1 : pattern = a domain that may contain a '*' as a wildcard.
1095 * 2 : fqdn = domain name against which the patterns are compared.
1097 * Returns : 0 => domains are equivalent, else no match.
1099 *********************************************************************/
1100 int domaincmp(struct url_spec *pattern, struct url_spec *fqdn)
1102 char **pv, **fv; /* vectors */
1103 int pn, fn; /* counters */
1104 char *p, *f; /* chars */
/* Components are compared pairwise while both sides have components left. */
1112 while ((pn > 0) && (fn > 0))
/* Only the fqdn character is lower-cased, so the pattern is expected to be lower case
 * already (NOTE(review): confirm where patterns are normalised). */
1117 while (*p && *f && (*p == tolower(*f)))
/* A difference at the stopping point is a mismatch unless the pattern character is the
 * wildcard. */
1122 if ((*p != tolower(*f)) && (*p != '*')) return(1);
/* Pattern components left over after the fqdn ran out: no match. */
1125 if (pn > 0) return(1);
1132 /* intercept functions */
1134 /*********************************************************************
1136 * Function : show_proxy_args
1138 * Description : This "crunch"es "http:/any.thing/show-proxy-args" and
1139 * returns a web page describing the current status of IJB.
1142 * 1 : http = ignored
1143 * 2 : csp = Current client state (buffers, headers, etc...)
1145 * Returns : A string that contains the current status of IJB.
1147 *********************************************************************/
1148 char *show_proxy_args(struct http_request *http, struct client_state *csp)
1152 #ifdef SPLIT_PROXY_ARGS
1156 const char * filename = NULL;
1157 const char * file_description = NULL;
/* The selector is whatever follows the LAST question mark in the path; only its first
 * character is kept. The index page further down links to the selectors permit,
 * forward, acl, re and trust, so presumably that first letter picks the file - TODO confirm. */
1158 char * query_string = strrchr(http->path, '?');
1159 char which_file = '\0';
1162 if (query_string != NULL)
1164 /* first char past the last '?' (maybe '\0')*/
1165 which_file = query_string[1];
1170 if (csp->permissions_list)
1172 filename = csp->permissions_list->filename;
1173 file_description = "Permissions List";
1179 filename = csp->flist->filename;
1180 file_description = "Forward List";
1188 filename = csp->alist->filename;
1189 file_description = "Access Control List";
1192 #endif /* def ACL_FILES */
1198 filename = csp->rlist->filename;
1199 file_description = "RE Filter List";
1202 #endif /* def PCRS */
1208 filename = csp->tlist->filename;
1209 file_description = "Trust List";
1212 #endif /* def TRUST_FILES */
1217 /* Display specified file */
1218 /* FIXME: Add HTTP headers so this isn't cached */
1221 "Server: IJ/" VERSION "\n"
1222 "Content-type: text/html\n"
1223 "Pragma: no-cache\n"
1224 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1225 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1230 "<title>Internet Junkbuster Proxy Status - ");
1231 s = strsav(s, file_description);
1235 "<body bgcolor=\"#f8f8f0\" link=\"#000078\" alink=\"#ff0022\" vlink=\"#787878\">\n"
1237 "<h1>" BANNER "\n");
1238 s = strsav(s, file_description);
1241 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1243 s = strsav(s, file_description);
/* The quotation marks shown around the file name are written as HTML entities; a bare
 * double quote here would end the C string literal early. */
1246 "Contents of file &quot;<code>");
1247 p = html_encode(filename);
1251 "</code>&quot;:<br>\n"
/* A file that cannot be opened produces an inline error message in the page; otherwise
 * each line read is html_encode()d before it is appended. */
1255 if ((fp = fopen(filename, "r")) == NULL)
1257 s = strsav(s, "</pre><h1>ERROR OPENING FILE!</h1><pre>");
1261 while (fgets(buf, sizeof(buf), fp))
1263 p = html_encode(buf);
1268 s = strsav(s, "<br>");
1277 "<p><a href=\"show-proxy-args\">Back to proxy status</a></p>\n"
1279 "<small><small><p>\n"
1280 "The " BANNER " Proxy - \n"
1281 "<a href=\"" HOME_PAGE_URL "\">" HOME_PAGE_URL "</a>\n"
1283 "</body></html>\n");
1286 #endif /* def SPLIT_PROXY_ARGS */
/* Status page: pre-rendered fragments held in csp->config are appended in order. */
1288 s = strsav(s, csp->config->proxy_args_header);
1289 s = strsav(s, csp->config->proxy_args_invocation);
1292 #endif /* def STATISTICS */
1293 s = strsav(s, csp->config->proxy_args_gateways);
1295 #ifdef SPLIT_PROXY_ARGS
1297 "<h2>The following files are in use:</h2>\n"
1298 "<p>(Click a filename to view it)</p>\n"
/* NOTE(review): the file names in this index are appended as-is, while the single-file
 * view above runs the name through html_encode() - confirm whether these need it too. */
1301 if (csp->permissions_list)
1303 s = strsav(s, "<li>Permissions List: <a href=\"show-proxy-args?permit\"><code>");
1304 s = strsav(s, csp->permissions_list->filename);
1305 s = strsav(s, "</code></a></li>\n");
1310 s = strsav(s, "<li>Forward List: <a href=\"show-proxy-args?forward\"><code>");
1311 s = strsav(s, csp->flist->filename);
1312 s = strsav(s, "</code></a></li>\n");
1318 s = strsav(s, "<li>Access Control List: <a href=\"show-proxy-args?acl\"><code>");
1319 s = strsav(s, csp->alist->filename);
1320 s = strsav(s, "</code></a></li>\n");
1322 #endif /* def ACL_FILES */
1327 s = strsav(s, "<li>RE Filter List: <a href=\"show-proxy-args?re\"><code>");
1328 s = strsav(s, csp->rlist->filename);
1329 s = strsav(s, "</code></a></li>\n");
1331 #endif /* def PCRS */
1336 s = strsav(s, "<li>Trust List: <a href=\"show-proxy-args?trust\"><code>");
1337 s = strsav(s, csp->tlist->filename);
1338 s = strsav(s, "</code></a></li>\n");
1340 #endif /* def TRUST_FILES */
1342 s = strsav(s, "</ul>");
1344 #else /* ifndef SPLIT_PROXY_ARGS */
/* Without SPLIT_PROXY_ARGS each loaded list contributes its own pre-rendered proxy_args text. */
1347 s = strsav(s, csp->clist->proxy_args);
1352 s = strsav(s, csp->flist->proxy_args);
1358 s = strsav(s, csp->alist->proxy_args);
1360 #endif /* def ACL_FILES */
1365 s = strsav(s, csp->rlist->proxy_args);
1367 #endif /* def PCRS */
1372 s = strsav(s, csp->tlist->proxy_args);
1374 #endif /* def TRUST_FILES */
1376 #endif /* ndef SPLIT_PROXY_ARGS */
1378 s = strsav(s, csp->config->proxy_args_trailer);
1385 static const char C_URL_INFO_HEADER[] =
/* Opening part of the show-url-info report; ijb_show_url_info() uses it as the sprintf()
 * format. Both %s conversions on the last line receive the same host string.
 * NOTE(review): that string originates from the request's query string and is formatted
 * into the HTML without html_encode() - confirm whether it should be encoded. */
1387 "Pragma: no-cache\n"
1388 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1389 "Content-Type: text/html\n\n"
1392 "<title>Internet Junkbuster: URL Info</title>\n"
1398 "<p>Information for: <a href=\"http://%s\">http://%s</a></p>\n";
/* Closing part of the report; ijb_show_url_info() appends it on every path that builds one. */
1399 static const char C_URL_INFO_FOOTER[] =
1404 static const char C_URL_INFO_FORM[] =
/* Lookup form returned by ijb_show_url_info() when the request carries no url parameter.
 * The form submits, by GET, a field named url back to the show-url-info interceptor.
 * The quotation marks in the prompt are written as HTML entities; a bare double quote
 * there would end the C string literal early. */
1406 "Pragma: no-cache\n"
1407 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1408 "Content-Type: text/html\n\n"
1411 "<title>Internet Junkbuster: URL Info</title>\n"
1417 "<form method=\"GET\" action=\"http://i.j.b/show-url-info\">\n"
1418 "<p>Please enter a URL, without the leading &quot;http://&quot;:</p>"
1419 "<p><input type=\"text\" name=\"url\" size=\"80\">"
1420 "<input type=\"submit\" value=\"Info\"></p>\n"
1426 /*********************************************************************
1428 * Function : permissions_to_text
1430 * Description : Converts a permissionsfile entry from numeric form
1431 * ("mask" and "add") to text.
1434 * 1 : mask = As from struct permissions_spec
1435 * 2 : add = As from struct permissions_spec
1437 * Returns : A string. Caller must free it.
1439 *********************************************************************/
1440 char * permissions_to_text(unsigned mask, unsigned add)
/* The result starts as an empty heap string and grows through strsav(); per the header
 * comment the caller must free it. */
1442 char * result = strdup("");
1444 /* sanity - prevents "-feature +feature" */
/* For one permission bit: a bit missing from mask is reported as a removed feature
 * (minus sign); otherwise a bit present in add is reported as an added feature (plus
 * sign); a bit that is in mask but not in add produces no text. */
1447 #define PERMISSION_TO_TEXT(__bit, __name) \
1448 if (!(mask & __bit)) \
1450 result = strsav(result, " -" __name); \
1452 else if (add & __bit) \
1454 result = strsav(result, " +" __name); \
1457 PERMISSION_TO_TEXT(PERMIT_COOKIE_SET, "cookies-set");
1458 PERMISSION_TO_TEXT(PERMIT_COOKIE_READ, "cookies-read");
1459 PERMISSION_TO_TEXT(PERMIT_RE_FILTER, "filter");
1460 PERMISSION_TO_TEXT(PERMIT_POPUPS, "popup");
1461 PERMISSION_TO_TEXT(PERMIT_REFERER, "referer");
1462 PERMISSION_TO_TEXT(PERMIT_FAST_REDIRECTS, "fast-redirects");
1463 PERMISSION_TO_TEXT(PERMIT_BLOCK, "block");
1464 PERMISSION_TO_TEXT(PERMIT_IMAGE, "image");
1470 /*********************************************************************
1472 * Function : ijb_show_url_info
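1474 * Description : Interceptor for "show-url-info". Reports the default permissions, every permissions-list entry that matches the URL given in the "url=" query parameter, and the final result. Without that parameter it returns the lookup form.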
1477 * 1 : http = http_request request for crunched URL
1478 * 2 : csp = Current client state (buffers, headers, etc...)
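1480 * Returns : A newly allocated string holding the complete response page (presumably freed by the caller - TODO confirm).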
1482 *********************************************************************/
1483 char *ijb_show_url_info(struct http_request *http, struct client_state *csp)
/* The query string starts after the FIRST question mark in the path. It is URL-decoded,
 * and it must begin with url= (compared case-insensitively); the rest becomes host. */
1485 char * query_string = strchr(http->path, '?');
1488 if (query_string != NULL)
1490 query_string = url_decode(query_string + 1);
1491 if (strncmpic(query_string, "url=", 4) == 0)
1493 host = strdup(query_string + 4);
1495 freez(query_string);
1503 struct file_list *fl;
1504 struct permissions_spec *b;
1505 struct url_spec url[1];
1506 int permissions = csp->config->default_permissions;
/* sizeof counts the terminator of the header template, and host is printed twice.
 * NOTE(review): host is formatted into the page as received; no html_encode() call is
 * visible, and no NULL check on the malloc() result is visible either. */
1508 result = (char *)malloc(sizeof(C_URL_INFO_HEADER) + 2 * strlen(host));
1509 sprintf(result, C_URL_INFO_HEADER, host, host);
/* Passing the same value as mask and as add lists every set bit with a plus sign and
 * every clear bit with a minus sign. */
1511 s = permissions_to_text(permissions, permissions);
1512 result = strsav(result, "<h3>Defaults:</h3>\n<p><b>{");
1513 result = strsav(result, s);
1514 result = strsav(result, " }</b></p>\n<h3>Patterns affecting the URL:</h3>\n<p>\n");
/* host is then examined for a slash and for a colon; presumably this separates the path
 * and the port that the matching loop below uses - TODO confirm. */
1517 s = strchr(host, '/');
1527 s = strchr(host, ':');
/* Without a permissions list only the footer is appended. */
1534 if (((fl = csp->permissions_list) == NULL) || ((b = fl->f) == NULL))
1538 result = strsav(result, C_URL_INFO_FOOTER);
1542 *url = dsplit(host);
1544 /* if splitting the domain fails, punt */
1545 if (url->dbuf == NULL)
1549 result = strsav(result, C_URL_INFO_FOOTER);
/* Same matching rules as url_permissions(): port (0 means any), domain (empty means any),
 * then path. Every matching entry is printed and folded into permissions. */
1553 for (b = b->next; NULL != b; b = b->next)
1555 if ((b->url->port == 0) || (b->url->port == port))
1557 if ((b->url->domain[0] == '\0') || (domaincmp(b->url, url) == 0))
1559 if ((b->url->path == NULL) ||
1561 (regexec(b->url->preg, path, 0, NULL, 0) == 0)
1563 (strncmp(b->url->path, path, b->url->pathlen) == 0)
/* NOTE(review): the pattern text b->url->spec is appended without html_encode(). */
1567 s = permissions_to_text(b->mask, b->add);
1568 result = strsav(result, "<b>{");
1569 result = strsav(result, s);
1570 result = strsav(result, " }</b><br>\n<code>");
1571 result = strsav(result, b->url->spec);
1572 result = strsav(result, "</code><br>\n<br>\n");
1574 permissions &= b->mask;
1575 permissions |= b->add;
/* The accumulated value is printed in the same all-bits form as the defaults. */
1587 s = permissions_to_text(permissions, permissions);
1588 result = strsav(result, "</p>\n<h2>Final Results:</h2>\n<p><b>{");
1589 result = strsav(result, s);
1590 result = strsav(result, " }</b><br>\n<br>\n");
1593 result = strsav(result, C_URL_INFO_FOOTER);
/* No usable url parameter: hand back a heap copy of the lookup form. */
1598 return strdup(C_URL_INFO_FORM);
1603 /*********************************************************************
1605 * Function : ijb_send_banner
1607 * Description : This "crunch"es "http://i.j.b/ijb-send-banner and
1611 * 1 : http = http_request request for crunched URL
1612 * 2 : csp = Current client state (buffers, headers, etc...)
1614 * Returns : NULL, indicating that it has already sent the data.
1616 *********************************************************************/
1617 char *ijb_send_banner(struct http_request *http, struct client_state *csp)
1619 write_socket(csp->cfd, JBGIF, sizeof(JBGIF)-1);
1625 /*********************************************************************
1627 * Function : ij_untrusted_url
1629 * Description : This "crunch"es "http://any.thing/ij-untrusted-url" and
1630 * returns a web page describing why it was untrusted.
 *               The page names the intercepted host and path, the
 *               referrer text, the trusted referrer specs from
 *               csp->config->trust_list and, when present, the documents
 *               listed in csp->config->trust_info.
 *               NOTE(review): in this listing the braces, some pieces of
 *               the format string and several short statements of the
 *               function are not visible.
1633 * 1 : http = http_request request for crunched URL
1634 * 2 : csp = Current client state (buffers, headers, etc...)
1636 * Returns : A string that contains why this was untrusted.
1638 *********************************************************************/
1639 char *ij_untrusted_url(struct http_request *http, struct client_state *csp)
1642 char *hostport, *path, *refer, *p, *v[9];
1644 struct url_spec **tl, *t;
1647 static const char format[] =
1648 "HTTP/1.0 200 OK\r\n"
1649 "Pragma: no-cache\n"
1650 "Last-Modified: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1651 "Expires: Thu Jul 31, 1997 07:42:22 pm GMT\n"
1652 "Content-Type: text/html\n\n"
1655 "<title>Internet Junkbuster: Request for untrusted URL</title>\n"
1661 "The " BANNER " Proxy "
1662 "<A href=\"" HOME_PAGE_URL "\">"
1663 "(" HOME_PAGE_URL ") </A>"
1664 "intercepted the request for %s%s\n"
1665 "because the URL is not trusted.\n"
      /*
       * The request path is split on '?' and '+'.  Only when that yields
       * exactly four fields are fields 1..3 decoded as host:port, path
       * and referrer; otherwise placeholder strings are used.
       */
1668 if ((n = ssplit(http->path, "?+", v, SZ(v), 0, 0)) == 4)
1670 hostport = url_decode(v[1]);
1671 path = url_decode(v[2]);
1672 refer = url_decode(v[3]);
1676 hostport = strdup("undefined_host");
1677 path = strdup("/undefined_path");
1678 refer = strdup("undefined");
      /*
       * n is reused as the size of the buffer that receives the
       * formatted page.
       * NOTE(review): only the hostport term is visible here; confirm
       * that the format itself and path are accounted for as well.
       */
1682 n += strlen(hostport);
1685 if ((p = (char *)malloc(n)))
1687 sprintf(p, format, hostport, path);
      /*
       * Keep what strsav() returns: every other call in this file assigns
       * the result back, presumably because strsav() may return a buffer
       * different from the one passed in.  Dropping it would leave p
       * pointing at the old buffer and lose the appended text.
       * NOTE(review): any further strsav() call between these two that is
       * not visible in this listing needs the same treatment.
       */
1690 p = strsav(p, "The referrer in this request was <strong>");
1692 p = strsav(p, "</strong><br>\n");
1698 p = strsav(p, "<h3>The following referrers are trusted</h3>\n");
      /* trust_list is walked until its first NULL entry. */
1700 for (tl = csp->config->trust_list; (t = *tl) ; tl++)
      /*
       * NOTE(review): unbounded "%s" into buf, whose declaration is not
       * visible here - confirm that t->spec always fits.
       */
1702 sprintf(buf, "%s<br>\n", t->spec);
      /* The help section is emitted only when trust_info has entries. */
1706 if (csp->config->trust_info->next)
1712 "You can learn more about what this means "
1713 "and what you may be able to do about it by "
1714 "reading the following documents:<br>\n"
1720 for (l = csp->config->trust_info->next; l ; l = l->next)
      /*
       * NOTE(review): the href value is neither quoted nor escaped in
       * this template.
       */
1723 "<li> <a href=%s>%s</a><br>\n",
1728 p = strsav(p, "</ol>\n");
1731 p = strsav(p, "</body>\n" "</html>\n");
1736 #endif /* def TRUST_FILES */
1740 /*********************************************************************
1742 * Function : add_stats
1744 * Description : Statistics function of JB. Called by `show_proxy_args'.
 *               Passes the page text through strsav() to add a
 *               "Statistics" heading followed by either a "No activity"
 *               line or the request counts with the rejected percentage.
 *               NOTE(review): in this listing the braces, the output
 *               buffer declaration and the formatting call are not
 *               visible.
1747 * 1 : s = string that holds the proxy args description page
1749 * Returns : A pointer to the descriptive status web page.
 *           NOTE(review): the return statement is not visible here;
 *           presumably s as last returned by strsav() - confirm.
1751 *********************************************************************/
1752 char *add_stats(char *s)
1755 * Output details of the number of requests rejected and
1756 * accepted. This is switchable in the junkbuster config.
1757 * Does nothing if this option is not enabled.
1760 float perc_rej; /* Percentage of http requests rejected */
      /*
       * Work on local copies of the global counters, so the zero test and
       * the percentage below use the same two values.
       * NOTE(review): presumably because other threads may update the
       * counters meanwhile - the remark below mentions thread safety.
       */
1762 int local_urls_read = urls_read;
1763 int local_urls_rejected = urls_rejected;
1766 * Need to alter the stats not to include the fetch of this
1769 * Can't do following thread safely! doh!
1772 * urls_rejected--; * This will be incremented subsequently *
1775 s = strsav(s,"<h2>Statistics for this " BANNER ":</h2>\n");
      /* No requests yet: also keeps the division below away from zero. */
1777 if (local_urls_read == 0)
1780 s = strsav(s,"No activity so far!\n");
      /* Share of rejected requests, in percent. */
1786 perc_rej = (float)local_urls_rejected * 100.0F /
1787 (float)local_urls_read;
      /*
       * NOTE(review): the call that formats these values into out_str and
       * the declaration of out_str are not visible - confirm the buffer
       * is large enough for the formatted line.
       */
1790 "%d requests received, %d filtered "
1793 local_urls_rejected, perc_rej);
1795 s = strsav(s,out_str);
1800 #endif /* def STATISTICS */