1 const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.3 2002/03/03 14:51:11 oes Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
6 * Purpose : Declares functions to match URLs against URL
9 * Copyright : Written by and Copyright (C) 2001 the SourceForge
10 * IJBSWA team. http://ijbswa.sourceforge.net
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
35 * $Log: urlmatch.c,v $
36 * Revision 1.3 2002/03/03 14:51:11 oes
37 * Fixed CLF logging: Added ocmd member for client's request to struct http_request
39 * Revision 1.2 2002/01/21 00:14:09 jongfoster
40 * Correcting comment style
41 * Fixing an uninitialized memory bug in create_url_spec()
43 * Revision 1.1 2002/01/17 20:53:46 jongfoster
44 * Moving all our URL and URL pattern parsing code to the same file - it
45 * was scattered around in filters.c, loaders.c and parsers.c.
47 * Providing a single, simple url_match(pattern,url) function - rather than
48 * the 3-line match routine which was repeated all over the place.
50 * Renaming free_url to free_url_spec, since it frees a struct url_spec.
52 * Providing parse_http_url() so that URLs can be parsed without faking a
53 * HTTP request line for parse_http_request() or repeating the parsing
54 * code (both of which were techniques that were actually in use).
56 * Standardizing that struct http_request is used to represent a URL, and
57 * struct url_spec is used to represent a URL pattern. (Before, URLs were
58 * represented as separate variables and a partially-filled-in url_spec).
61 *********************************************************************/
68 #include <sys/types.h>
76 #if !defined(_WIN32) && !defined(__OS2__)
86 const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
88 /* Fix a problem with Solaris. There should be no effect on other
90 * Solaris's isspace() is a macro which uses its argument directly
91 * as an array index. Therefore we need to make sure that high-bit
92 * characters generate +ve values, and ideally we also want to make
93 * the argument match the declared parameter type of "int".
95 * Why did they write a character function that can't take a simple
96 * "char" argument? Doh!
98 #define ijb_isupper(__X) isupper((int)(unsigned char)(__X))
99 #define ijb_tolower(__X) tolower((int)(unsigned char)(__X))
102 /*********************************************************************
104 * Function : free_http_request
106 * Description : Freez a http_request structure
109 * 1 : http = points to a http_request structure to free
113 *********************************************************************/
114 void free_http_request(struct http_request *http)
   /* Hands members of the request structure to freez(). The members released
      in the lines visible here are hostport, host_ip_addr_str and dbuffer.
      NOTE(review): only part of this body is visible in this view; the other
      members are presumably released the same way - confirm against the full
      file. */
123 freez(http->hostport);
126 freez(http->host_ip_addr_str);
127 freez(http->dbuffer);
133 /*********************************************************************
135 * Function : parse_http_url
137 * Description : Parse out the host and port from the URL. Find the
138 * hostname & path, port (if ':'), and/or password (if '@')
141 * 1 : url = URL (or is it URI?) to break down
142 * 2 : http = pointer to the http structure to hold elements.
143 * Will be zeroed before use. Note that this
144 * function sets the http->gpc and http->ver
146 * 3 : csp = Current client state (buffers, headers, etc...)
148 * Returns : JB_ERR_OK on success
149 * JB_ERR_MEMORY on out of memory
150 * JB_ERR_CGI_PARAMS on malformed command/URL
151 * or >100 domains deep.
153 *********************************************************************/
154 jb_err parse_http_url(const char * url,
155 struct http_request *http,
156 struct client_state *csp)
   /* Breaks a URL into the pieces stored in the request structure:
        - url      : private copy of the input string
        - ssl      : decided by the "http://" / "https://" prefix test below
        - path     : text from the first '/' after the prefix ("/" if there is
                     none, and always "/" for SSL URLs)
        - hostport : text before that '/'
        - host     : hostport with any "user:password@" prefix and ":port"
                     suffix removed
        - port     : explicit port if present, else the scheme default
        - dbuffer / dvec / dcount : lower-cased host split on '.'
      On every allocation failure after the first, the partially filled
      structure is released with free_http_request() before returning
      JB_ERR_MEMORY.
      NOTE(review): csp is not referenced in any line visible in this view. */
159 * Zero out the results structure
161 memset(http, '\0', sizeof(*http));
165 * Save our initial URL
167 http->url = strdup(url);
168 if (http->url == NULL)
170 return JB_ERR_MEMORY;
175 * Split URL into protocol,hostport,path.
185 return JB_ERR_MEMORY;
188 /* Find the start of the URL in our scratch space */
190 if (strncmpic(url_noproto, "http://", 7) == 0)
195 else if (strncmpic(url_noproto, "https://", 8) == 0)
205 url_path = strchr(url_noproto, '/');
206 if (url_path != NULL)
211 * NOTE: The following line ignores the path for HTTPS URLS.
212 * This means that you get consistent behaviour if you type a
213 * https URL in and it's parsed by the function. (When the
214 * URL is actually retrieved, SSL hides the path part).
216 http->path = strdup(http->ssl ? "/" : url_path);
218 http->hostport = strdup(url_noproto);
223 * Repair broken HTTP requests that don't contain a path,
224 * or CONNECT requests
226 http->path = strdup("/");
227 http->hostport = strdup(url_noproto);
   /* Both duplicates are checked together so a single cleanup path suffices. */
232 if ( (http->path == NULL)
233 || (http->hostport == NULL))
236 free_http_request(http);
237 return JB_ERR_MEMORY;
243 * Split hostport into user/password (ignored), host, port.
250 buf = strdup(http->hostport);
253 free_http_request(http);
254 return JB_ERR_MEMORY;
257 /* check if url contains username and/or password */
258 host = strchr(buf, '@');
261 /* Contains username/password, skip it and the @ sign. */
266 /* No username or password. */
270 /* check if url contains port */
271 port = strchr(host, ':');
275 /* Terminate hostname and point to start of port string */
   /* NOTE(review): atoi() cannot report a conversion failure; a non-numeric
      port string yields 0 here. */
277 http->port = atoi(port);
281 /* No port specified. */
   /* Fall back to the scheme's well-known port: 443 for HTTPS, 80 for HTTP.
      (The previous value for SSL, 143, is the IMAP port.) */
282 http->port = (http->ssl ? 443 : 80);
285 http->host = strdup(host);
289 if (http->host == NULL)
291 free_http_request(http);
292 return JB_ERR_MEMORY;
298 * Split domain name so we can compare it against wildcards
301 char *vec[BUFFER_SIZE];
305 http->dbuffer = strdup(http->host);
306 if (NULL == http->dbuffer)
308 free_http_request(http);
309 return JB_ERR_MEMORY;
312 /* map to lower case */
313 for (p = http->dbuffer; *p ; p++)
315 *p = tolower((int)(unsigned char)*p);
318 /* split the domain name into components */
   /* ssplit() writes the component pointers into vec; they point into
      http->dbuffer, which is why dbuffer is kept alongside dvec. */
319 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec), 1, 1);
321 if (http->dcount <= 0)
324 * Error: More than SZ(vec) components in domain
325 * or: no components in domain
327 free_http_request(http);
331 /* save a copy of the pointers in dvec */
332 size = http->dcount * sizeof(*http->dvec);
334 http->dvec = (char **)malloc(size);
335 if (NULL == http->dvec)
337 free_http_request(http);
338 return JB_ERR_MEMORY;
341 memcpy(http->dvec, vec, size);
349 /*********************************************************************
351 * Function : parse_http_request
353 * Description : Parse out the host and port from the URL. Find the
354 * hostname & path, port (if ':'), and/or password (if '@')
357 * 1 : req = HTTP request line to break down
358 * 2 : http = pointer to the http structure to hold elements
359 * 3 : csp = Current client state (buffers, headers, etc...)
361 * Returns : JB_ERR_OK on success
362 * JB_ERR_MEMORY on out of memory
363 * JB_ERR_CGI_PARAMS on malformed command/URL
364 * or >100 domains deep.
366 *********************************************************************/
367 jb_err parse_http_request(const char *req,
368 struct http_request *http,
369 struct client_state *csp)
   /* Parses a request line: the line is split on blanks and line ends, the
      first field is checked against the list of accepted methods, the second
      field is handed to parse_http_url(), and the request line, method and
      third field are then copied into the request structure.
      NOTE(review): the declarations, the field-count check and the final
      return are not visible in this view. */
377 memset(http, '\0', sizeof(*http));
382 return JB_ERR_MEMORY;
   /* NOTE(review): buf is presumably a writable copy of req - confirm. */
385 n = ssplit(buf, " \r\n", v, SZ(v), 1, 1);
392 /* this could be a CONNECT request */
393 if (strcmpic(v[0], "connect") == 0)
398 /* or it could be any other basic HTTP request type */
399 else if ((0 == strcmpic(v[0], "get"))
400 || (0 == strcmpic(v[0], "head"))
401 || (0 == strcmpic(v[0], "post"))
402 || (0 == strcmpic(v[0], "put"))
403 || (0 == strcmpic(v[0], "delete"))
405 /* or a webDAV extension (RFC2518) */
406 || (0 == strcmpic(v[0], "propfind"))
407 || (0 == strcmpic(v[0], "proppatch"))
408 || (0 == strcmpic(v[0], "move"))
409 || (0 == strcmpic(v[0], "copy"))
410 || (0 == strcmpic(v[0], "mkcol"))
411 || (0 == strcmpic(v[0], "lock"))
412 || (0 == strcmpic(v[0], "unlock"))
420 /* Unknown HTTP method */
   /* parse_http_url() zeroes the structure again and fills in the URL parts. */
425 err = parse_http_url(v[1], http, csp);
433 * Copy the details into the structure
   /* The ssl flag computed by parse_http_url() from the URL prefix is
      overwritten here with the CONNECT-method flag. */
435 http->ssl = is_connect;
436 http->cmd = strdup(req);
437 http->gpc = strdup(v[0]);
438 http->ver = strdup(v[2]);
   /* All three duplicates are checked together; on failure everything
      allocated so far is released through free_http_request(). */
440 if ( (http->cmd == NULL)
441 || (http->gpc == NULL)
442 || (http->ver == NULL) )
445 free_http_request(http);
446 return JB_ERR_MEMORY;
453 /*********************************************************************
455 * Function : simple_domaincmp
457 * Description : Domain-wise Compare fqdn's. The comparison is
458 * both left- and right-anchored. The individual
459 * domain names are compared with simplematch().
460 * This is only used by domain_match.
463 * 1 : pv = array of patterns to compare
464 * 2 : fv = array of domain components to compare
465 * 3 : len = length of the arrays (both arrays are the
466 * same length - if they weren't, it couldn't
467 * possibly be a match).
469 * Returns : 0 => domains are equivalent, else no match.
471 *********************************************************************/
472 static int simple_domaincmp(char **pv, char **fv, int len)
   /* Walks both vectors in step and compares pv[n] with fv[n] through
      simplematch() for each n in [0, len).
      NOTE(review): the statement executed when simplematch() is nonzero and
      the final return are not visible in this view. */
476 for (n = 0; n < len; n++)
478 if (simplematch(pv[n], fv[n]))
489 /*********************************************************************
491 * Function : domain_match
493 * Description : Domain-wise Compare fqdn's. Governed by the bitmap in
494 * pattern->unanchored, the comparison is un-, left-,
495 * right-anchored, or both.
496 * The individual domain names are compared with
500 * 1 : pattern = a domain that may contain a '*' as a wildcard.
501 * 2 : fqdn = domain name against which the patterns are compared.
503 * Returns : 0 => domains are equivalent, else no match.
505 *********************************************************************/
506 static int domain_match(const struct url_spec *pattern, const struct http_request *fqdn)
   /* Chooses how to line up the pattern components against the fqdn
      components from the two anchor bits of pattern->unanchored, then
      delegates the component-by-component comparison to simple_domaincmp().
      Note that a set bit means that side is NOT anchored.
      NOTE(review): the assignments of pv, fv and flen and the length checks
      are not visible in this view. */
508 char **pv, **fv; /* vectors */
   /* Keep only the two anchor bits. */
510 int unanchored = pattern->unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
512 plen = pattern->dcount;
517 /* fqdn is too short to match this pattern */
   /* Only the left side is free: align the pattern with the last plen
      components of the fqdn. */
524 if (unanchored == ANCHOR_LEFT)
529 * Convert this into a fully anchored pattern with
530 * the fqdn and pattern the same length
532 fv += (flen - plen); /* flen - plen >= 0 due to check above */
533 return simple_domaincmp(pv, fv, plen);
535 else if (unanchored == 0)
537 /* Fully anchored, check length */
542 return simple_domaincmp(pv, fv, plen);
   /* Only the right side is free: compare against the first plen components. */
544 else if (unanchored == ANCHOR_RIGHT)
546 /* Left anchored, ignore all extra in fqdn */
547 return simple_domaincmp(pv, fv, plen);
   /* Both sides free: try every offset 0..maxn at which the pattern fits. */
553 int maxn = flen - plen;
554 for (n = 0; n <= maxn; n++)
556 if (!simple_domaincmp(pv, fv, plen))
561 * Doesn't match from start of fqdn
562 * Try skipping first part of fqdn
572 /*********************************************************************
574 * Function : create_url_spec
576 * Description : Creates a "url_spec" structure from a string.
577 * When finished, free with free_url_spec().
580 * 1 : url = Target url_spec to be filled in. Will be
582 * 2 : buf = Source pattern, null terminated. NOTE: The
583 * contents of this buffer are destroyed by this
584 * function. If this function succeeds, the
585 * buffer is copied to url->spec. If this
586 * function fails, the contents of the buffer
589 * Returns : JB_ERR_OK - Success
590 * JB_ERR_MEMORY - Out of memory
591 * JB_ERR_PARSE - Cannot parse regex (Detailed message
592 * written to system log)
594 *********************************************************************/
595 jb_err create_url_spec(struct url_spec * url, const char * buf)
   /* Builds a URL pattern from its textual form: the target is zeroed, the
      text is copied to url->spec, everything from the first '/' becomes the
      path part, and the domain part is lower-cased and split on '.' into
      url->dvec.
      NOTE(review): several statements, including the cleanup on the error
      paths and the final return, are not visible in this view. */
603 memset(url, '\0', sizeof(*url));
605 /* save a copy of the original specification */
606 if ((url->spec = strdup(buf)) == NULL)
608 return JB_ERR_MEMORY;
611 if ((p = strchr(buf, '/')))
613 if (NULL == (url->path = strdup(p)))
616 return JB_ERR_MEMORY;
618 url->pathlen = strlen(url->path);
630 char rebuf[BUFFER_SIZE];
632 if (NULL == (url->preg = zalloc(sizeof(*url->preg))))
636 return JB_ERR_MEMORY;
   /* The path is compiled as a regex anchored at the start of the string.
      NOTE(review): sprintf() is unbounded and rebuf holds BUFFER_SIZE bytes,
      so a path longer than BUFFER_SIZE - 4 characters overruns rebuf. */
639 sprintf(rebuf, "^(%s)", url->path);
641 errcode = regcomp(url->preg, rebuf,
642 (REG_EXTENDED|REG_NOSUB|REG_ICASE));
   /* rebuf is reused to hold the regerror() text; errlen is clamped so the
      terminator written below stays inside the buffer. */
645 size_t errlen = regerror(errcode,
646 url->preg, rebuf, sizeof(rebuf));
648 if (errlen > (sizeof(rebuf) - (size_t)1))
650 errlen = sizeof(rebuf) - (size_t)1;
652 rebuf[errlen] = '\0';
654 log_error(LOG_LEVEL_ERROR, "error compiling %s: %s",
   /* NOTE(review): presumably a ':' introduces the port part - confirm. */
665 if ((p = strchr(buf, ':')) == NULL)
680 /* Parse domain part */
   /* NOTE(review): if buf is an empty string at this point, strlen(buf) - 1
      wraps around and the index is out of bounds - confirm whether an
      earlier, non-visible check excludes that case. */
681 if (buf[strlen(buf) - 1] == '.')
683 url->unanchored |= ANCHOR_RIGHT;
687 url->unanchored |= ANCHOR_LEFT;
690 /* split domain into components */
692 url->dbuffer = strdup(buf);
693 if (NULL == url->dbuffer)
699 #endif /* def REGEX */
700 return JB_ERR_MEMORY;
703 /* map to lower case */
704 for (p = url->dbuffer; *p ; p++)
706 *p = tolower((int)(unsigned char)*p);
709 /* split the domain name into components */
710 url->dcount = ssplit(url->dbuffer, ".", v, SZ(v), 1, 1);
718 #endif /* def REGEX */
721 return JB_ERR_MEMORY;
   /* A pattern with no domain components leaves dvec unset. */
723 else if (url->dcount != 0)
726 /* save a copy of the pointers in dvec */
727 size = url->dcount * sizeof(*url->dvec);
729 url->dvec = (char **)malloc(size);
730 if (NULL == url->dvec)
736 #endif /* def REGEX */
739 return JB_ERR_MEMORY;
742 memcpy(url->dvec, v, size);
751 /*********************************************************************
753 * Function : free_url_spec
755 * Description : Called from the "unloaders". Freez the url
756 * structure elements.
759 * 1 : url = pointer to a url_spec structure.
763 *********************************************************************/
764 void free_url_spec(struct url_spec *url)
   /* A NULL pointer is accepted and ignored.
      NOTE(review): the statements that release the structure's members are
      not visible in this view. */
766 if (url == NULL) return;
783 /*********************************************************************
785 * Function : url_match
787 * Description : Compare a URL against a URL pattern.
790 * 1 : pattern = a URL pattern
791 * 2 : url = URL to match
793 * Returns : Nonzero iff the URL matches the pattern, else 0.
795 *********************************************************************/
796 int url_match(const struct url_spec *pattern,
797 const struct http_request *url)
   /* The result is the logical AND of three tests, each of which passes
      automatically when the pattern leaves that part unspecified (port 0,
      no dbuffer, no path). The expression is therefore nonzero when the URL
      matches and 0 when it does not.
      NOTE(review): the two path tests below are presumably alternatives
      selected by a REGEX conditional - the preprocessor lines are not
      visible in this view. */
799 return ((pattern->port == 0) || (pattern->port == url->port))
800 && ((pattern->dbuffer == NULL) || (domain_match(pattern, url) == 0))
801 && ((pattern->path == NULL) ||
803 (regexec(pattern->preg, url->path, 0, NULL, 0) == 0)
805 (strncmp(pattern->path, url->path, pattern->pathlen) == 0)