1 const char urlmatch_rcs[] = "$Id: urlmatch.c,v 1.1 2002/01/17 20:53:46 jongfoster Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/urlmatch.c,v $
6 * Purpose : Declares functions to match URLs against URL
9 * Copyright : Written by and Copyright (C) 2001 the SourceForge
10 * IJBSWA team. http://ijbswa.sourceforge.net
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
35 * $Log: urlmatch.c,v $
36 * Revision 1.1 2002/01/17 20:53:46 jongfoster
37 * Moving all our URL and URL pattern parsing code to the same file - it
38 * was scattered around in filters.c, loaders.c and parsers.c.
40 * Providing a single, simple url_match(pattern,url) function - rather than
41 * the 3-line match routine which was repeated all over the place.
43 * Renaming free_url to free_url_spec, since it frees a struct url_spec.
45 * Providing parse_http_url() so that URLs can be parsed without faking a
46 * HTTP request line for parse_http_request() or repeating the parsing
47 * code (both of which were techniques that were actually in use).
49 * Standardizing that struct http_request is used to represent a URL, and
50 * struct url_spec is used to represent a URL pattern. (Before, URLs were
51 * represented as separate variables and a partially-filled-in url_spec).
54 *********************************************************************/
61 #include <sys/types.h>
69 #if !defined(_WIN32) && !defined(__OS2__)
79 const char urlmatch_h_rcs[] = URLMATCH_H_VERSION;
/*
 * Fix a problem with Solaris.  There should be no effect on other
 * platforms.
 *
 * Solaris's isspace() is a macro which uses its argument directly
 * as an array index.  Therefore we need to make sure that high-bit
 * characters generate +ve values, and ideally we also want to make
 * the argument match the declared parameter type of "int".
 *
 * Why did they write a character function that can't take a simple
 * "char" argument?  Doh!
 *
 * The macro parameter is deliberately NOT spelled with a leading
 * double underscore: such identifiers are reserved for the C
 * implementation.
 */
#define ijb_isupper(c) isupper((int)(unsigned char)(c))
#define ijb_tolower(c) tolower((int)(unsigned char)(c))
95 /*********************************************************************
97 * Function : free_http_request
99 * Description : Freez a http_request structure
102 * 1 : http = points to a http_request structure to free
106 *********************************************************************/
107 void free_http_request(struct http_request *http)
115 freez(http->hostport);
118 freez(http->host_ip_addr_str);
119 freez(http->dbuffer);
125 /*********************************************************************
127 * Function : parse_http_url
129 * Description : Parse out the host and port from the URL. Find the
130 * hostname & path, port (if ':'), and/or password (if '@')
133 * 1 : url = URL (or is it URI?) to break down
134 * 2 : http = pointer to the http structure to hold elements.
135 * Will be zeroed before use. Note that this
136 * function sets the http->gpc and http->ver
138 * 3 : csp = Current client state (buffers, headers, etc...)
140 * Returns : JB_ERR_OK on success
141 * JB_ERR_MEMORY on out of memory
142 * JB_ERR_CGI_PARAMS on malformed command/URL
143 * or >100 domains deep.
145 *********************************************************************/
146 jb_err parse_http_url(const char * url,
147 struct http_request *http,
148 struct client_state *csp)
151 * Zero out the results structure
153 memset(http, '\0', sizeof(*http));
157 * Save our initial URL
159 http->url = strdup(url);
160 if (http->url == NULL)
162 return JB_ERR_MEMORY;
167 * Split URL into protocol,hostport,path.
177 return JB_ERR_MEMORY;
180 /* Find the start of the URL in our scratch space */
182 if (strncmpic(url_noproto, "http://", 7) == 0)
187 else if (strncmpic(url_noproto, "https://", 8) == 0)
197 url_path = strchr(url_noproto, '/');
198 if (url_path != NULL)
203 * NOTE: The following line ignores the path for HTTPS URLS.
204 * This means that you get consistent behaviour if you type a
205 * https URL in and it's parsed by the function. (When the
206 * URL is actually retrieved, SSL hides the path part).
208 http->path = strdup(http->ssl ? "/" : url_path);
210 http->hostport = strdup(url_noproto);
215 * Repair broken HTTP requests that don't contain a path,
216 * or CONNECT requests
218 http->path = strdup("/");
219 http->hostport = strdup(url_noproto);
224 if ( (http->path == NULL)
225 || (http->hostport == NULL))
228 free_http_request(http);
229 return JB_ERR_MEMORY;
235 * Split hostport into user/password (ignored), host, port.
242 buf = strdup(http->hostport);
245 free_http_request(http);
246 return JB_ERR_MEMORY;
249 /* check if url contains username and/or password */
250 host = strchr(buf, '@');
253 /* Contains username/password, skip it and the @ sign. */
258 /* No username or password. */
262 /* check if url contains port */
263 port = strchr(host, ':');
267 /* Terminate hostname and point to start of port string */
269 http->port = atoi(port);
273 /* No port specified. */
274 http->port = (http->ssl ? 143 : 80);
277 http->host = strdup(host);
281 if (http->host == NULL)
283 free_http_request(http);
284 return JB_ERR_MEMORY;
290 * Split domain name so we can compare it against wildcards
293 char *vec[BUFFER_SIZE];
297 http->dbuffer = strdup(http->host);
298 if (NULL == http->dbuffer)
300 free_http_request(http);
301 return JB_ERR_MEMORY;
304 /* map to lower case */
305 for (p = http->dbuffer; *p ; p++)
307 *p = tolower((int)(unsigned char)*p);
310 /* split the domain name into components */
311 http->dcount = ssplit(http->dbuffer, ".", vec, SZ(vec), 1, 1);
313 if (http->dcount <= 0)
316 * Error: More than SZ(vec) components in domain
317 * or: no components in domain
319 free_http_request(http);
323 /* save a copy of the pointers in dvec */
324 size = http->dcount * sizeof(*http->dvec);
326 http->dvec = (char **)malloc(size);
327 if (NULL == http->dvec)
329 free_http_request(http);
330 return JB_ERR_MEMORY;
333 memcpy(http->dvec, vec, size);
341 /*********************************************************************
343 * Function : parse_http_request
345 * Description : Parse out the host and port from the URL. Find the
346 * hostname & path, port (if ':'), and/or password (if '@')
349 * 1 : req = HTTP request line to break down
350 * 2 : http = pointer to the http structure to hold elements
351 * 3 : csp = Current client state (buffers, headers, etc...)
353 * Returns : JB_ERR_OK on success
354 * JB_ERR_MEMORY on out of memory
355 * JB_ERR_CGI_PARAMS on malformed command/URL
356 * or >100 domains deep.
358 *********************************************************************/
359 jb_err parse_http_request(const char *req,
360 struct http_request *http,
361 struct client_state *csp)
369 memset(http, '\0', sizeof(*http));
374 return JB_ERR_MEMORY;
377 n = ssplit(buf, " \r\n", v, SZ(v), 1, 1);
384 /* this could be a CONNECT request */
385 if (strcmpic(v[0], "connect") == 0)
390 /* or it could be any other basic HTTP request type */
391 else if ((0 == strcmpic(v[0], "get"))
392 || (0 == strcmpic(v[0], "head"))
393 || (0 == strcmpic(v[0], "post"))
394 || (0 == strcmpic(v[0], "put"))
395 || (0 == strcmpic(v[0], "delete"))
397 /* or a webDAV extension (RFC2518) */
398 || (0 == strcmpic(v[0], "propfind"))
399 || (0 == strcmpic(v[0], "proppatch"))
400 || (0 == strcmpic(v[0], "move"))
401 || (0 == strcmpic(v[0], "copy"))
402 || (0 == strcmpic(v[0], "mkcol"))
403 || (0 == strcmpic(v[0], "lock"))
404 || (0 == strcmpic(v[0], "unlock"))
412 /* Unknown HTTP method */
417 err = parse_http_url(v[1], http, csp);
425 * Copy the details into the structure
427 http->ssl = is_connect;
428 http->cmd = strdup(req);
429 http->gpc = strdup(v[0]);
430 http->ver = strdup(v[2]);
432 if ( (http->cmd == NULL)
433 || (http->gpc == NULL)
434 || (http->ver == NULL) )
437 free_http_request(http);
438 return JB_ERR_MEMORY;
/*********************************************************************
 *
 * Function    :  simple_domaincmp
 *
 * Description :  Domain-wise Compare fqdn's.  The comparison is
 *                both left- and right-anchored.  The individual
 *                domain names are compared with simplematch().
 *                This is only used by domain_match.
 *
 * Parameters  :
 *          1  :  pv = array of patterns to compare
 *          2  :  fv = array of domain components to compare
 *          3  :  len = length of the arrays (both arrays are the
 *                      same length - if they weren't, it couldn't
 *                      possibly be a match).
 *
 * Returns     :  0 => domains are equivalent, else no match.
 *
 *********************************************************************/
static int simple_domaincmp(char **pv, char **fv, int len)
{
   int n;

   for (n = 0; n < len; n++)
   {
      /* A non-zero result from simplematch() is treated as a mismatch. */
      if (simplematch(pv[n], fv[n]))
      {
         return 1;
      }
   }

   /* Every component matched (trivially so when len is 0). */
   return 0;
}
481 /*********************************************************************
483 * Function : domain_match
485 * Description : Domain-wise Compare fqdn's. Governed by the bimap in
486 * pattern->unachored, the comparison is un-, left-,
487 * right-anchored, or both.
488 * The individual domain names are compared with
492 * 1 : pattern = a domain that may contain a '*' as a wildcard.
493 * 2 : fqdn = domain name against which the patterns are compared.
495 * Returns : 0 => domains are equivalent, else no match.
497 *********************************************************************/
498 static int domain_match(const struct url_spec *pattern, const struct http_request *fqdn)
500 char **pv, **fv; /* vectors */
502 int unanchored = pattern->unanchored & (ANCHOR_RIGHT | ANCHOR_LEFT);
504 plen = pattern->dcount;
509 /* fqdn is too short to match this pattern */
516 if (unanchored == ANCHOR_LEFT)
521 * Convert this into a fully anchored pattern with
522 * the fqdn and pattern the same length
524 fv += (flen - plen); /* flen - plen >= 0 due to check above */
525 return simple_domaincmp(pv, fv, plen);
527 else if (unanchored == 0)
529 /* Fully anchored, check length */
534 return simple_domaincmp(pv, fv, plen);
536 else if (unanchored == ANCHOR_RIGHT)
538 /* Left anchored, ignore all extra in fqdn */
539 return simple_domaincmp(pv, fv, plen);
545 int maxn = flen - plen;
546 for (n = 0; n <= maxn; n++)
548 if (!simple_domaincmp(pv, fv, plen))
553 * Doesn't match from start of fqdn
554 * Try skipping first part of fqdn
564 /*********************************************************************
566 * Function : create_url_spec
568 * Description : Creates a "url_spec" structure from a string.
569 * When finished, free with unload_url().
572 * 1 : url = Target url_spec to be filled in. Will be
574 * 2 : buf = Source pattern, null terminated. NOTE: The
575 * contents of this buffer are destroyed by this
576 * function. If this function succeeds, the
577 * buffer is copied to url->spec. If this
578 * function fails, the contents of the buffer
581 * Returns : JB_ERR_OK - Success
582 * JB_ERR_MEMORY - Out of memory
583 * JB_ERR_PARSE - Cannot parse regex (Detailed message
584 * written to system log)
586 *********************************************************************/
587 jb_err create_url_spec(struct url_spec * url, const char * buf)
595 memset(url, '\0', sizeof(*url));
597 /* save a copy of the orignal specification */
598 if ((url->spec = strdup(buf)) == NULL)
600 return JB_ERR_MEMORY;
603 if ((p = strchr(buf, '/')))
605 if (NULL == (url->path = strdup(p)))
608 return JB_ERR_MEMORY;
610 url->pathlen = strlen(url->path);
622 char rebuf[BUFFER_SIZE];
624 if (NULL == (url->preg = zalloc(sizeof(*url->preg))))
628 return JB_ERR_MEMORY;
631 sprintf(rebuf, "^(%s)", url->path);
633 errcode = regcomp(url->preg, rebuf,
634 (REG_EXTENDED|REG_NOSUB|REG_ICASE));
637 size_t errlen = regerror(errcode,
638 url->preg, rebuf, sizeof(rebuf));
640 if (errlen > (sizeof(rebuf) - (size_t)1))
642 errlen = sizeof(rebuf) - (size_t)1;
644 rebuf[errlen] = '\0';
646 log_error(LOG_LEVEL_ERROR, "error compiling %s: %s",
657 if ((p = strchr(buf, ':')) == NULL)
672 /* Parse domain part */
673 if (buf[strlen(buf) - 1] == '.')
675 url->unanchored |= ANCHOR_RIGHT;
679 url->unanchored |= ANCHOR_LEFT;
682 /* split domain into components */
684 url->dbuffer = strdup(buf);
685 if (NULL == url->dbuffer)
691 #endif /* def REGEX */
692 return JB_ERR_MEMORY;
695 /* map to lower case */
696 for (p = url->dbuffer; *p ; p++)
698 *p = tolower((int)(unsigned char)*p);
701 /* split the domain name into components */
702 url->dcount = ssplit(url->dbuffer, ".", v, SZ(v), 1, 1);
710 #endif /* def REGEX */
713 return JB_ERR_MEMORY;
715 else if (url->dcount != 0)
718 /* save a copy of the pointers in dvec */
719 size = url->dcount * sizeof(*url->dvec);
721 url->dvec = (char **)malloc(size);
722 if (NULL == url->dvec)
728 #endif /* def REGEX */
731 return JB_ERR_MEMORY;
734 memcpy(url->dvec, v, size);
743 /*********************************************************************
745 * Function : free_url_spec
747 * Description : Called from the "unloaders". Freez the url
748 * structure elements.
751 * 1 : url = pointer to a url_spec structure.
755 *********************************************************************/
756 void free_url_spec(struct url_spec *url)
758 if (url == NULL) return;
775 /*********************************************************************
777 * Function : url_match
779 * Description : Compare a URL against a URL pattern.
782 * 1 : pattern = a URL pattern
783 * 2 : url = URL to match
785 * Returns : Nonzero iff the URL matches the pattern, else 0.
787 *********************************************************************/
788 int url_match(const struct url_spec *pattern,
789 const struct http_request *url)
791 return ((pattern->port == 0) || (pattern->port == url->port))
792 && ((pattern->dbuffer == NULL) || (domain_match(pattern, url) == 0))
793 && ((pattern->path == NULL) ||
795 (regexec(pattern->preg, url->path, 0, NULL, 0) == 0)
797 (strncmp(pattern->path, url->path, pattern->pathlen) == 0)