From 784630767e0247ba970bbc8808fb34147e78c47e Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sun, 6 Nov 2011 11:48:23 +0000 Subject: [PATCH] Reimplement segment splitting in get_last_url() by using ssplit() It's available on all platforms and doesn't require locking. While at it, properly detect URLs that are concatenated together without separator again. All the redirect regression tests should work now. --- configure.in | 6 ++--- filters.c | 75 ++++++++++++++++++++++++++-------------------------- jcc.c | 9 +------ jcc.h | 6 +---- 4 files changed, 42 insertions(+), 54 deletions(-) diff --git a/configure.in b/configure.in index 8845863e..bec11de3 100644 --- a/configure.in +++ b/configure.in @@ -1,6 +1,6 @@ dnl Process this file with autoconf to produce a configure script. dnl -dnl $Id: configure.in,v 1.160 2011/09/04 11:11:17 fabiankeil Exp $ +dnl $Id: configure.in,v 1.161 2011/10/30 16:20:12 fabiankeil Exp $ dnl dnl Written by and Copyright (C) 2001-2010 the dnl Privoxy team. http://www.privoxy.org/ @@ -32,7 +32,7 @@ dnl ================================================================= dnl AutoConf Initialization dnl ================================================================= -AC_REVISION($Revision: 1.160 $) +AC_REVISION($Revision: 1.161 $) AC_INIT(jcc.c) if test ! -f config.h.in; then @@ -704,7 +704,7 @@ dnl bcopy is for PCRE AC_CHECK_FUNCS([bcopy]) AC_PROG_GCC_TRADITIONAL AC_TYPE_SIGNAL -AC_CHECK_FUNCS([access atexit getcwd gethostbyaddr gethostbyaddr_r gethostbyname gethostbyname_r gettimeofday inet_ntoa localtime_r memchr memmove memset poll putenv random regcomp select setlocale snprintf socket strchr strdup strerror strtok strftime strlcat strlcpy strptime strstr strtoul timegm tzset]) +AC_CHECK_FUNCS([access atexit getcwd gethostbyaddr gethostbyaddr_r gethostbyname gethostbyname_r gettimeofday inet_ntoa localtime_r memchr memmove memset poll putenv random regcomp select setlocale snprintf socket strchr strdup strerror strftime strlcat strlcpy strptime strstr strtoul timegm tzset]) dnl Checks for RFC 2553 resolver and socket functions AC_ARG_ENABLE(ipv6-support, diff --git a/filters.c b/filters.c index 39946389..4f4684e2 100644 --- a/filters.c +++ b/filters.c @@ -1,4 +1,4 @@ -const char filters_rcs[] = "$Id: filters.c,v 1.155 2011/11/06 11:36:42 fabiankeil Exp $"; +const char filters_rcs[] = "$Id: filters.c,v 1.156 2011/11/06 11:45:28 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/filters.c,v $ @@ -78,11 +78,6 @@ const char filters_rcs[] = "$Id: filters.c,v 1.155 2011/11/06 11:36:42 fabiankei #include "urlmatch.h" #include "loaders.h" -#ifdef HAVE_STRTOK -/* Only used for locks */ -#include "jcc.h" -#endif /* def HAVE_STRTOK */ - #ifdef _WIN32 #include "win32.h" #endif @@ -1115,62 +1110,66 @@ char *get_last_url(char *subject, const char *redirect_mode) log_error(LOG_LEVEL_REDIRECTS, "Checking \"%s\" for encoded redirects.", subject); -#if defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK) /* * Check each parameter in the URL separately. * Sectionize the URL at "?" and "&", - * then URL-decode each component, + * go backwards through the segments, URL-decode them * and look for a URL in the decoded result. - * Keep the last one we spot. + * Stop the search after the first match. + */ + char *url_segment = NULL; + /* + * XXX: This estimate is guaranteed to be high enough as we + * let ssplit() ignore empty fields, but also a bit wasteful. */ - char *found = NULL; + size_t max_segments = strlen(subject) / 2; + char **url_segments = malloc(max_segments * sizeof(char *)); + int segments; + + if (NULL == url_segments) + { + log_error(LOG_LEVEL_ERROR, "Out of memory while decoding URL: %s", new_url); + freez(subject); + return NULL; + } + + segments = ssplit(subject, "?&", url_segments, max_segments, 1, 1); - privoxy_mutex_lock(&strtok_mutex); - char *token = strtok(subject, "?&"); - while (token) + while (segments-- > 0) { - char *dtoken = url_decode(token); + char *dtoken = url_decode(url_segments[segments]); if (NULL == dtoken) { - log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", token); + log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", url_segments[segments]); continue; } - char *http_url = strstr(dtoken, "http://"); - char *https_url = strstr(dtoken, "https://"); - char *last_url = (http_url && https_url - ? (http_url < https_url ? http_url : https_url) - : (http_url ? http_url : https_url)); - if (last_url) + url_segment = strstr(dtoken, "http://"); + if (NULL == url_segment) { - freez(found); - found = strdup(last_url); - if (found == NULL) + url_segment = strstr(dtoken, "https://"); + } + if (NULL != url_segment) + { + url_segment = strdup(url_segment); + freez(dtoken); + if (url_segment == NULL) { log_error(LOG_LEVEL_ERROR, "Out of memory while searching for redirects."); - privoxy_mutex_unlock(&strtok_mutex); return NULL; } + break; } freez(dtoken); - token = strtok(NULL, "?&"); } - privoxy_mutex_unlock(&strtok_mutex); freez(subject); + freez(url_segments); - return found; -#else - new_url = url_decode(subject); - if (new_url != NULL) - { - freez(subject); - subject = new_url; - } - else + if (url_segment == NULL) { - log_error(LOG_LEVEL_ERROR, "Unable to decode \"%s\".", subject); + return NULL; } -#endif /* defined(MUTEX_LOCKS_AVAILABLE) && defined(HAVE_STRTOK) */ + subject = url_segment; } /* Else, just look for a URL inside this one, without decoding anything. */ diff --git a/jcc.c b/jcc.c index 3efa3900..946acb0b 100644 --- a/jcc.c +++ b/jcc.c @@ -1,4 +1,4 @@ -const char jcc_rcs[] = "$Id: jcc.c,v 1.371 2011/10/23 11:24:33 fabiankeil Exp $"; +const char jcc_rcs[] = "$Id: jcc.c,v 1.372 2011/10/30 16:20:12 fabiankeil Exp $"; /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/jcc.c,v $ @@ -198,10 +198,6 @@ privoxy_mutex_t localtime_mutex; privoxy_mutex_t rand_mutex; #endif /* ndef HAVE_RANDOM */ -#ifdef HAVE_STRTOK -privoxy_mutex_t strtok_mutex; -#endif /* def HAVE_STRTOK */ - #endif /* def MUTEX_LOCKS_AVAILABLE */ #if defined(unix) @@ -2852,9 +2848,6 @@ static void initialize_mutexes(void) privoxy_mutex_init(&rand_mutex); #endif /* ndef HAVE_RANDOM */ -#ifdef HAVE_STRTOK - privoxy_mutex_init(&strtok_mutex); -#endif /* def HAVE_STRTOK */ #endif /* def MUTEX_LOCKS_AVAILABLE */ } diff --git a/jcc.h b/jcc.h index e871f5df..0035a3ec 100644 --- a/jcc.h +++ b/jcc.h @@ -1,6 +1,6 @@ #ifndef JCC_H_INCLUDED #define JCC_H_INCLUDED -#define JCC_H_VERSION "$Id: jcc.h,v 1.30 2011/09/04 11:10:56 fabiankeil Exp $" +#define JCC_H_VERSION "$Id: jcc.h,v 1.31 2011/10/30 16:20:12 fabiankeil Exp $" /********************************************************************* * * File : $Source: /cvsroot/ijbswa/current/jcc.h,v $ @@ -99,10 +99,6 @@ extern privoxy_mutex_t resolver_mutex; extern privoxy_mutex_t rand_mutex; #endif /* ndef HAVE_RANDOM */ -#ifdef HAVE_STRTOK -extern privoxy_mutex_t strtok_mutex; -#endif /* ndef HAVE_STRTOK */ - #endif /* FEATURE_PTHREAD */ /* Functions */ -- 2.39.2