/* RCS "$Id$" keyword string identifying the revision of this source file. */
1 const char encode_rcs[] = "$Id: encode.c,v 1.13 2007/08/18 14:34:27 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/encode.c,v $
6 * Purpose : Functions to encode and decode URLs, and also to
7 * encode cookies and HTML text.
9 * Copyright : Written by and Copyright (C) 2001 the SourceForge
10 * Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
36 * Revision 1.13 2007/08/18 14:34:27 fabiankeil
37 * Make xtoi() extern so it can be used in pcrs.c.
39 * Revision 1.12 2007/08/04 10:15:51 fabiankeil
40 * Use strlcpy() instead of strcpy().
42 * Revision 1.11 2006/12/28 18:25:53 fabiankeil
43 * Fixed gcc43 compiler warning.
45 * Revision 1.10 2006/07/18 14:48:45 david__schmidt
46 * Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch)
47 * with what was really the latest development (the v_3_0_branch branch)
49 * Revision 1.8 2002/03/26 22:29:54 swa
50 * we have a new homepage!
52 * Revision 1.7 2002/03/24 13:25:43 swa
53 * name change related issues
55 * Revision 1.6 2002/03/13 00:27:04 jongfoster
58 * Revision 1.5 2002/03/07 03:46:53 oes
59 * Fixed compiler warnings etc
61 * Revision 1.4 2002/01/22 23:28:07 jongfoster
62 * Adding convenience function html_encode_and_free_original()
63 * Making all functions accept NULL parameters - in this case, they
64 * simply return NULL. This allows error-checking to be deferred.
66 * Revision 1.3 2001/11/13 00:16:40 jongfoster
67 * Replacing references to malloc.h with the standard stdlib.h
68 * (See ANSI or K&R 2nd Ed)
70 * Revision 1.2 2001/05/17 22:52:35 oes
71 * - Cleaned CRLF's from the sources and related files
73 * Revision 1.1.1.1 2001/05/15 13:58:51 oes
74 * Initial import of version 2.9.3 source tree
77 *********************************************************************/
/*
 * Version string for the matching header. ENCODE_H_VERSION is not defined
 * in this file; presumably it comes from encode.h -- TODO confirm.
 */
90 const char encode_h_rcs[] = ENCODE_H_VERSION;
/*
 * Maps every byte value to the text that replaces it in a URL query
 * string, or NULL when the byte is copied through unchanged.
 *
 * Passed through unchanged: ASCII letters and digits, '*', '-', '.',
 * '@' and '_'.  A space becomes "+".  Every other byte becomes its
 * "%XX" escape (upper-case hex).  Entry 0 is NULL because NUL is the
 * string terminator and never reaches the lookup.
 *
 * The table is laid out eight entries per row; the comment at the start
 * of each row is the index of its first entry.
 */
static const char * const url_code_map[256] = {
   /* 0x00 */ NULL,  "%01", "%02", "%03", "%04", "%05", "%06", "%07",
   /* 0x08 */ "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
   /* 0x10 */ "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
   /* 0x18 */ "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
   /* 0x20 */ "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27",
   /* 0x28 */ "%28", "%29", NULL,  "%2B", "%2C", NULL,  NULL,  "%2F",
   /* 0x30 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x38 */ NULL,  NULL,  "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
   /* 0x40 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x48 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x50 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x58 */ NULL,  NULL,  NULL,  "%5B", "%5C", "%5D", "%5E", NULL,
   /* 0x60 */ "%60", NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x68 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x70 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 0x78 */ NULL,  NULL,  NULL,  "%7B", "%7C", "%7D", "%7E", "%7F",
   /* 0x80 */ "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
   /* 0x88 */ "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
   /* 0x90 */ "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
   /* 0x98 */ "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
   /* 0xA0 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
   /* 0xA8 */ "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
   /* 0xB0 */ "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
   /* 0xB8 */ "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
   /* 0xC0 */ "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
   /* 0xC8 */ "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
   /* 0xD0 */ "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
   /* 0xD8 */ "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
   /* 0xE0 */ "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
   /* 0xE8 */ "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
   /* 0xF0 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
   /* 0xF8 */ "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
/*
 * Maps special characters in HTML to their equivalent entities.
 *
 * Indexed by byte value.  A NULL entry means the byte is copied through
 * unchanged; only '"', '&', '<' and '>' have replacements.  The longest
 * replacement ("&quot;") is six characters, which is the expansion
 * factor html_encode() uses when sizing its output buffer.
 *
 * The table is laid out eight entries per row; the comment at the start
 * of each row is the index of its first entry.
 */
static const char * const html_code_map[256] = {
   /* 0x00 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x08 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x18 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x20 */ NULL, NULL, "&quot;", NULL, NULL, NULL, "&amp;", NULL,
   /* 0x28 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x30 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x38 */ NULL, NULL, NULL, NULL, "&lt;", NULL, "&gt;", NULL,
   /* 0x40 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x48 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x50 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x58 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x60 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x68 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x70 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x78 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x88 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0x98 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xA0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xA8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xB0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xB8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xC0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xC8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xD0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xD8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xE0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xE8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xF0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 0xF8 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
153 /*********************************************************************
155 * Function : html_encode
157 * Description : Encodes a string so it's not interpreted as
158 * containing HTML tags or entities.
159 * Replaces <, >, &, and " with the appropriate HTML
163 * 1 : s = String to encode. Null-terminated.
165 * Returns : Encoded string, newly allocated on the heap.
166 * Caller is responsible for freeing it with free().
167 * If s is NULL, or on out-of memory, returns NULL.
169 *********************************************************************/
170 char * html_encode(const char *s)
180 /* each input char can expand to at most 6 chars */
181 buf_size = (strlen(s) * 6) + 1;
182 buf = (char *) malloc(buf_size);
188 while ( (c = *s++) != '\0')
190 const char * replace_with = html_code_map[(unsigned char) c];
191 if(replace_with != NULL)
193 const size_t bytes_written = (size_t)(p - buf);
194 assert(bytes_written < buf_size);
195 p += strlcpy(p, replace_with, buf_size - bytes_written);
206 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  html_encode_and_free_original
 *
 * Description :  Encodes a string so it's not interpreted as
 *                containing HTML tags or entities.
 *                Replaces <, >, &, and " with the appropriate HTML
 *                entities.  Free()s original string.
 *                If original string is NULL, simply returns NULL.
 *
 * Parameters  :
 *          1  :  s = String to encode.  Null-terminated.
 *                    Ownership passes to this function; the caller
 *                    must not use s afterwards.
 *
 * Returns     :  Encoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *                If s is NULL, or on out-of-memory, returns NULL.
 *                The original string is freed in every case.
 *
 *********************************************************************/
char * html_encode_and_free_original(char *s)
{
   char * result;

   if (s == NULL)
   {
      return NULL;
   }

   result = html_encode(s);

   /* The original is released even when encoding failed. */
   free(s);

   return result;
}
245 /*********************************************************************
247 * Function : url_encode
249 * Description : Encodes a string so it can be used in a URL
250 * query string. Replaces special characters with
251 * the appropriate %xx codes.
254 * 1 : s = String to encode. Null-terminated.
256 * Returns : Encoded string, newly allocated on the heap.
257 * Caller is responsible for freeing it with free().
258 * If s is NULL, or on out-of memory, returns NULL.
260 *********************************************************************/
261 char * url_encode(const char *s)
271 /* each input char can expand to at most 3 chars */
272 buf_size = (strlen(s) * 3) + 1;
273 buf = (char *) malloc(buf_size);
279 while( (c = *s++) != '\0')
281 const char * replace_with = url_code_map[(unsigned char) c];
282 if (replace_with != NULL)
284 const size_t bytes_written = (size_t)(p - buf);
285 assert(bytes_written < buf_size);
286 p += strlcpy(p, replace_with, buf_size - bytes_written);
298 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  xdtoi
 *
 * Description :  Converts a single hex digit to an integer.
 *
 * Parameters  :
 *          1  :  d = in the range of ['0'..'9', 'A'..'F', 'a'..'f']
 *
 * Returns     :  The integer value, or -1 for non-hex characters.
 *
 *********************************************************************/
static int xdtoi(const int d)
{
   if ((d >= '0') && (d <= '9'))
   {
      return d - '0';
   }

   if ((d >= 'a') && (d <= 'f'))
   {
      return d - 'a' + 10;
   }

   if ((d >= 'A') && (d <= 'F'))
   {
      return d - 'A' + 10;
   }

   /* Anything else, including '\0' and negative values, is not hex. */
   return -1;
}
/*********************************************************************
 *
 * Function    :  xtoi
 *
 * Description :  Hex string to integer conversion.
 *
 * Parameters  :
 *          1  :  s = a 2 digit hex string (e.g. "1f"). Only the
 *                    first two characters will be looked at.
 *
 * Returns     :  The integer value, or 0 for non-hex strings.
 *
 *********************************************************************/
int xtoi(const char *s)
{
   int d1;
   int d2;

   d1 = xdtoi(*s);
   if (d1 < 0)
   {
      /*
       * Stop before touching s[1]: if s[0] is the terminator,
       * s[1] is past the end of the string.
       */
      return 0;
   }

   d2 = xdtoi(*(s + 1));
   if (d2 < 0)
   {
      return 0;
   }

   return (d1 << 4) + d2;
}
367 /*********************************************************************
369 * Function : url_decode
371 * Description : Decodes a URL query string, replacing %xx codes
372 * with their decoded form.
375 * 1 : s = String to decode. Null-terminated.
377 * Returns : Decoded string, newly allocated on the heap.
378 * Caller is responsible for freeing it with free().
380 *********************************************************************/
381 char *url_decode(const char * s)
383 char *buf = malloc(strlen(s) + 1);
398 if ((*q = (char)xtoi(s + 1)) != '\0')
405 /* malformed, just use it */