/* Revision identification string for this file, in RCS "$Id$" keyword form. */
1 const char encode_rcs[] = "$Id: encode.c,v 1.12 2007/08/04 10:15:51 fabiankeil Exp $";
2 /*********************************************************************
4 * File : $Source: /cvsroot/ijbswa/current/encode.c,v $
6 * Purpose : Functions to encode and decode URLs, and also to
7 * encode cookies and HTML text.
9 * Copyright : Written by and Copyright (C) 2001 the SourceForge
10 * Privoxy team. http://www.privoxy.org/
12 * Based on the Internet Junkbuster originally written
13 * by and Copyright (C) 1997 Anonymous Coders and
14 * Junkbusters Corporation. http://www.junkbusters.com
16 * This program is free software; you can redistribute it
17 * and/or modify it under the terms of the GNU General
18 * Public License as published by the Free Software
19 * Foundation; either version 2 of the License, or (at
20 * your option) any later version.
22 * This program is distributed in the hope that it will
23 * be useful, but WITHOUT ANY WARRANTY; without even the
24 * implied warranty of MERCHANTABILITY or FITNESS FOR A
25 * PARTICULAR PURPOSE. See the GNU General Public
26 * License for more details.
28 * The GNU General Public License should be included with
29 * this file. If not, you can view it at
30 * http://www.gnu.org/copyleft/gpl.html
31 * or write to the Free Software Foundation, Inc., 59
32 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
36 * Revision 1.12 2007/08/04 10:15:51 fabiankeil
37 * Use strlcpy() instead of strcpy().
39 * Revision 1.11 2006/12/28 18:25:53 fabiankeil
40 * Fixed gcc43 compiler warning.
42 * Revision 1.10 2006/07/18 14:48:45 david__schmidt
43 * Reorganizing the repository: swapping out what was HEAD (the old 3.1 branch)
44 * with what was really the latest development (the v_3_0_branch branch)
46 * Revision 1.8 2002/03/26 22:29:54 swa
47 * we have a new homepage!
49 * Revision 1.7 2002/03/24 13:25:43 swa
50 * name change related issues
52 * Revision 1.6 2002/03/13 00:27:04 jongfoster
55 * Revision 1.5 2002/03/07 03:46:53 oes
56 * Fixed compiler warnings etc
58 * Revision 1.4 2002/01/22 23:28:07 jongfoster
59 * Adding convenience function html_encode_and_free_original()
60 * Making all functions accept NULL parameters - in this case, they
61 * simply return NULL. This allows error-checking to be deferred.
63 * Revision 1.3 2001/11/13 00:16:40 jongfoster
64 * Replacing references to malloc.h with the standard stdlib.h
65 * (See ANSI or K&R 2nd Ed)
67 * Revision 1.2 2001/05/17 22:52:35 oes
68 * - Cleaned CRLF's from the sources and related files
70 * Revision 1.1.1.1 2001/05/15 13:58:51 oes
71 * Initial import of version 2.9.3 source tree
74 *********************************************************************/
/*
 * Version string initialized from ENCODE_H_VERSION. That macro is not
 * defined in the visible part of this file (presumably it comes from the
 * matching header -- confirm).
 */
87 const char encode_h_rcs[] = ENCODE_H_VERSION;
/*
 * Maps special characters in a URL to their equivalent % codes.
 *
 * Indexed by the character value (cast to unsigned char).  A NULL entry
 * means the character is copied unchanged; a non-NULL entry is the
 * replacement text.  No replacement is longer than 3 characters.
 * The leading comment on each row is the index of its first entry.
 */
static const char * const url_code_map[256] = {
   /*   0 */ NULL,  "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
   /*  10 */ "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
   /*  20 */ "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D",
   /*  30 */ "%1E", "%1F", "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27",
   /*  40 */ "%28", "%29", NULL,  "%2B", "%2C", NULL,  NULL,  "%2F", NULL,  NULL,
   /*  50 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  "%3A", "%3B",
   /*  60 */ "%3C", "%3D", "%3E", "%3F", NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  70 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  80 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /*  90 */ NULL,  "%5B", "%5C", "%5D", "%5E", NULL,  "%60", NULL,  NULL,  NULL,
   /* 100 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 110 */ NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
   /* 120 */ NULL,  NULL,  NULL,  "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
   /* 130 */ "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
   /* 140 */ "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95",
   /* 150 */ "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
   /* 160 */ "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9",
   /* 170 */ "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3",
   /* 180 */ "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD",
   /* 190 */ "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
   /* 200 */ "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1",
   /* 210 */ "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB",
   /* 220 */ "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5",
   /* 230 */ "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
   /* 240 */ "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9",
   /* 250 */ "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
/*
 * Maps special characters in HTML to their equivalent entities.
 *
 * Indexed by the character value (cast to unsigned char).  A NULL entry
 * means the character is copied unchanged.  Only '"', '&', '<' and '>'
 * have replacements; the longest one ("&quot;") is 6 characters, which
 * is the expansion factor html_encode() sizes its buffer for.
 * The leading comment on each row is the index of its first entry.
 */
static const char * const html_code_map[256] = {
   /*   0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  30 */ NULL, NULL, NULL, NULL, "&quot;", NULL, NULL, NULL, "&amp;", NULL,
   /*  40 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  50 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  60 */ "&lt;", NULL, "&gt;", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  70 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 100 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 110 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 120 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 130 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 140 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 150 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 160 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 170 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 180 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 190 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 200 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 210 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 220 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 230 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 240 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 250 */ NULL, NULL, NULL, NULL, NULL, NULL
};
/*
 * Maps special characters in a cookie to their equivalent % codes.
 *
 * Indexed by the character value (cast to unsigned char).  A NULL entry
 * means the character is copied unchanged.  Only ' ' (-> "+"),
 * ',' (-> "%2C") and ';' (-> "%3B") have replacements, so no
 * replacement is longer than 3 characters.
 * The leading comment on each row is the index of its first entry.
 */
static const char * const cookie_code_map[256] = {
   /*   0 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  10 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  20 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  30 */ NULL, NULL, "+",  NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  40 */ NULL, NULL, NULL, NULL, "%2C", NULL, NULL, NULL, NULL, NULL,
   /*  50 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%3B",
   /*  60 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  70 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  80 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /*  90 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 100 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 110 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 120 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 130 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 140 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 150 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 160 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 170 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 180 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 190 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 200 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 210 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 220 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 230 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 240 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   /* 250 */ NULL, NULL, NULL, NULL, NULL, NULL
};
180 /*********************************************************************
182 * Function : html_encode
184 * Description : Encodes a string so it's not interpreted as
185 * containing HTML tags or entities.
186 * Replaces <, >, &, and " with the appropriate HTML
190 * 1 : s = String to encode. Null-terminated.
192 * Returns : Encoded string, newly allocated on the heap.
193 * Caller is responsible for freeing it with free().
194 * If s is NULL, or on out-of memory, returns NULL.
196 *********************************************************************/
197 char * html_encode(const char *s)
207 /* each input char can expand to at most 6 chars */
208 buf_size = (strlen(s) * 6) + 1;
209 buf = (char *) malloc(buf_size);
215 while ( (c = *s++) != '\0')
217 const char * replace_with = html_code_map[(unsigned char) c];
218 if(replace_with != NULL)
220 const size_t bytes_written = (size_t)(p - buf);
221 assert(bytes_written < buf_size);
222 p += strlcpy(p, replace_with, buf_size - bytes_written);
233 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  html_encode_and_free_original
 *
 * Description :  Encodes a string so it's not interpreted as
 *                containing HTML tags or entities.
 *                Replaces <, >, &, and " with the appropriate HTML
 *                entities.  Free()s original string.
 *                If original string is NULL, simply returns NULL.
 *
 * Parameters  :
 *          1  :  s = String to encode.  Null-terminated.
 *                    Ownership passes to this function; the caller
 *                    must not use s afterwards.
 *
 * Returns     :  Encoded string, newly allocated on the heap.
 *                Caller is responsible for freeing it with free().
 *                If s is NULL, or on out-of memory, returns NULL.
 *                The original is freed even when encoding fails.
 *
 *********************************************************************/
char * html_encode_and_free_original(char *s)
{
   char *result;

   if (s == NULL)
   {
      return NULL;
   }

   result = html_encode(s);
   free(s);

   return result;
}
272 /*********************************************************************
274 * Function : cookie_encode
276 * Description : Encodes a string so it can be used in a cookie.
277 * Replaces " ", ",", and ";" with the appropriate
281 * 1 : s = String to encode. Null-terminated.
283 * Returns : Encoded string, newly allocated on the heap.
284 * Caller is responsible for freeing it with free().
285 * If s is NULL, or on out-of memory, returns NULL.
287 *********************************************************************/
288 char * cookie_encode(const char *s)
298 /* each input char can expand to at most 3 chars */
299 buf_size = (strlen(s) * 3) + 1;
300 buf = (char *) malloc(buf_size);
306 while ( (c = *s++) != '\0')
308 const char * replace_with = cookie_code_map[(unsigned char) c];
309 if (replace_with != NULL)
311 const size_t bytes_written = (size_t)(p - buf);
312 assert(bytes_written < buf_size);
313 p += strlcpy(p, replace_with, buf_size - bytes_written);
324 assert(strlen(buf) < buf_size);
328 /*********************************************************************
330 * Function : url_encode
332 * Description : Encodes a string so it can be used in a URL
333 * query string. Replaces special characters with
334 * the appropriate %xx codes.
337 * 1 : s = String to encode. Null-terminated.
339 * Returns : Encoded string, newly allocated on the heap.
340 * Caller is responsible for freeing it with free().
341 * If s is NULL, or on out-of memory, returns NULL.
343 *********************************************************************/
344 char * url_encode(const char *s)
354 /* each input char can expand to at most 3 chars */
355 buf_size = (strlen(s) * 3) + 1;
356 buf = (char *) malloc(buf_size);
362 while( (c = *s++) != '\0')
364 const char * replace_with = url_code_map[(unsigned char) c];
365 if (replace_with != NULL)
367 const size_t bytes_written = (size_t)(p - buf);
368 assert(bytes_written < buf_size);
369 p += strlcpy(p, replace_with, buf_size - bytes_written);
381 assert(strlen(buf) < buf_size);
/*********************************************************************
 *
 * Function    :  xdtoi
 *
 * Description :  Converts a single hex digit to an integer.
 *
 * Parameters  :
 *          1  :  d = in the range of ['0'..'9', 'A'..'F', 'a'..'f']
 *
 * Returns     :  The integer value, or -1 for non-hex characters.
 *
 *********************************************************************/
static int xdtoi(const int d)
{
   if ((d >= '0') && (d <= '9'))
   {
      return(d - '0');
   }
   else if ((d >= 'a') && (d <= 'f'))
   {
      return(d - 'a' + 10);
   }
   else if ((d >= 'A') && (d <= 'F'))
   {
      return(d - 'A' + 10);
   }
   else
   {
      /* Anything else, including '\0' and negative values. */
      return(-1);
   }
}
/*********************************************************************
 *
 * Function    :  xtoi
 *
 * Description :  Hex string to integer conversion.
 *
 * Parameters  :
 *          1  :  s = a 2 digit hex string (e.g. "1f").  Only the
 *                    first two characters will be looked at.
 *
 * Returns     :  The integer value, or 0 for non-hex strings.
 *
 *********************************************************************/
int xtoi(const char *s)
{
   int d1;
   int d2;

   d1 = xdtoi(*s);
   if (d1 >= 0)
   {
      /*
       * Only look at the second character once the first one is a
       * valid digit, so an empty string is never read past its
       * terminator.
       */
      d2 = xdtoi(*(s + 1));
      if (d2 >= 0)
      {
         return (d1 << 4) + d2;
      }
   }

   return 0;
}
450 /*********************************************************************
452 * Function : url_decode
454 * Description : Decodes a URL query string, replacing %xx codes
455 * with their decoded form.
458 * 1 : s = String to decode. Null-terminated.
460 * Returns : Decoded string, newly allocated on the heap.
461 * Caller is responsible for freeing it with free().
463 *********************************************************************/
464 char *url_decode(const char * s)
466 char *buf = malloc(strlen(s) + 1);
481 if ((*q = (char)xtoi(s + 1)) != '\0')
488 /* malformed, just use it */