annotate src/url.c @ 2048:5060d415a85a

clickable menu items (even those introducing submenus) MUST have callbacks I clicked on the "Panel size" item itself instead of any of the options in its submenu, and: Segfault!
author corvid <corvid@lavabit.com>
date Thu, 26 May 2011 02:51:18 +0000
parents f4a6b351012d
children
rev   line source
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
1 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
2 * File: url.c
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
3 *
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
4 * Copyright (C) 2001-2009 Jorge Arellano Cid <jcid@dillo.org>
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
5 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
6 * This program is free software; you can redistribute it and/or modify
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
7 * it under the terms of the GNU General Public License as published by
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
8 * the Free Software Foundation; either version 3 of the License, or
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
9 * (at your option) any later version.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
10 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
11
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
12 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
13 * Parse and normalize all URL's inside Dillo.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
14 * - <scheme> <authority> <path> <query> and <fragment> point to 'buffer'.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
15 * - 'url_string' is built upon demand (transparent to the caller).
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
16 * - 'hostname' and 'port' are also being handled on demand.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
17 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
18
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
19 /*
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
20 * Regular Expression as given in RFC3986 for URL parsing.
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
21 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
22 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
23 * 12 3 4 5 6 7 8 9
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
24 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
25 * scheme = $2
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
26 * authority = $4
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
27 * path = $5
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
28 * query = $7
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
29 * fragment = $9
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
30 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
31 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
32 * RFC-2396 BNF:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
33 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
34 * absoluteURI = scheme ":" (hier_part | opaque_part)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
35 * hier_part = (net_path | abs_path) ["?" query]
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
36 * net_path = "//" authority[abs_path]
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
37 * abs_path = "/" path_segments
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
38 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
39 * Notes:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
40 * - "undefined" means "preceding separator does not appear".
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
41 * - path is never "undefined" though it may be "empty".
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
42 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
43
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
44 #include <stdlib.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
45 #include <string.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
46 #include <ctype.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
47
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
48 #include "url.h"
360
1354085ccbfe - Removed the remaining DEBUG_MSG in src/
jcid
parents: 334
diff changeset
49 #include "msg.h"
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
50
109
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
/* Hexadecimal digit lookup string (value 0-15 -> upper-case character) */
static const char *HEX = "0123456789ABCDEF";

/*
 * URL-field compare methods.
 *
 * Both macros order two possibly-NULL strings:
 *  - both non-NULL: result of strcmp() / dStrcasecmp(),
 *  - both NULL:     0,
 *  - only s2 NULL:  1,
 *  - only s1 NULL: -1.
 *
 * The whole expansion is parenthesized so the macros can be used safely
 * inside larger expressions (e.g. "URL_STR_FIELD_CMP(a,b) == 0").
 * Note: the arguments are evaluated more than once; do not pass
 * expressions with side effects.
 */
#define URL_STR_FIELD_CMP(s1,s2) \
   (((s1) && (s2)) ? strcmp((s1),(s2)) : \
    (!(s1) && !(s2)) ? 0 : (s1) ? 1 : -1)
#define URL_STR_FIELD_I_CMP(s1,s2) \
   (((s1) && (s2)) ? dStrcasecmp((s1),(s2)) : \
    (!(s1) && !(s2)) ? 0 : (s1) ? 1 : -1)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
58
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
59 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
60 * Return the url as a string.
50
22a3dedcb1a0 - s/camp/field/ s/CAMP/FIELD/
jcid
parents: 35
diff changeset
61 * (initializing 'url_string' field if necessary)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
62 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
63 char *a_Url_str(const DilloUrl *u)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
64 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
65 /* Internal url handling IS transparent to the caller */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
66 DilloUrl *url = (DilloUrl *) u;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
67
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
68 dReturn_val_if_fail (url != NULL, NULL);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
69
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
70 if (!url->url_string) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
71 url->url_string = dStr_sized_new(60);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
72 dStr_sprintf(
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
73 url->url_string, "%s%s%s%s%s%s%s%s%s%s",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
74 url->scheme ? url->scheme : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
75 url->scheme ? ":" : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
76 url->authority ? "//" : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
77 url->authority ? url->authority : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
78 // (url->path && url->path[0] != '/' && url->authority) ? "/" : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
79 (url->authority && (!url->path || *url->path != '/')) ? "/" : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
80 url->path ? url->path : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
81 url->query ? "?" : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
82 url->query ? url->query : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
83 url->fragment ? "#" : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
84 url->fragment ? url->fragment : "");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
85 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
86
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
87 return url->url_string->str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
88 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
89
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
90 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
91 * Return the hostname as a string.
50
22a3dedcb1a0 - s/camp/field/ s/CAMP/FIELD/
jcid
parents: 35
diff changeset
92 * (initializing 'hostname' and 'port' fields if necessary)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
93 * Note: a similar approach can be taken for user:password auth.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
94 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
95 const char *a_Url_hostname(const DilloUrl *u)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
96 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
97 char *p;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
98 /* Internal url handling IS transparent to the caller */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
99 DilloUrl *url = (DilloUrl *) u;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
100
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
101 if (!url->hostname && url->authority) {
411
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
102 if (url->authority[0] == '[' && (p = strchr(url->authority, ']'))) {
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
103 /* numeric ipv6 address, strip the brackets */
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
104 url->hostname = dStrndup(url->authority + 1,
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
105 (uint_t)(p - url->authority - 1));
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
106 if ((p = strchr(p, ':'))) {
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
107 url->port = strtol(p + 1, NULL, 10);
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
108 }
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
109 } else {
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
110 /* numeric ipv4 or hostname */
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
111 if ((p = strchr(url->authority, ':'))) {
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
112 url->port = strtol(p + 1, NULL, 10);
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
113 url->hostname = dStrndup(url->authority,
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
114 (uint_t)(p - url->authority));
411
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
115 } else {
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
116 url->hostname = url->authority;
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
117 }
009c4cf94433 - Changed the google search URL (UTF-8 request)
jcid
parents: 360
diff changeset
118 }
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
119 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
120
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
121 return url->hostname;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
122 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
123
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
124 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
125 * Create a DilloUrl object and initialize it.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
126 * (buffer, scheme, authority, path, query and fragment).
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
127 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
128 static DilloUrl *Url_object_new(const char *uri_str)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
129 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
130 DilloUrl *url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
131 char *s, *p;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
132
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
133 dReturn_val_if_fail (uri_str != NULL, NULL);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
134
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
135 url = dNew0(DilloUrl, 1);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
136
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
137 /* remove leading & trailing space from buffer */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
138 url->buffer = dStrstrip(dStrdup(uri_str));
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
139
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
140 s = (char *) url->buffer;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
141 p = strpbrk(s, ":/?#");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
142 if (p && p[0] == ':' && p > s) { /* scheme */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
143 *p = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
144 url->scheme = s;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
145 s = ++p;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
146 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
147 /* p = strpbrk(s, "/"); */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
148 if (p == s && p[0] == '/' && p[1] == '/') { /* authority */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
149 s = p + 2;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
150 p = strpbrk(s, "/?#");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
151 if (p) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
152 memmove(s - 2, s, (size_t)MAX(p - s, 1));
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
153 url->authority = s - 2;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
154 p[-2] = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
155 s = p;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
156 } else if (*s) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
157 url->authority = s;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
158 return url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
159 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
160 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
161
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
162 p = strpbrk(s, "?#");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
163 if (p) { /* path */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
164 url->path = (p > s) ? s : NULL;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
165 s = p;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
166 } else if (*s) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
167 url->path = s;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
168 return url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
169 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
170
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
171 p = strpbrk(s, "?#");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
172 if (p && p[0] == '?') { /* query */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
173 *p = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
174 s = p + 1;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
175 url->query = s;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
176 p = strpbrk(s, "#");
334
619177c88430 - Forbid dpi GET and POST from non dpi-generated urls.
jcid
parents: 158
diff changeset
177 url->flags |= URL_Get;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
178 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
179 if (p && p[0] == '#') { /* fragment */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
180 *p = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
181 s = p + 1;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
182 url->fragment = s;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
183 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
184
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
185 return url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
186 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
187
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
188 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
189 * Free a DilloUrl
1067
73aba13ddaed Removed redundant caller NULL checks already in the API
Jeremy Henty <onepoint@starurchin.org>
parents: 944
diff changeset
190 * Do nothing if the argument is NULL
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
191 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
192 void a_Url_free(DilloUrl *url)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
193 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
194 if (url) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
195 if (url->url_string)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
196 dStr_free(url->url_string, TRUE);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
197 if (url->hostname != url->authority)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
198 dFree((char *)url->hostname);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
199 dFree((char *)url->buffer);
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
200 dStr_free(url->data, 1);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
201 dFree((char *)url->alt);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
202 dFree(url);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
203 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
204 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
205
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
206 /*
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
207 * Resolve the URL as RFC3986 suggests.
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
208 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
209 static Dstr *Url_resolve_relative(const char *RelStr,
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
210 DilloUrl *BaseUrlPar,
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
211 const char *BaseStr)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
212 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
213 char *p, *s, *e;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
214 int i;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
215 Dstr *SolvedUrl, *Path;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
216 DilloUrl *RelUrl, *BaseUrl = NULL;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
217
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
218 /* parse relative URL */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
219 RelUrl = Url_object_new(RelStr);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
220
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
221 if (BaseUrlPar) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
222 BaseUrl = BaseUrlPar;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
223 } else if (RelUrl->scheme == NULL) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
224 /* only required when there's no <scheme> in RelStr */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
225 BaseUrl = Url_object_new(BaseStr);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
226 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
227
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
228 SolvedUrl = dStr_sized_new(64);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
229 Path = dStr_sized_new(64);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
230
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
231 /* path empty && scheme and authority undefined */
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
232 if (!RelUrl->path && !RelUrl->scheme && !RelUrl->authority) {
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
233 dStr_append(SolvedUrl, BaseStr);
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
234 if ((p = strchr(SolvedUrl->str, '#')))
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
235 dStr_truncate(SolvedUrl, p - SolvedUrl->str);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
236
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
237 if (RelUrl->query) { /* query */
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
238 if (BaseUrl->query)
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
239 dStr_truncate(SolvedUrl, BaseUrl->query - BaseUrl->buffer - 1);
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
240 dStr_append_c(SolvedUrl, '?');
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
241 dStr_append(SolvedUrl, RelUrl->query);
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
242 }
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
243 if (RelUrl->fragment) { /* fragment */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
244 dStr_append_c(SolvedUrl, '#');
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
245 dStr_append(SolvedUrl, RelUrl->fragment);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
246 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
247 goto done;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
248
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
249 } else if (RelUrl->scheme) { /* scheme */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
250 dStr_append(SolvedUrl, RelStr);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
251 goto done;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
252
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
253 } else if (RelUrl->authority) { /* authority */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
254 // Set the Path buffer and goto "STEP 7";
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
255 if (RelUrl->path)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
256 dStr_append(Path, RelUrl->path);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
257
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
258 } else {
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
259 if (RelUrl->path && RelUrl->path[0] == '/') { /* absolute path */
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
260 ; /* Ignore BaseUrl path */
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
261 } else if (BaseUrl->path) { /* relative path */
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
262 dStr_append(Path, BaseUrl->path);
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
263 for (i = Path->len; --i >= 0 && Path->str[i] != '/'; ) ;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
264 if (Path->str[i] == '/')
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
265 dStr_truncate(Path, ++i);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
266 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
267 if (RelUrl->path)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
268 dStr_append(Path, RelUrl->path);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
269
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
270 // erase "./"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
271 while ((p=strstr(Path->str, "./")) &&
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
272 (p == Path->str || p[-1] == '/'))
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
273 dStr_erase(Path, p - Path->str, 2);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
274 // erase last "."
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
275 if (Path->len && Path->str[Path->len - 1] == '.' &&
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
276 (Path->len == 1 || Path->str[Path->len - 2] == '/'))
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
277 dStr_truncate(Path, Path->len - 1);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
278
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
279 // erase "<segment>/../" and "<segment>/.."
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
280 s = p = Path->str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
281 while ( (p = strstr(p, "/..")) != NULL ) {
944
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
282 if (p[3] == '/' || !p[3]) { // "/../" | "/.."
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
283 for (e = p + 3 ; p > s && p[-1] != '/'; --p) ;
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
284 dStr_erase(Path, p - Path->str, e - p + (p > s && *e != 0));
ff7476f5317b Updated the URL resolver to comply with RFC-3986
Jorge Arellano Cid <jcid@dillo.org>
parents: 411
diff changeset
285 p -= (p > Path->str);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
286 } else
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
287 p += 3;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
288 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
289 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
290
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
291 /* STEP 7
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
292 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
293
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
294 /* scheme */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
295 if (BaseUrl->scheme) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
296 dStr_append(SolvedUrl, BaseUrl->scheme);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
297 dStr_append_c(SolvedUrl, ':');
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
298 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
299
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
300 /* authority */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
301 if (RelUrl->authority) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
302 dStr_append(SolvedUrl, "//");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
303 dStr_append(SolvedUrl, RelUrl->authority);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
304 } else if (BaseUrl->authority) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
305 dStr_append(SolvedUrl, "//");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
306 dStr_append(SolvedUrl, BaseUrl->authority);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
307 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
308
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
309 /* path */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
310 if ((RelUrl->authority || BaseUrl->authority) &&
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
311 ((Path->len == 0 && (RelUrl->query || RelUrl->fragment)) ||
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
312 (Path->len && Path->str[0] != '/')))
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
313 dStr_append_c(SolvedUrl, '/'); /* hack? */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
314 dStr_append(SolvedUrl, Path->str);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
315
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
316 /* query */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
317 if (RelUrl->query) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
318 dStr_append_c(SolvedUrl, '?');
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
319 dStr_append(SolvedUrl, RelUrl->query);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
320 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
321
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
322 /* fragment */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
323 if (RelUrl->fragment) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
324 dStr_append_c(SolvedUrl, '#');
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
325 dStr_append(SolvedUrl, RelUrl->fragment);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
326 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
327
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
328 done:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
329 dStr_free(Path, TRUE);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
330 a_Url_free(RelUrl);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
331 if (BaseUrl != BaseUrlPar)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
332 a_Url_free(BaseUrl);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
333 return SolvedUrl;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
334 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
335
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
336 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
337 * Transform (and resolve) an URL string into the respective DilloURL.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
338 * If URL = "http://dillo.sf.net:8080/index.html?long#part2"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
339 * then the resulting DilloURL should be:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
340 * DilloURL = {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
341 * url_string = "http://dillo.sf.net:8080/index.html?long#part2"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
342 * scheme = "http"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
343 * authority = "dillo.sf.net:8080:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
344 * path = "/index.html"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
345 * query = "long"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
346 * fragment = "part2"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
347 * hostname = "dillo.sf.net"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
348 * port = 8080
334
619177c88430 - Forbid dpi GET and POST from non dpi-generated urls.
jcid
parents: 158
diff changeset
349 * flags = URL_Get
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
350 * data = Dstr * ("")
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
351 * alt = NULL
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
352 * ismap_url_len = 0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
353 * }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
354 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
355 * Return NULL if URL is badly formed.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
356 */
334
619177c88430 - Forbid dpi GET and POST from non dpi-generated urls.
jcid
parents: 158
diff changeset
357 DilloUrl* a_Url_new(const char *url_str, const char *base_url)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
358 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
359 DilloUrl *url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
360 char *urlstr = (char *)url_str; /* auxiliar variable, don't free */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
361 char *p, *str1 = NULL, *str2 = NULL;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
362 Dstr *SolvedUrl;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
363 int i, n_ic, n_ic_spc;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
364
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
365 dReturn_val_if_fail (url_str != NULL, NULL);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
366
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
367 /* Count illegal characters (0x00-0x1F, 0x7F and space) */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
368 n_ic = n_ic_spc = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
369 for (p = (char*)url_str; *p; p++) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
370 n_ic_spc += (*p == ' ') ? 1 : 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
371 n_ic += (*p != ' ' && *p > 0x1F && *p != 0x7F) ? 0 : 1;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
372 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
373 if (n_ic) {
109
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
374 /* Encode illegal characters (they could also be stripped).
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
375 * There's no standard for illegal chars; we chose to encode. */
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
376 p = str1 = dNew(char, strlen(url_str) + 2*n_ic + 1);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
377 for (i = 0; url_str[i]; ++i)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
378 if (url_str[i] > 0x1F && url_str[i] != 0x7F && url_str[i] != ' ')
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
379 *p++ = url_str[i];
109
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
380 else {
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
381 *p++ = '%';
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
382 *p++ = HEX[(url_str[i] >> 4) & 15];
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
383 *p++ = HEX[url_str[i] & 15];
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
384 }
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
385 *p = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
386 urlstr = str1;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
387 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
388
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
389 /* let's use a heuristic to set http: as default */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
390 if (!base_url) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
391 base_url = "http:";
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
392 if (urlstr[0] != '/') {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
393 p = strpbrk(urlstr, "/#?:");
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
394 if (!p || *p != ':')
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
395 urlstr = str2 = dStrconcat("//", urlstr, NULL);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
396 } else if (urlstr[1] != '/')
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
397 urlstr = str2 = dStrconcat("/", urlstr, NULL);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
398 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
399
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
400 /* Resolve the URL */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
401 SolvedUrl = Url_resolve_relative(urlstr, NULL, base_url);
360
1354085ccbfe - Removed the remaining DEBUG_MSG in src/
jcid
parents: 334
diff changeset
402 _MSG("SolvedUrl = %s\n", SolvedUrl->str);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
403
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
404 /* Fill url data */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
405 url = Url_object_new(SolvedUrl->str);
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
406 url->data = dStr_new("");
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
407 url->url_string = SolvedUrl;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
408 url->illegal_chars = n_ic;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
409 url->illegal_chars_spc = n_ic_spc;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
410
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
411 dFree(str1);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
412 dFree(str2);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
413 return url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
414 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
415
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
416
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
417 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
418 * Duplicate a Url structure
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
419 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
420 DilloUrl* a_Url_dup(const DilloUrl *ori)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
421 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
422 DilloUrl *url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
423
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
424 url = Url_object_new(URL_STR_(ori));
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
425 dReturn_val_if_fail (url != NULL, NULL);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
426
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
427 url->url_string = dStr_new(URL_STR(ori));
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
428 url->port = ori->port;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
429 url->flags = ori->flags;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
430 url->alt = dStrdup(ori->alt);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
431 url->ismap_url_len = ori->ismap_url_len;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
432 url->illegal_chars = ori->illegal_chars;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
433 url->illegal_chars_spc = ori->illegal_chars_spc;
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
434 url->data = dStr_sized_new(URL_DATA(ori)->len);
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
435 dStr_append_l(url->data, URL_DATA(ori)->str, URL_DATA(ori)->len);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
436 return url;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
437 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
438
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
439 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
440 * Compare two Url's to check if they're the same, or which one is bigger.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
441 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
442 * The fields which are compared here are:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
443 * <scheme>, <authority>, <path>, <query> and <data>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
444 * Other fields are left for the caller to check
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
445 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
446 * Return value: 0 if equal, > 0 if A > B, < 0 if A < B.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
447 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
448 * Note: this function defines a sorting order different from strcmp!
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
449 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
450 int a_Url_cmp(const DilloUrl *A, const DilloUrl *B)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
451 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
452 int st;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
453
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
454 dReturn_val_if_fail(A && B, 1);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
455
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
456 if (A == B ||
50
22a3dedcb1a0 - s/camp/field/ s/CAMP/FIELD/
jcid
parents: 35
diff changeset
457 ((st = URL_STR_FIELD_I_CMP(A->authority, B->authority)) == 0 &&
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
458 (st = strcmp(A->path ? A->path + (*A->path == '/') : "",
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
459 B->path ? B->path + (*B->path == '/') : "")) == 0 &&
50
22a3dedcb1a0 - s/camp/field/ s/CAMP/FIELD/
jcid
parents: 35
diff changeset
460 //(st = URL_STR_FIELD_CMP(A->path, B->path)) == 0 &&
22a3dedcb1a0 - s/camp/field/ s/CAMP/FIELD/
jcid
parents: 35
diff changeset
461 (st = URL_STR_FIELD_CMP(A->query, B->query)) == 0 &&
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
462 (st = dStr_cmp(A->data, B->data)) == 0 &&
158
5a0ce35806df - Fixed a bug in a_Url_cmp.
jcid
parents: 157
diff changeset
463 (st = URL_STR_FIELD_I_CMP(A->scheme, B->scheme)) == 0))
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
464 return 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
465 return st;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
466 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
467
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
468 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
469 * Set DilloUrl flags
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
470 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
471 void a_Url_set_flags(DilloUrl *u, int flags)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
472 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
473 if (u)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
474 u->flags = flags;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
475 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
476
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
477 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
478 * Set DilloUrl data (like POST info, etc.)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
479 */
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
480 void a_Url_set_data(DilloUrl *u, Dstr **data)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
481 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
482 if (u) {
157
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
483 dStr_free(u->data, 1);
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
484 u->data = *data;
49d4a18c4928 - Switched URL_DATA type from char* to a dStr.
jcid
parents: 109
diff changeset
485 *data = NULL;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
486 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
487 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
488
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
489 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
490 * Set DilloUrl alt (alternate text to the URL. Used by image maps)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
491 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
492 void a_Url_set_alt(DilloUrl *u, const char *alt)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
493 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
494 if (u) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
495 dFree((char *)u->alt);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
496 u->alt = dStrdup(alt);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
497 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
498 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
499
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
500 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
501 * Set DilloUrl ismap coordinates
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
502 * (this is optimized for not hogging the CPU)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
503 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
504 void a_Url_set_ismap_coords(DilloUrl *u, char *coord_str)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
505 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
506 dReturn_if_fail (u && coord_str);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
507
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
508 if (!u->ismap_url_len) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
509 /* Save base-url length (without coords) */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
510 u->ismap_url_len = URL_STR_(u) ? u->url_string->len : 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
511 a_Url_set_flags(u, URL_FLAGS(u) | URL_Ismap);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
512 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
513 if (u->url_string) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
514 dStr_truncate(u->url_string, u->ismap_url_len);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
515 dStr_append(u->url_string, coord_str);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
516 u->query = u->url_string->str + u->ismap_url_len + 1;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
517 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
518 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
519
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
/*
 * Return the value (0-15) of a single hexadecimal digit,
 * or -1 if 'c' is not one.
 */
static int Url_hex_digit_value(char c)
{
   if (c >= '0' && c <= '9')
      return c - '0';
   if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
   if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
   return -1;
}

/*
 * Given an hex octet (e.g., e3, 2F, 20), return the corresponding
 * character if the octet is valid, and -1 otherwise.
 *
 * Only the first two characters of 's' are looked at, and both must be
 * hexadecimal digits. (A strtol()-based check is not enough here, as
 * strtol also accepts leading whitespace and a sign, so that "+1" or
 * " 1" would be taken as valid octets.)
 */
static int Url_decode_hex_octet(const char *s)
{
   /* s[1] is only read once s[0] is known not to be the terminator */
   if (s && s[0] && s[1]) {
      const int hi = Url_hex_digit_value(s[0]);
      const int lo = Url_hex_digit_value(s[1]);

      if (hi >= 0 && lo >= 0)
         return (hi << 4) | lo;
   }
   return -1;
}
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
537
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
538 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
539 * Parse possible hexadecimal octets in the URI path.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
540 * Returns a new allocated string.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
541 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
542 char *a_Url_decode_hex_str(const char *str)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
543 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
544 char *new_str, *dest;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
545 int i, val;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
546
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
547 if (!str)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
548 return NULL;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
549
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
550 /* most cases won't have hex octets */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
551 if (!strchr(str, '%'))
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
552 return dStrdup(str);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
553
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
554 dest = new_str = dNew(char, strlen(str) + 1);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
555
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
556 for (i = 0; str[i]; i++) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
557 *dest++ = (str[i] == '%' && (val = Url_decode_hex_octet(str+i+1)) >= 0) ?
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
558 i+=2, val : str[i];
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
559 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
560 *dest++ = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
561
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
562 new_str = dRealloc(new_str, sizeof(char) * (dest - new_str));
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
563 return new_str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
564 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
565
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
566 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
567 * Urlencode 'str'
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
568 * -RL :: According to the RFC 1738, only alphanumerics, the special
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
569 * characters "$-_.+!*'(),", and reserved characters ";/?:@=&" used
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
570 * for their *reserved purposes* may be used unencoded within a URL.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
571 * We'll escape everything but alphanumeric and "-_.*" (as lynx). --Jcid
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
572 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
573 * Note: the content type "application/x-www-form-urlencoded" is used:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
574 * i.e., ' ' -> '+' and '\n' -> CR LF (see HTML 4.01, Sec. 17.13.4)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
575 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
576 char *a_Url_encode_hex_str(const char *str)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
577 {
1141
corvid <corvid@lavabit.com>
parents: 1067
diff changeset
578 static const char *const verbatim = "-_.*";
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
579 char *newstr, *c;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
580
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
581 if (!str)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
582 return NULL;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
583
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
584 newstr = dNew(char, 6*strlen(str)+1);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
585
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
586 for (c = newstr; *str; str++)
1221
f34b803b8639 Handle signed chars. Aadded dIsspace() and dIsalnum() to dlib
Jorge Arellano Cid <jcid@dillo.org>
parents: 1186
diff changeset
587 if ((dIsalnum(*str) && !(*str & 0x80)) || strchr(verbatim, *str))
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
588 /* we really need isalnum for the "C" locale */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
589 *c++ = *str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
590 else if (*str == ' ')
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
591 *c++ = '+';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
592 else if (*str == '\n') {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
593 *c++ = '%';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
594 *c++ = '0';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
595 *c++ = 'D';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
596 *c++ = '%';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
597 *c++ = '0';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
598 *c++ = 'A';
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
599 } else {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
600 *c++ = '%';
109
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
601 *c++ = HEX[(*str >> 4) & 15];
ed0af5361644 - Set the url resolver to escape illegal chars instead of stripping.
jcid
parents: 50
diff changeset
602 *c++ = HEX[*str & 15];
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
603 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
604 *c = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
605
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
606 return newstr;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
607 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
608
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
609
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
/*
 * RFC-3986 suggests this stripping when "importing" URLs from other media.
 * Strip: "URL:", enclosing < >, and embedded whitespace.
 * (We also strip illegal chars: 00-1F and 7F)
 *
 * Returns a new allocated string, or whatever dStrdup() gave back when
 * it is NULL.
 */
char *a_Url_string_strip_delimiters(const char *str)
{
   char *new_str = dStrdup(str);
   char *src, *dst;

   if (!new_str)
      return new_str;

   /* skip the leading "URL:" and '<', if there */
   src = new_str;
   if (strncmp(src, "URL:", 4) == 0)
      src += 4;
   if (*src == '<')
      src++;

   /* compact the kept characters towards the start of the buffer */
   for (dst = new_str; *src; src++) {
      if (*src > 0x1F && *src != 0x7F && *src != ' ')
         *dst++ = *src;
   }

   /* drop the closing '>' */
   if (dst > new_str && dst[-1] == '>')
      --dst;
   *dst = 0;

   return new_str;
}
1644
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
636
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
637 /*
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
638 * Is the provided hostname an IP address?
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
639 */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
640 static bool_t Url_host_is_ip(const char *host)
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
641 {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
642 uint_t len;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
643
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
644 if (!host || !*host)
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
645 return FALSE;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
646
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
647 len = strlen(host);
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
648
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
649 if (len == strspn(host, "0123456789.")) {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
650 _MSG("an IPv4 address\n");
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
651 return TRUE;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
652 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
653 if (*host == '[' &&
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
654 (len == strspn(host, "0123456789abcdefABCDEF:.[]"))) {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
655 /* The precise format is shown in section 3.2.2 of rfc 3986 */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
656 _MSG("an IPv6 address\n");
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
657 return TRUE;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
658 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
659 return FALSE;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
660 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
661
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
662 /*
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
663 * How many internal dots are in the public portion of this hostname?
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
664 * e.g., for "www.dillo.org", it is one because everything under "dillo.org",
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
665 * as a .org domain, is part of one organization.
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
666 *
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
667 * Of course this is only a simple and imperfect approximation of
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
668 * organizational boundaries.
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
669 */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
670 static uint_t Url_host_public_internal_dots(const char *host)
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
671 {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
672 uint_t ret = 1;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
673
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
674 if (host) {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
675 int start, after, tld_len;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
676
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
677 /* We may be able to trust the format of the host string more than
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
678 * I am here. Trailing dots and no dots are real possibilities, though.
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
679 */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
680 after = strlen(host);
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
681 if (after > 0 && host[after - 1] == '.')
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
682 after--;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
683 start = after;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
684 while (start > 0 && host[start - 1] != '.')
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
685 start--;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
686 tld_len = after - start;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
687
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
688 if (tld_len > 0) {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
689 /* These TLDs were chosen by examining the current publicsuffix list
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
690 * in January 2010 and picking out those where it was simplest for
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
691 * them to describe the situation by beginning with a "*.[tld]" rule.
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
692 */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
693 const char *const tlds[] = {"ar","au","bd","bn","bt","ck","cy","do",
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
694 "eg","er","et","fj","fk","gt","gu","id",
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
695 "il","jm","ke","kh","kw","ml","mm","mt",
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
696 "mz","ni","np","nz","om","pg","py","qa",
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
697 "sv","tr","uk","uy","ve","ye","yu","za",
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
698 "zm","zw"};
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
699 uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]);
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
700
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
701 for (i = 0; i < tld_num; i++) {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
702 if (strlen(tlds[i]) == (uint_t) tld_len &&
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
703 !dStrncasecmp(tlds[i], host + start, tld_len)) {
1647
e1eab0c4d97b silence a MSG
corvid <corvid@lavabit.com>
parents: 1644
diff changeset
704 _MSG("TLD code matched %s\n", tlds[i]);
1644
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
705 ret++;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
706 break;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
707 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
708 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
709 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
710 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
711 return ret;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
712 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
713
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
714 /*
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
715 * Given a URL host string, return the portion that is public, i.e., the
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
716 * domain that is in a registry outside the organization.
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
717 * For 'www.dillo.org', that would be 'dillo.org'.
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
718 */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
719 const char *a_Url_host_find_public_suffix(const char *host)
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
720 {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
721 const char *s;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
722 uint_t dots;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
723
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
724 if (!host || !*host || Url_host_is_ip(host))
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
725 return host;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
726
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
727 s = host;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
728
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
729 while (s[1])
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
730 s++;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
731
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
732 if (s > host && *s == '.') {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
733 /* don't want to deal with trailing dot */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
734 s--;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
735 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
736
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
737 dots = Url_host_public_internal_dots(host);
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
738
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
739 /* With a proper host string, we should not be pointing to a dot now. */
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
740
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
741 while (s > host) {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
742 if (s[-1] == '.') {
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
743 if (dots == 0)
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
744 break;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
745 else
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
746 dots--;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
747 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
748 s--;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
749 }
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
750
1685
e27f1df9ae80 clearer Capi_filters_allow msg
Jeremy Henty, corvid
parents: 1647
diff changeset
751 _MSG("public suffix of %s is %s\n", host, s);
1644
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
752 return s;
2650456b8199 filter automatic requests
corvid <corvid@lavabit.com>
parents: 1221
diff changeset
753 }