annotate src/misc.c @ 368:2242da885677

- s/todo:/TODO:/g
author jcid
date Tue, 30 Sep 2008 16:32:41 +0200
parents da33058e94be
children b277eed3119c
rev   line source
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
1 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
2 * File: misc.c
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
3 *
35
d9e7b35430de Updated copyright lines
jcid
parents: 0
diff changeset
4 * Copyright (C) 2000 Jörgen Viksell <vsksga@hotmail.com>
d9e7b35430de Updated copyright lines
jcid
parents: 0
diff changeset
5 * Copyright (C) 2000-2007 Jorge Arellano Cid <jcid@dillo.org>,
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
6 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
7 * This program is free software; you can redistribute it and/or modify
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
8 * it under the terms of the GNU General Public License as published by
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
9 * the Free Software Foundation; either version 3 of the License, or
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
10 * (at your option) any later version.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
11 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
12
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
13 #include <stdio.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
14 #include <stdlib.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
15 #include <unistd.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
16 #include <string.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
17 #include <ctype.h>
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
18
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
19 #include "msg.h"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
20 #include "misc.h"
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
21
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
22
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
23 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
24 * Escape characters as %XX sequences.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
25 * Return value: New string.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
26 */
306
7a76f872ce73 - Eliminated gcc 4.2.3 warnings on 64bit OS.
jcid
parents: 274
diff changeset
27 char *a_Misc_escape_chars(const char *str, const char *esc_set)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
28 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
29 static const char *hex = "0123456789ABCDEF";
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
30 char *p = NULL;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
31 Dstr *dstr;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
32 int i;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
33
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
34 dstr = dStr_sized_new(64);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
35 for (i = 0; str[i]; ++i) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
36 if (str[i] <= 0x1F || str[i] == 0x7F || strchr(esc_set, str[i])) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
37 dStr_append_c(dstr, '%');
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
38 dStr_append_c(dstr, hex[(str[i] >> 4) & 15]);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
39 dStr_append_c(dstr, hex[str[i] & 15]);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
40 } else {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
41 dStr_append_c(dstr, str[i]);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
42 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
43 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
44 p = dstr->str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
45 dStr_free(dstr, FALSE);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
46
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
47 return p;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
48 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
49
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
50
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
51 #define TAB_SIZE 8
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
52 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
53 * Takes a string and converts any tabs to spaces.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
54 */
340
da33058e94be - Fixed a memory leak in plain.cc.
jcid
parents: 306
diff changeset
55 char *a_Misc_expand_tabs(const char *str, int len)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
56 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
57 Dstr *New = dStr_new("");
340
da33058e94be - Fixed a memory leak in plain.cc.
jcid
parents: 306
diff changeset
58 int i, j, pos, old_pos;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
59 char *val;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
60
340
da33058e94be - Fixed a memory leak in plain.cc.
jcid
parents: 306
diff changeset
61 if (len) {
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
62 for (pos = 0, i = 0; i < len; i++) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
63 if (str[i] == '\t') {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
64 /* Fill with whitespaces until the next tab. */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
65 old_pos = pos;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
66 pos += TAB_SIZE - (pos % TAB_SIZE);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
67 for (j = old_pos; j < pos; j++)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
68 dStr_append_c(New, ' ');
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
69 } else {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
70 dStr_append_c(New, str[i]);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
71 pos++;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
72 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
73 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
74 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
75 val = New->str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
76 dStr_free(New, FALSE);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
77 return val;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
78 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
79
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
/* TODO: could use dStr ADT! */

/* A MIME type name together with its length in bytes (without the NUL). */
typedef struct ContentType_ {
   const char *str;
   int len;
} ContentType_t;

/* Build a table entry whose length is derived from the literal itself. */
#define MIME_ENTRY(s) { s, (int) (sizeof(s) - 1) }

/* Known MIME types; the list ends with a { NULL, 0 } sentinel. */
static const ContentType_t MimeTypes[] = {
   MIME_ENTRY("application/octet-stream"),
   MIME_ENTRY("text/html"),
   MIME_ENTRY("text/plain"),
   MIME_ENTRY("image/gif"),
   MIME_ENTRY("image/png"),
   MIME_ENTRY("image/jpeg"),
   { NULL, 0 }
};

#undef MIME_ENTRY

/* Detected types; each value is the index of its entry in MimeTypes[]. */
typedef enum {
   DT_OCTET_STREAM = 0,
   DT_TEXT_HTML    = 1,
   DT_TEXT_PLAIN   = 2,
   DT_IMAGE_GIF    = 3,
   DT_IMAGE_PNG    = 4,
   DT_IMAGE_JPG    = 5
} DetectedContentType;
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
104
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
105 /*
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
106 * Detects 'Content-Type' from a data stream sample.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
107 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
108 * It uses the magic(5) logic from file(1). Currently, it
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
109 * only checks the few mime types that Dillo supports.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
110 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
111 * 'Data' is a pointer to the first bytes of the raw data.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
112 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
113 * Return value: (0 on success, 1 on doubt, 2 on lack of data).
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
114 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
115 int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
116 {
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
117 size_t i, non_ascci, non_ascci_text, bin_chars;
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
118 char *p = Data;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
119 int st = 1; /* default to "doubt' */
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
120 DetectedContentType Type = DT_OCTET_STREAM; /* default to binary */
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
121
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
122 /* HTML try */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
123 for (i = 0; i < Size && isspace(p[i]); ++i);
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
124 if ((Size - i >= 5 && !dStrncasecmp(p+i, "<html", 5)) ||
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
125 (Size - i >= 5 && !dStrncasecmp(p+i, "<head", 5)) ||
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
126 (Size - i >= 6 && !dStrncasecmp(p+i, "<title", 6)) ||
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
127 (Size - i >= 14 && !dStrncasecmp(p+i, "<!doctype html", 14)) ||
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
128 /* this line is workaround for FTP through the Squid proxy */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
129 (Size - i >= 17 && !dStrncasecmp(p+i, "<!-- HTML listing", 17))) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
130
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
131 Type = DT_TEXT_HTML;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
132 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
133 /* Images */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
134 } else if (Size >= 4 && !dStrncasecmp(p, "GIF8", 4)) {
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
135 Type = DT_IMAGE_GIF;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
136 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
137 } else if (Size >= 4 && !dStrncasecmp(p, "\x89PNG", 4)) {
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
138 Type = DT_IMAGE_PNG;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
139 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
140 } else if (Size >= 2 && !dStrncasecmp(p, "\xff\xd8", 2)) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
141 /* JPEG has the first 2 bytes set to 0xffd8 in BigEndian - looking
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
142 * at the character representation should be machine independent. */
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
143 Type = DT_IMAGE_JPG;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
144 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
145
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
146 /* Text */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
147 } else {
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
148 /* Heuristic for "text/plain"
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
149 * {ASCII, LATIN1, UTF8, KOI8-R, CP-1251}
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
150 * All in the above set regard [00-31] as control characters.
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
151 * LATIN1: [7F-9F] unused
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
152 * CP-1251 {7F,98} unused (two characters).
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
153 *
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
154 * We'll use [0-31] as indicators of non-text content.
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
155 * Better heuristics are welcomed! :-) */
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
156
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
157 non_ascci = non_ascci_text = bin_chars = 0;
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
158 Size = MIN (Size, 256);
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
159 for (i = 0; i < Size; i++) {
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
160 int ch = (uchar_t) p[i];
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
161 if (ch < 32 && !isspace(ch))
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
162 ++bin_chars;
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
163 if (ch > 126)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
164 ++non_ascci;
250
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
165 if (ch > 190)
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
166 ++non_ascci_text;
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
167 }
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
168 if (bin_chars == 0) {
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
169 /* Let's say text: if "rare" chars are <= 10% */
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
170 if ((non_ascci - non_ascci_text) <= Size/10)
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
171 Type = DT_TEXT_PLAIN;
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
172 }
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
173 if (Size == 256)
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
174 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
175 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
176
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
177 *PT = MimeTypes[Type].str;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
178 return st;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
179 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
180
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
/*
 * Parse Content-Type string, e.g., "text/html; charset=utf-8".
 *
 * 'major', 'minor' and 'charset' are optional outputs; pass NULL for any
 * part that is not wanted. Each non-NULL output is first set to NULL and
 * then, if the corresponding part is found, to a copy made with dStrndup()
 * (a_Misc_content_type_cmp() releases these copies with dFree()).
 *
 * The major and minor names are the longest runs of alphanumerics and '-'
 * before and after the '/'. The charset value may be wrapped in double
 * quotes, which are stripped.
 */
void a_Misc_parse_content_type(const char *str, char **major, char **minor,
                               char **charset)
{
   const char *s;

   if (major)
      *major = NULL;
   if (minor)
      *minor = NULL;
   if (charset)
      *charset = NULL;
   if (!str)
      return;

   /* The casts keep the <ctype.h> argument within the unsigned char range;
    * passing a negative plain char to isalnum() is undefined behaviour. */
   for (s = str; isalnum((unsigned char) *s) || (*s == '-'); s++);
   if (major)
      *major = dStrndup(str, s - str);

   if (*s == '/') {
      for (str = ++s; isalnum((unsigned char) *s) || (*s == '-'); s++);
      if (minor)
         *minor = dStrndup(str, s - str);
   }

   if (charset && *s) {
      const char terminators[] = " ;\t";
      const char key[] = "charset";

      /* Accept "charset" only at the start of the searched text or right
       * after a separator, so that e.g. "xcharset" is not taken for it. */
      if ((s = dStristr(str, key)) &&
          (s == str || strchr(terminators, s[-1]))) {
         s += sizeof(key) - 1;
         for ( ; *s == ' ' || *s == '\t'; ++s);
         if (*s == '=') {
            size_t len;
            for (++s; *s == ' ' || *s == '\t'; ++s);
            if ((len = strcspn(s, terminators))) {
               if (*s == '"' && s[len-1] == '"' && len > 1) {
                  /* quoted string */
                  s++;
                  len -= 2;
               }
               *charset = dStrndup(s, len);
            }
         }
      }
   }
}
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
231
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
/*
 * Compare two Content-Type strings.
 * Return 0 if they are equivalent, and 1 otherwise.
 *
 * Two NULL/empty strings are equivalent; one NULL/empty string against a
 * non-empty one is not. Otherwise major and minor types must match
 * (ignoring case), and the charsets must either match, both be absent, or
 * the only one present must be "UTF-8".
 */
int a_Misc_content_type_cmp(const char *ct1, const char *ct2)
{
   char *maj_a, *min_a, *cs_a;
   char *maj_b, *min_b, *cs_b;
   int empty_a = (!ct1 || !*ct1);
   int empty_b = (!ct2 || !*ct2);
   int differ = 1;

   if (empty_a || empty_b)
      return (empty_a && empty_b) ? 0 : 1;

   a_Misc_parse_content_type(ct1, &maj_a, &min_a, &cs_a);
   a_Misc_parse_content_type(ct2, &maj_b, &min_b, &cs_b);

   if (maj_a && maj_b && !dStrcasecmp(maj_a, maj_b) &&
       min_a && min_b && !dStrcasecmp(min_a, min_b)) {
      if (cs_a && cs_b) {
         /* both sides name a charset: they have to agree */
         differ = (dStrcasecmp(cs_a, cs_b) != 0);
      } else if (cs_a || cs_b) {
         /* only one side names a charset: it has to be UTF-8 */
         differ = (dStrcasecmp(cs_a ? cs_a : cs_b, "UTF-8") != 0);
      } else {
         /* neither side names a charset */
         differ = 0;
      }
   }

   dFree(maj_a);
   dFree(min_a);
   dFree(cs_a);
   dFree(maj_b);
   dFree(min_b);
   dFree(cs_b);

   return differ;
}
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
265
fce9380ee68a - Switched from charset to content-type for handling data.
jcid
parents: 180
diff changeset
266 /*
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
267 * Check the server-supplied 'Content-Type' against our detected type.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
268 * (some servers seem to default to "text/plain").
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
269 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
270 * Return value:
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
271 * 0, if they match
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
272 * -1, if a mismatch is detected
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
273 *
155
fb8da086d7da + s/there're/there are/g
jcid
parents: 35
diff changeset
274 * There are many MIME types Dillo doesn't know, they're handled
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
275 * as "application/octet-stream" (as the SPEC says).
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
276 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
277 * A mismatch happens when receiving a binary stream as
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
278 * "text/plain" or "text/html", or an image that's not an image of its kind.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
279 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
280 * Note: this is a basic security procedure.
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
281 *
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
282 */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
283 int a_Misc_content_type_check(const char *EntryType, const char *DetectedType)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
284 {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
285 int i;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
286 int st = -1;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
287
274
1deca4cad4c4 - Implemented OBJECT as link (similar to FRAME).
jcid
parents: 250
diff changeset
288 _MSG("Type check: [Srv: %s Det: %s]\n", EntryType, DetectedType);
0
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
289
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
290 if (!EntryType)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
291 return 0; /* there's no mismatch without server type */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
292
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
293 for (i = 1; MimeTypes[i].str; ++i)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
294 if (dStrncasecmp(EntryType, MimeTypes[i].str, MimeTypes[i].len) == 0)
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
295 break;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
296
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
297 if (!MimeTypes[i].str) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
298 /* type not found, no mismatch */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
299 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
300 } else if (dStrncasecmp(EntryType, "image/", 6) == 0 &&
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
301 !dStrncasecmp(DetectedType,MimeTypes[i].str,MimeTypes[i].len)){
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
302 /* An image, and there's an exact match */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
303 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
304 } else if (dStrncasecmp(EntryType, "text/", 5) ||
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
305 dStrncasecmp(DetectedType, "application/", 12)) {
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
306 /* Not an application sent as text */
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
307 st = 0;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
308 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
309
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
310 return st;
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
311 }
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
312
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
/*
 * Parse a geometry string.
 *
 * The string is split at its first 'x' (or, when there is none, its first
 * 'X'). A decimal integer at the start of 'str' is taken as the width and
 * the integer right after the separator as the height. When both are
 * present they are stored in *w and *h, and two further integers following
 * the height (strtol accepts a leading sign, e.g. "+10+20") are looked for;
 * *x and *y are written only when both of them are found.
 *
 * Return value: 1 if width and height were parsed, 0 otherwise (in which
 * case none of the output arguments is written).
 */
int a_Misc_parse_geometry(char *str, int *x, int *y, int *w, int *h)
{
   char *sep, *after_w, *after_h, *after_x, *after_y;
   int found = 0;

   sep = strchr(str, 'x');
   if (sep == NULL)
      sep = strchr(str, 'X');

   if (sep != NULL) {
      char *h_start = sep + 1;
      int width = strtol(str, &after_w, 10);
      int height = strtol(h_start, &after_h, 10);

      /* strtol leaves the end pointer at the start when no digits are read */
      if (after_w != str && after_h != h_start) {
         int xpos, ypos;

         *w = width;
         *h = height;
         found = 1;

         /* the optional position follows the height directly */
         xpos = strtol(after_h, &after_x, 10);
         ypos = strtol(after_x, &after_y, 10);
         if (after_x != after_h && after_y != after_x) {
            *x = xpos;
            *y = ypos;
         }
      }
   }
   _MSG("geom: w,h,x,y = (%d,%d,%d,%d)\n", *w, *h, *x, *y);
   return found;
}
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
342
6ee11bf9e3ea Initial revision
jcid
parents:
diff changeset
/*
 * Encodes a NUL-terminated string using base64 encoding (alphabet
 * "A-Za-z0-9+/", output padded with '=' to a multiple of four characters).
 *
 * Return value: a newly allocated, NUL-terminated string obtained from
 * dMalloc, or NULL if 'in' is NULL. An empty input yields an empty string.
 */
char *a_Misc_encode_base64(const char *in)
{
   static const char *base64_hex = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                   "abcdefghijklmnopqrstuvwxyz"
                                   "0123456789+/";
   /* Read the input as unsigned bytes: with a signed 'char', bytes >= 0x80
    * are negative and the shifts below would index outside base64_hex. */
   const unsigned char *src = (const unsigned char *)in;
   char *out = NULL;
   size_t len, i = 0;

   if (in == NULL) return NULL;
   len = strlen(in);

   /* four output characters per (possibly partial) group of three input
    * bytes, plus the terminating NUL */
   out = (char *)dMalloc((len + 2) / 3 * 4 + 1);
   if (out == NULL) return NULL;

   /* complete three-byte groups */
   for (; len >= 3; len -= 3, src += 3) {
      out[i++] = base64_hex[src[0] >> 2];
      out[i++] = base64_hex[((src[0] << 4) & 0x30) | (src[1] >> 4)];
      out[i++] = base64_hex[((src[1] << 2) & 0x3c) | (src[2] >> 6)];
      out[i++] = base64_hex[src[2] & 0x3f];
   }

   /* trailing one or two bytes: emit the available bits and pad with '=' */
   if (len > 0) {
      unsigned int fragment = (src[0] << 4) & 0x30;

      out[i++] = base64_hex[src[0] >> 2];
      if (len > 1) fragment |= src[1] >> 4;
      out[i++] = base64_hex[fragment];
      out[i++] = (len < 2) ? '=' : base64_hex[(src[1] << 2) & 0x3c];
      out[i++] = '=';
   }
   out[i] = '\0';
   return out;
}
180
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
380
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
381 /*
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
382 * Load a local file into a dStr.
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
383 * Return value: dStr on success, NULL on error.
368
2242da885677 - s/todo:/TODO:/g
jcid
parents: 340
diff changeset
384 * TODO: a filesize threshold may be implemented.
180
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
385 */
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
386 Dstr *a_Misc_file2dstr(const char *filename)
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
387 {
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
388 FILE *F_in;
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
389 int n;
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
390 char buf[4096];
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
391 Dstr *dstr = NULL;
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
392
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
393 if ((F_in = fopen(filename, "r"))) {
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
394 dstr = dStr_sized_new(4096);
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
395 while ((n = fread (buf, 1, 4096, F_in)) > 0) {
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
396 dStr_append_l(dstr, buf, n);
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
397 }
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
398 fclose(F_in);
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
399 }
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
400 return dstr;
0038a2943cc2 - Made file inputs free the loaded file after the page is left.
jcid
parents: 155
diff changeset
401 }