changeset 1272:83a462cf5d02

Follow RFC a bit more closely for Content-Type parsing. From looking at the IANA's currently-assigned media types, allowing '+' and '.' would have been enough.
author corvid <corvid@lavabit.com>
date Mon, 10 Aug 2009 03:42:59 +0000
parents 3e784a367043
children 9627c06a92ab
files src/misc.c
diffstat 1 files changed, 10 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/misc.c	Fri Aug 07 18:27:48 2009 -0400
+++ b/src/misc.c	Mon Aug 10 03:42:59 2009 +0000
@@ -192,10 +192,12 @@
 
 /*
  * Parse Content-Type string, e.g., "text/html; charset=utf-8".
+ * Content-Type is defined in RFC 2045 section 5.1.
  */
 void a_Misc_parse_content_type(const char *str, char **major, char **minor,
                                char **charset)
 {
+   static const char tspecials_space[] = "()<>@,;:\\\"/[]?= ";
    const char *s;
    bool_t is_text;
 
@@ -208,19 +210,24 @@
    if (!str)
       return;
 
-   for (s = str; dIsalnum(*s) || (*s == '-'); s++);
+   for (s = str; *s && !iscntrl((uchar_t)*s) && !strchr(tspecials_space, *s);
+        s++) ;
    if (major)
       *major = dStrndup(str, s - str);
    is_text = (s - str == 4) && !dStrncasecmp(str, "text", 4);
 
    if (*s == '/') {
-      for (str = ++s; dIsalnum(*s) || (*s == '-'); s++);
+      for (str = ++s;
+           *s && !iscntrl((uchar_t)*s) && !strchr(tspecials_space, *s); s++) ;
       if (minor)
          *minor = dStrndup(str, s - str);
    }
 
    if (is_text && charset && *s) {
-      /* charset parameter is defined for text media type (RFC 2046) */
+      /* charset parameter is defined for text media type (RFC 2046).
+       * Note that is_text will no longer suffice if dillo begins to
+       * handle xhtml someday.
+       */
       const char terminators[] = " ;\t";
       const char key[] = "charset";