changeset 740:2d9adb33f587

Cleanup of cache.c WRT charset decoders. This patch gets rid of a series of valgrind complains with this page: http://selenic.com/pipermail/mercurial/
author Jorge Arellano Cid <jcid@dillo.org>
date Wed, 07 Jan 2009 15:53:19 -0300
parents cb5fc438936b
children 6e89436361f6
files ChangeLog src/cache.c src/cache.h src/capi.c src/capi.h src/html.cc
diffstat 6 files changed, 89 insertions(+), 100 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Tue Jan 06 20:22:43 2009 +0100
+++ b/ChangeLog	Wed Jan 07 15:53:19 2009 -0300
@@ -48,6 +48,7 @@
  - Allowed the rc parser to skip whitespace around the equal sign.
  - Fixed the parser not to call Html_tag_close_* functions twice.
  - Implemented CSS Stylesheet loading.
+ - Made a big cleanup of cache.c WRT charset decoding (fixes bugs).
    Patches: Jorge Arellano Cid
 
 dw
--- a/src/cache.c	Tue Jan 06 20:22:43 2009 +0100
+++ b/src/cache.c	Wed Jan 07 15:53:19 2009 -0300
@@ -485,40 +485,33 @@
  * Change Content-Type for cache entry found by url.
  * Return new content type.
  */
-const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
-                                     bool_t force)
+const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype)
 {
+   char *charset;
    const char *curr;
    CacheEntry_t *entry = Cache_entry_search_with_redirect(url);
 
-   if (!entry)
-      return NULL;
+   dReturn_val_if_fail (entry != NULL, NULL);
+
+   MSG("a_Cache_set_content_type {%s} {%s}\n", ctype, URL_STR(url));
 
    curr = Cache_current_content_type(entry);
-   if (entry->TypeMeta && (force == FALSE)) {
-      /* it's already been set */
-      return curr;
-  }
-
-   if (a_Misc_content_type_cmp(curr, ctype)) {
-      char *charset;
-
-      dFree(entry->TypeMeta);
+   if (entry->TypeMeta) {
+      /* Type is already been set. Do nothing.
+       * Multiple META elements? */
+   } else if (a_Misc_content_type_cmp(curr, ctype)) {
+      /* TypeMeta not set, and META gives one different from default */
       curr = entry->TypeMeta = dStrdup(ctype);
-
       if (entry->CharsetDecoder)
          a_Decode_free(entry->CharsetDecoder);
       a_Misc_parse_content_type(ctype, NULL, NULL, &charset);
       entry->CharsetDecoder = a_Decode_charset_init(charset);
       dFree(charset);
 
+      /* Invalidate UTF8Data */
       dStr_free(entry->UTF8Data, 1);
-      if (entry->CharsetDecoder && entry->DataRefcount > 0)
-         entry->UTF8Data = a_Decode_process(entry->CharsetDecoder,
-                                            entry->Data->str,
-                                            entry->Data->len);
-      else
-         entry->UTF8Data = NULL;
+      entry->UTF8Data = NULL;
+
    }
 
    return curr;
@@ -634,7 +627,7 @@
 static void Cache_parse_header(CacheEntry_t *entry)
 {
    char *header = entry->Header->str;
-   char *Length, *Type, *location_str, *encoding, *charset;
+   char *Length, *Type, *location_str, *encoding;
 #ifndef DISABLE_COOKIES
    Dlist *Cookies;
 #endif
@@ -643,6 +636,8 @@
    void *data;
    int i;
 
+   MSG("Cache_parse_header\n");
+
    if (entry->Header->len > 12) {
       if (header[9] == '1' && header[10] == '0' && header[11] == '0') {
          /* 100: Continue. The "real" header has not come yet. */
@@ -753,20 +748,11 @@
          MSG_HTTP("Server didn't send Content-Type in header.\n");
       }
    } else {
+      /* This HTTP Content-Type is not trusted. It's checked against real data
+       * in Cache_process_queue(); only then CA_GotContentType becomes true. */
       entry->TypeHdr = Type;
-      _MSG("Content-Type {%s} {%s}\n", Type, URL_STR(entry->Url));
-      /* This Content-Type is not trusted. It's checked against real data
-       * in Cache_process_queue(); only then CA_GotContentType becomes true.
-       */
-      a_Misc_parse_content_type(Type, NULL, NULL, &charset);
-      if (charset) {
-         entry->CharsetDecoder = a_Decode_charset_init(charset);
-         if (entry->CharsetDecoder) {
-            dStr_free(entry->UTF8Data, 1);
-            entry->UTF8Data = dStr_new("");
-         }
-         dFree(charset);
-      }
+      MSG("TypeHdr  {%s} {%s}\n", Type, URL_STR(entry->Url));
+      MSG("TypeMeta {%s}\n", entry->TypeMeta);
    }
 }
 
@@ -816,15 +802,60 @@
 void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size,
                           const DilloUrl *Url)
 {
-   int offset = 0;
-   int len;
+   int offset, len;
    const char *str;
+   Dstr *dstr1, *dstr2, *dstr3;
    CacheEntry_t *entry = Cache_entry_search(Url);
 
    /* Assert a valid entry (not aborted) */
    dReturn_if_fail (entry != NULL);
 
-   if (Op == IOClose) {
+   MSG("__a_Cache_process_dbuf__\n");
+
+   if (Op == IORead) {
+      /* 
+       * Cache_get_header() will set CA_GotHeader if it has a full header, and
+       * Cache_parse_header() will unset it if the header ends being
+       * merely an informational response from the server (i.e., 100 Continue)
+       */
+      for (offset = 0; !(entry->Flags & CA_GotHeader) &&
+           (len = Cache_get_header(entry, buf + offset, buf_size - offset));
+           Cache_parse_header(entry) ) {
+         offset += len;
+      }
+
+      if (entry->Flags & CA_GotHeader) {
+         str = buf + offset;
+         len = buf_size - offset;
+         entry->TransferSize += len;
+         dstr1 = dstr2 = dstr3 = NULL;
+
+         /* Decode arrived data (<= 3 stages) */
+         if (entry->TransferDecoder) {
+            dstr1 = a_Decode_process(entry->TransferDecoder, str, len);
+            str = dstr1->str;
+            len = dstr1->len;
+         }
+         if (entry->ContentDecoder) {
+            dstr2 = a_Decode_process(entry->ContentDecoder, str, len);
+            str = dstr2->str;
+            len = dstr2->len;
+         }
+         dStr_append_l(entry->Data, str, len);
+         if (entry->CharsetDecoder && entry->UTF8Data) {
+            dstr3 = a_Decode_process(entry->CharsetDecoder, str, len);
+            dStr_append_l(entry->UTF8Data, dstr3->str, dstr3->len);
+         }
+         dStr_free(dstr1, 1);
+         dStr_free(dstr2, 1);
+         dStr_free(dstr3, 1);
+      
+         if (entry->Data->len)
+            entry->Flags &= ~CA_IsEmpty;
+
+         Cache_process_queue(entry);
+      }
+   } else if (Op == IOClose) {
       if ((entry->Flags & CA_GotLength) &&
           (entry->ExpectedSize != entry->TransferSize)) {
          MSG_HTTP("Content-Length does NOT match message body,\n"
@@ -847,61 +878,11 @@
       if (entry->Flags & CA_GotHeader) {
          Cache_unref_data(entry);
       }
-      return;
+
    } else if (Op == IOAbort) {
       /* unused */
       MSG("a_Cache_process_dbuf Op = IOAbort; not implemented!\n");
-      return;
-   }
-
-   /*
-    * Cache_get_header() will set CA_GotHeader if it has a full header, and
-    * Cache_parse_header() will unset it if the header turns out to have been
-    * merely an informational response from the server (i.e., 100 Continue)
-    */
-   while (!(entry->Flags & CA_GotHeader) &&
-          (len = Cache_get_header(entry, buf + offset, buf_size - offset))) {
-      offset += len;
-      /* Let's scan, allocate, and set things according to header info */
-      Cache_parse_header(entry);
    }
-
-   if (!(entry->Flags & CA_GotHeader))
-      return;
-
-   str = buf + offset;
-   len = buf_size - offset;
-   entry->TransferSize += len;
-
-   if (entry->TransferDecoder) {
-      Dstr *dbuf = a_Decode_process(entry->TransferDecoder, str, len);
-      str = dbuf->str;
-      len = dbuf->len;
-      dStr_free(dbuf, 0);
-   }
-   if (entry->ContentDecoder) {
-      Dstr *dbuf = a_Decode_process(entry->ContentDecoder, str, len);
-      if (entry->TransferDecoder)
-         dFree((char *)str);
-      str = dbuf->str;
-      len = dbuf->len;
-      dStr_free(dbuf, 0);
-   }
-   dStr_append_l(entry->Data, str, len);
-
-   if (entry->UTF8Data) {
-      Dstr *dbuf = a_Decode_process(entry->CharsetDecoder, str, len);
-      dStr_append_l(entry->UTF8Data, dbuf->str, dbuf->len);
-      dStr_free(dbuf, 1);
-   }
-
-   if (entry->TransferDecoder || entry->ContentDecoder)
-      dFree((char *)str);
-
-   if (entry->Data->len)
-      entry->Flags &= ~CA_IsEmpty;
-
-   Cache_process_queue(entry);
 }
 
 /*
--- a/src/cache.h	Tue Jan 06 20:22:43 2009 +0100
+++ b/src/cache.h	Wed Jan 07 15:53:19 2009 -0300
@@ -62,8 +62,7 @@
 int a_Cache_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize);
 void a_Cache_unref_buf(const DilloUrl *Url);
 const char *a_Cache_get_content_type(const DilloUrl *url);
-const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
-                                     bool_t force);
+const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype);
 uint_t a_Cache_get_flags(const DilloUrl *url);
 void a_Cache_process_dbuf(int Op, const char *buf, size_t buf_size,
                           const DilloUrl *Url);
--- a/src/capi.c	Tue Jan 06 20:22:43 2009 +0100
+++ b/src/capi.c	Wed Jan 07 15:53:19 2009 -0300
@@ -421,10 +421,9 @@
 /*
  * Set the Content-Type for the URL. 
  */
-const char *a_Capi_set_content_type(const DilloUrl *url, const char *ctype,
-                                        bool_t force)
+const char *a_Capi_set_content_type(const DilloUrl *url, const char *ctype)
 {
-   return a_Cache_set_content_type(url, ctype, force);
+   return a_Cache_set_content_type(url, ctype);
 }
 
 /*
--- a/src/capi.h	Tue Jan 06 20:22:43 2009 +0100
+++ b/src/capi.h	Wed Jan 07 15:53:19 2009 -0300
@@ -26,8 +26,7 @@
 int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize);
 void a_Capi_unref_buf(const DilloUrl *Url);
 const char *a_Capi_get_content_type(const DilloUrl *url);
-const char *a_Capi_set_content_type(const DilloUrl *url, const char *ctype,
-                                    bool_t force);
+const char *a_Capi_set_content_type(const DilloUrl *url, const char *ctype);
 int a_Capi_get_flags(const DilloUrl *Url);
 int a_Capi_dpi_send_cmd(DilloUrl *url, void *bw, char *cmd, char *server,
                          int flags);
--- a/src/html.cc	Tue Jan 06 20:22:43 2009 +0100
+++ b/src/html.cc	Wed Jan 07 15:53:19 2009 -0300
@@ -583,7 +583,15 @@
    char *buf = Buf + Start_Ofs;
    int bufsize = BufSize - Start_Ofs;
 
+   MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
+#if 0
+   char *aux = dStrndup(Buf, BufSize);
+   MSG(" {%s}\n", aux);
+   dFree(aux);
+#endif
+
    dReturn_if_fail (dw != NULL);
+   dReturn_if_fail (stop_parser == FALSE);
 
    Start_Buf = Buf;
    token_start = Html_write_raw(this, buf, bufsize, Eof);
@@ -1583,6 +1591,7 @@
 
       if (html->repush_after_head) {
          html->stop_parser = true;
+         MSG(" [html->stop_parser = true]\n");
          a_Nav_repush(html->bw);
       }
    }
@@ -2789,9 +2798,8 @@
       } else if (!dStrcasecmp(equiv, "content-type") &&
                  (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
          if (a_Misc_content_type_cmp(html->content_type, content)) {
-            const bool_t force = FALSE;
             const char *new_content =
-               a_Capi_set_content_type(html->page_url, content, force);
+               a_Capi_set_content_type(html->page_url, content);
             /* Cannot ask cache whether the content type was changed, as
              * this code in another bw might have already changed it for us.
              */
@@ -2934,7 +2942,7 @@
    }
 }
 
-static void Html_tag_open_default(DilloHtml *html, const char *tag, int tagsize)
+static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
 {
 }
 
@@ -3311,6 +3319,8 @@
    char *start = tag + 1; /* discard the '<' */
    int IsCloseTag = (*start == '/');
 
+   dReturn_if_fail ( html->stop_parser == false );
+
    ni = a_Html_tag_index(start + IsCloseTag);
    if (ni == -1) {
       /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
@@ -3391,9 +3401,8 @@
       }
 
       /* Call the open function for this tag */
+      _MSG("Open : %s\n", Tags[ni].name);
       Tags[ni].open (html, tag, tagsize);
-      if (html->stop_parser)
-         break;
 
       /* Request inmediate close for elements with forbidden close tag. */
       /* TODO: XHTML always requires close tags. A simple implementation
@@ -3413,6 +3422,7 @@
            (strchr(" \"'", tag[tagsize-3]) ||                   /* [ "']/> */
             (size_t)tagsize == strlen(Tags[ni].name) + 3))) {   /*  <x/>   */
    
+         _MSG("Close: %s\n", Tags[ni].name);
          Html_tag_cleanup_at_close(html, ni);
          /* This was a close tag */
          html->ReqTagClose = false;