changeset 260:81fc5011ea56

- Moved charset decoding into cache.
author jcid
date Mon, 02 Jun 2008 04:27:43 +0200
parents ea5982fb5dd9
children b4a68758b032
files ChangeLog src/cache.c src/cache.h src/capi.c src/capi.h src/dialog.cc src/dialog.hh src/html.cc src/nav.c src/nav.h src/plain.cc src/uicmd.cc
diffstat 12 files changed, 200 insertions(+), 103 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Mon Jun 02 02:42:24 2008 +0200
+++ b/ChangeLog	Mon Jun 02 04:27:43 2008 +0200
@@ -105,6 +105,7 @@
  - Made zlib a configure requirement, and cleaned up configure.in.
  - Fixed a segfault bug in Nav.c.
  - Switched from charset to content-type for handling data.
+ - Moved charset decoding into cache.
    Patches: place (AKA corvid)
 +- Fixed a problem with locally-installed dpis.
  - Added code for optional image loading (nice interface) very advanced!
--- a/src/cache.c	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/cache.c	Mon Jun 02 04:27:43 2008 +0200
@@ -58,11 +58,14 @@
    Dstr *Header;             /* HTTP header */
    const DilloUrl *Location; /* New URI for redirects */
    Dstr *Data;               /* Pointer to raw data */
+   Dstr *UTF8Data;           /* Data after charset translation */
+   int DataRefcount;         /* Reference count */
    Decode *TransferDecoder;  /* Transfer decoder (e.g., chunked) */
    Decode *ContentDecoder;   /* Data decoder (e.g., gzip) */
+   Decode *CharsetDecoder;   /* Translates text to UTF-8 encoding */
    int ExpectedSize;         /* Goal size of the HTTP transfer (0 if unknown)*/
    int TransferSize;         /* Actual length of the HTTP transfer */
-   uint_t Flags;             /* Look Flag Defines in cache.h */
+   uint_t Flags;             /* See Flag Defines in cache.h */
 } CacheEntry_t;
 
 
@@ -207,8 +210,11 @@
    NewEntry->Header = dStr_new("");
    NewEntry->Location = NULL;
    NewEntry->Data = dStr_sized_new(8*1024);
+   NewEntry->UTF8Data = NULL;
+   NewEntry->DataRefcount = 0;
    NewEntry->TransferDecoder = NULL;
    NewEntry->ContentDecoder = NULL;
+   NewEntry->CharsetDecoder = NULL;
    NewEntry->ExpectedSize = 0;
    NewEntry->TransferSize = 0;
    NewEntry->Flags = CA_IsEmpty;
@@ -296,6 +302,9 @@
    dStr_free(entry->Header, TRUE);
    a_Url_free((DilloUrl *)entry->Location);
    dStr_free(entry->Data, 1);
+   dStr_free(entry->UTF8Data, 1);
+   if (entry->CharsetDecoder)
+      a_Decode_free(entry->CharsetDecoder);
    dFree(entry);
 }
 
@@ -393,6 +402,43 @@
 }
 
 /*
+ * Take a reference on the entry's data (a NULL entry is ignored).
+ */
+static void Cache_ref_data(CacheEntry_t *entry)
+{
+   if (!entry)
+      return;
+   ++entry->DataRefcount;
+   _MSG("DataRefcount++: %d\n", entry->DataRefcount);
+   if (entry->DataRefcount == 1 && entry->CharsetDecoder) {
+      /* first reference: build the UTF-8 copy from the raw data */
+      entry->UTF8Data = a_Decode_process(entry->CharsetDecoder,
+                                         entry->Data->str, entry->Data->len);
+   }
+}
+
+/*
+ * Drop one reference to the entry's data (a NULL entry is ignored).
+ * The UTF-8 copy is freed when the count of a decoded entry hits zero.
+ */
+static void Cache_unref_data(CacheEntry_t *entry)
+{
+   if (!entry)
+      return;
+   if (--entry->DataRefcount < 0) {
+      /* unbalanced unref: report and clamp, decoder or not */
+      MSG_ERR("Cache_unref_data: negative refcount\n");
+      entry->DataRefcount = 0;
+      return;
+   }
+   _MSG("DataRefcount--: %d\n", entry->DataRefcount);
+   if (entry->CharsetDecoder && entry->DataRefcount == 0) {
+      dStr_free(entry->UTF8Data, 1);
+      entry->UTF8Data = NULL;
+   }
+}
+
+/*
  * Get current content type.
  */
 static const char *Cache_current_content_type(CacheEntry_t *entry)
@@ -412,25 +458,54 @@
 }
 
 /*
+ * Pick the buffer to hand out: the UTF-8 copy if any, else the raw data.
+ */
+static Dstr *Cache_data(CacheEntry_t *entry)
+{
+   return (entry->UTF8Data == NULL) ? entry->Data : entry->UTF8Data;
+}
+
+/*
  * Change Content-Type for cache entry found by url.
  * Return new content type.
  */
 const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
                                      bool_t force)
 {
-   const char *ret;
+   const char *curr;
    CacheEntry_t *entry = Cache_entry_search_with_redirect(url);
 
-   if (!entry) {
-      ret = NULL;
-   } else {
-      if (force == TRUE || entry->TypeMeta == NULL) {
-         dFree(entry->TypeMeta);
-         entry->TypeMeta = dStrdup(ctype);
-      }
-      ret = entry->TypeMeta;
+   if (!entry)
+      return NULL;
+
+   curr = Cache_current_content_type(entry);
+   if (entry->TypeMeta && (force == FALSE)) {
+      /* it's already been set */
+      return curr;
+  }
+
+   if (a_Misc_content_type_cmp(curr, ctype)) {
+      char *charset;
+
+      dFree(entry->TypeMeta);
+      curr = entry->TypeMeta = dStrdup(ctype);
+
+      if (entry->CharsetDecoder)
+         a_Decode_free(entry->CharsetDecoder);
+      a_Misc_parse_content_type(ctype, NULL, NULL, &charset);
+      entry->CharsetDecoder = a_Decode_charset_init(charset);
+      dFree(charset);
+
+      dStr_free(entry->UTF8Data, 1);
+      if (entry->CharsetDecoder && entry->DataRefcount > 0)
+         entry->UTF8Data = a_Decode_process(entry->CharsetDecoder,
+                                            entry->Data->str,
+                                            entry->Data->len);
+      else
+         entry->UTF8Data = NULL;
    }
-   return ret;
+
+   return curr;
 }
 
 /*
@@ -440,12 +515,28 @@
 int a_Cache_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize)
 {
    CacheEntry_t *entry = Cache_entry_search_with_redirect(Url);
-   *BufSize = (entry) ? entry->Data->len : 0;
-   *PBuf = (entry) ? entry->Data->str : NULL;
+   if (entry) {
+      Cache_ref_data(entry);
+      Dstr *data = Cache_data(entry);
+      *PBuf = data->str;
+      *BufSize = data->len;
+   } else {
+      *PBuf = NULL;
+      *BufSize = 0;
+   }
    return (entry ? 1 : 0);
 }
 
 /*
+ * Release a buffer reference taken by a_Cache_get_buf().
+ */
+void a_Cache_unref_buf(const DilloUrl *Url)
+{
+   CacheEntry_t *found = Cache_entry_search_with_redirect(Url);
+   Cache_unref_data(found);
+}
+
+/*
  * Extract a single field from the header, allocating and storing the value
  * in 'field'. ('fieldname' must not include the trailing ':')
  * Return a new string with the field-content if found (NULL on error)
@@ -526,7 +617,7 @@
 static void Cache_parse_header(CacheEntry_t *entry)
 {
    char *header = entry->Header->str;
-   char *Length, *Type, *location_str, *encoding;
+   char *Length, *Type, *location_str, *encoding, *charset;
 #ifndef DISABLE_COOKIES
    Dlist *Cookies;
 #endif
@@ -622,6 +713,7 @@
       dStr_free(entry->Data, 1);
       entry->Data = dStr_sized_new(MIN(entry->ExpectedSize, MAX_INIT_BUF));
    }
+   Cache_ref_data(entry);
 
    /* Get Content-Type */
    if ((Type = Cache_parse_field(header, "Content-Type")) == NULL) {
@@ -635,6 +727,13 @@
       /* This Content-Type is not trusted. It's checked against real data
        * in Cache_process_queue(); only then CA_GotContentType becomes true.
        */
+      a_Misc_parse_content_type(Type, NULL, NULL, &charset);
+      if (charset) {
+         entry->CharsetDecoder = a_Decode_charset_init(charset);
+         if (entry->CharsetDecoder)
+            entry->UTF8Data = dStr_new("");
+         dFree(charset);
+      }
    }
 }
 
@@ -712,6 +811,9 @@
       }
       dStr_fit(entry->Data);                /* fit buffer size! */
       Cache_process_queue(entry);
+      if (entry->Flags & CA_GotHeader) {
+         Cache_unref_data(entry);
+      }
       return;
    } else if (Op == IOAbort) {
       /* unused */
@@ -753,6 +855,13 @@
       dStr_free(dbuf, 0);
    }
    dStr_append_l(entry->Data, str, len);
+
+   if (entry->UTF8Data) {
+      Dstr *dbuf = a_Decode_process(entry->CharsetDecoder, str, len);
+      dStr_append_l(entry->UTF8Data, dbuf->str, dbuf->len);
+      dStr_free(dbuf, 1);
+   }
+
    if (entry->TransferDecoder || entry->ContentDecoder)
       dFree((char *)str);
 
@@ -860,6 +969,7 @@
    uint_t i;
    int st;
    const char *Type;
+   Dstr *data;
    CacheClient_t *Client;
    DilloWeb *ClientWeb;
    BrowserWindow *Client_bw = NULL;
@@ -954,8 +1064,14 @@
          }
 
          /* Send data to our client */
-         if ((Client->BufSize = entry->Data->len) > 0) {
-            Client->Buf = entry->Data->str;
+         if (ClientWeb->flags & WEB_Download) {
+            /* for download, always provide original data, not translated */
+            data = entry->Data;
+         } else {
+            data = Cache_data(entry);
+         }
+         if ((Client->BufSize = data->len) > 0) {
+            Client->Buf = data->str;
             (Client->Callback)(CA_Send, Client);
          }
 
@@ -1001,10 +1117,12 @@
  */
 static void Cache_delayed_process_queue_callback(void *data)
 {
-   void *entry;
+   CacheEntry_t *entry;
 
-   while ((entry = dList_nth_data(DelayedQueue, 0))) {
-      Cache_process_queue((CacheEntry_t *)entry);
+   while ((entry = (CacheEntry_t *)dList_nth_data(DelayedQueue, 0))) {
+      Cache_ref_data(entry);
+      Cache_process_queue(entry);
+      Cache_unref_data(entry);
       /* note that if Cache_process_queue removes the entry,
        * the following dList_remove has no effect. */
       dList_remove(DelayedQueue, entry);
--- a/src/cache.h	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/cache.h	Mon Jun 02 04:27:43 2008 +0200
@@ -59,6 +59,7 @@
 void a_Cache_init(void);
 int a_Cache_open_url(void *Web, CA_Callback_t Call, void *CbData);
 int a_Cache_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize);
+void a_Cache_unref_buf(const DilloUrl *Url);
 const char *a_Cache_get_content_type(const DilloUrl *url);
 const char *a_Cache_set_content_type(const DilloUrl *url, const char *ctype,
                                      bool_t force);
--- a/src/capi.c	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/capi.c	Mon Jun 02 04:27:43 2008 +0200
@@ -406,6 +406,14 @@
 }
 
 /*
+ * Unref the cache's buffer for Url (forwards to a_Cache_unref_buf()).
+ */
+void a_Capi_unref_buf(const DilloUrl *Url)
+{
+   a_Cache_unref_buf(Url);
+}
+
+/*
  * Get the Content-Type associated with the URL
  */
 const char *a_Capi_get_content_type(const DilloUrl *url)
--- a/src/capi.h	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/capi.h	Mon Jun 02 04:27:43 2008 +0200
@@ -24,6 +24,7 @@
 void a_Capi_init(void);
 int a_Capi_open_url(DilloWeb *web, CA_Callback_t Call, void *CbData);
 int a_Capi_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize);
+void a_Capi_unref_buf(const DilloUrl *Url);
 const char *a_Capi_get_content_type(const DilloUrl *url);
 const char *a_Capi_set_content_type(const DilloUrl *url, const char *ctype,
                                     bool_t force);
--- a/src/dialog.cc	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/dialog.cc	Mon Jun 02 04:27:43 2008 +0200
@@ -118,9 +118,9 @@
 }
 
 /*
- * Show a new window with the provided text
+ * Make a new window with the provided text
  */
-void a_Dialog_text_window(const char *txt, const char *title)
+void *a_Dialog_make_text_window(const char *txt, const char *title)
 {
    //int wh = 600, ww = 650, bh = 30;
    int wh = prefs.height, ww = prefs.width, bh = 30;
@@ -151,7 +151,15 @@
 
    window->resizable(td);
    window->end();
-   window->show();
+   return window;
+}
+
+/*
+ * Display a window created by a_Dialog_make_text_window().
+ */
+void a_Dialog_show_text_window(void *vWindow)
+{
+   static_cast<Window *>(vWindow)->show();
 }
 
 /*--------------------------------------------------------------------------*/
--- a/src/dialog.hh	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/dialog.hh	Mon Jun 02 04:27:43 2008 +0200
@@ -19,7 +19,8 @@
                                  const char *pattern, const char *fname);
 char *a_Dialog_open_file(const char *msg,
                          const char *pattern, const char *fname);
-void a_Dialog_text_window(const char *txt, const char *title);
+void *a_Dialog_make_text_window(const char *txt, const char *title);
+void a_Dialog_show_text_window(void *vWindow);
 
 #ifdef __cplusplus
 }
--- a/src/html.cc	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/html.cc	Mon Jun 02 04:27:43 2008 +0200
@@ -341,9 +341,8 @@
    /* Variables required at parsing time                                 */
    /* -------------------------------------------------------------------*/
    size_t Buf_Consumed; /* amount of source from cache consumed */
-   Dstr *Local_Buf;    /* source converted to displayable encoding (UTF-8) */
-   int Local_Ofs;
-   Decode *decoder;
+   char *Start_Buf;
+   int Start_Ofs;
    char *content_type, *charset;
    bool stop_parser;
 
@@ -794,8 +793,8 @@
 
    /* Init for-parsing variables */
    Buf_Consumed = 0;
-   Local_Buf = dStr_new("");
-   Local_Ofs = 0;
+   Start_Buf = NULL;
+   Start_Ofs = 0;
 
    MSG("DilloHtml(): content type: %s\n", content_type);
    this->content_type = dStrdup(content_type);
@@ -803,7 +802,6 @@
    /* get charset */
    a_Misc_parse_content_type(content_type, NULL, NULL, &charset);
 
-   decoder = a_Decode_charset_init(charset);
    stop_parser = false;
 
    CurrTagOfs = 0;
@@ -954,37 +952,17 @@
 void DilloHtml::write(char *Buf, int BufSize, int Eof)
 {
    int token_start;
-   Dstr *new_text = NULL;
+   char *buf = Buf + Start_Ofs;
+   int bufsize = BufSize - Start_Ofs;
 
    dReturn_if_fail (dw != NULL);
 
-   char *str = Buf + Buf_Consumed;
-   int len = BufSize - Buf_Consumed;
-
-   /* decode to target charset (UTF-8) */
-   if (decoder) {
-      new_text = a_Decode_process(decoder, str, len);
-      str = new_text->str;
-      len = new_text->len;
-   }
-   dStr_append_l(Local_Buf, str, len);
-   dStr_free(new_text, 1);
-
-   token_start = Html_write_raw(this, Local_Buf->str + Local_Ofs,
-                                Local_Buf->len - Local_Ofs, Eof);
-   Buf_Consumed = BufSize;
-   Local_Ofs += token_start;
-
-   /* update line number and tag offset */
-   getCurTagLineNumber();
-
-   /* don't need anything further back */
-   dStr_erase(Local_Buf, 0, CurrTagOfs);
-   Local_Ofs -= CurrTagOfs;
-   OldTagOfs = CurrTagOfs = 0;
+   Start_Buf = Buf;
+   token_start = Html_write_raw(this, buf, bufsize, Eof);
+   Start_Ofs += token_start;
 
    if (bw)
-      a_UIcmd_set_page_prog(bw, BufSize, 1);
+      a_UIcmd_set_page_prog(bw, Start_Ofs, 1);
 }
 
 /*
@@ -994,7 +972,7 @@
 int DilloHtml::getCurTagLineNumber()
 {
    int i, ofs, line;
-   const char *p = Local_Buf->str;
+   const char *p = Start_Buf;
 
    dReturn_val_if_fail(p != NULL, -1);
 
@@ -1018,9 +996,6 @@
 
    dStr_free(Stash, TRUE);
    dStr_free(attr_data, TRUE);
-
-   a_Decode_free(decoder);
-   dStr_free(Local_Buf, TRUE);
    dFree(content_type);
    dFree(charset);
 }
@@ -5945,7 +5920,7 @@
                buf_index = bufsize;
          } else {
             /* Tag: search end of tag (skipping over quoted strings) */
-            html->CurrTagOfs = html->Local_Ofs + token_start;
+            html->CurrTagOfs = html->Start_Ofs + token_start;
 
             while ( buf_index < bufsize ) {
                buf_index++;
--- a/src/nav.c	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/nav.c	Mon Jun 02 04:27:43 2008 +0200
@@ -557,3 +557,10 @@
    return a_Capi_get_buf(Url, PBuf, BufSize);
 }
 
+/*
+ * Wrapper for a_Capi_unref_buf(); pairs with a_Nav_get_buf().
+ */
+void a_Nav_unref_buf(const DilloUrl *Url)
+{
+   a_Capi_unref_buf(Url);
+}
--- a/src/nav.h	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/nav.h	Mon Jun 02 04:27:43 2008 +0200
@@ -33,6 +33,7 @@
 void a_Nav_save_url(BrowserWindow *bw,
                     const DilloUrl *url, const char *filename);
 int a_Nav_get_buf(const DilloUrl *Url, char **PBuf, int *BufSize);
+void a_Nav_unref_buf(const DilloUrl *Url);
 
 #ifdef __cplusplus
 }
--- a/src/plain.cc	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/plain.cc	Mon Jun 02 04:27:43 2008 +0200
@@ -22,7 +22,6 @@
 #include "bw.h"
 #include "web.hh"
 #include "misc.h"
-#include "decode.h"
 
 #include "uicmd.hh"
 
@@ -49,16 +48,12 @@
    BrowserWindow *bw;
    DilloUrl *url;
 
-   Decode *decoder;
-   size_t Buf_Consumed;
-   char *content_type, *charset;
-
    Widget *dw;
    style::Style *widgetStyle;
+   size_t Start_Ofs;    /* Offset of where to start reading next */
    int state;
 
-   DilloPlain(BrowserWindow *bw, const DilloUrl *url,
-              const char *content_type);
+   DilloPlain(BrowserWindow *bw, const DilloUrl *url);
    ~DilloPlain();
 
    void write(void *Buf, uint_t BufSize, int Eof);
@@ -88,8 +83,7 @@
 /*
  * Diplain constructor.
  */
-DilloPlain::DilloPlain(BrowserWindow *p_bw, const DilloUrl *p_url,
-                       const char *content_type)
+DilloPlain::DilloPlain(BrowserWindow *p_bw, const DilloUrl *p_url)
 {
    style::StyleAttrs styleAttrs;
    style::FontAttrs fontAttrs;
@@ -101,16 +95,9 @@
    bw = p_bw;
    url = a_Url_dup(p_url);
    dw = new Textblock (prefs.limit_text_width);
+   Start_Ofs = 0;
    state = ST_SeekingEol;
 
-   MSG("PLAIN content type: %s\n", content_type);
-   this->content_type = dStrdup(content_type);
-   /* get charset */
-   a_Misc_parse_content_type(content_type, NULL, NULL, &charset);
-   /* Initiallize the charset decoder */
-   decoder = a_Decode_charset_init(charset);
-   Buf_Consumed = 0;
-
    /* Create the font and attribute for the page. */
    fontAttrs.name = prefs.fw_fontname;
    fontAttrs.size = (int) rint(12.0 * prefs.font_factor);
@@ -140,9 +127,6 @@
 {
    MSG("::~DilloPlain()\n");
    a_Url_free(url);
-   a_Decode_free(decoder);
-   dFree(content_type);
-   dFree(charset);
    widgetStyle->unref();
 }
 
@@ -169,24 +153,12 @@
 {
    char *Start;
    char *data;
-   Dstr *new_text = NULL;
    uint_t i, len, MaxBytes;
 
-   _MSG(" DilloPlain::write Buf=%p, BufSize=%d Buf_Consumed=%d Eof=%d\n",
-       Buf, BufSize, Buf_Consumed, Eof);
-
-   char *str = (char*)Buf + Buf_Consumed;
-   int str_len = BufSize - Buf_Consumed;
+   _MSG("DilloPlain::write Eof=%d\n", Eof);
 
-   /* decode to target charset (UTF-8) */
-   if (decoder) {
-      new_text = a_Decode_process(decoder, str, str_len);
-      str = new_text->str;
-      str_len = new_text->len;
-   }
-
-   Start = str;
-   MaxBytes = str_len;
+   Start = (char*)Buf + Start_Ofs;
+   MaxBytes = BufSize - Start_Ofs;
    i = len = 0;
    while ( i < MaxBytes ) {
       switch ( state ) {
@@ -209,28 +181,28 @@
          break;
       }
    }
+   Start_Ofs += i - len;
    if (Eof && len) {
       data = dStrndup(Start + i - len, len);
       DW2TB(dw)->addText(a_Misc_expand_tabs(data), widgetStyle);
       DW2TB(dw)->addParbreak(0, widgetStyle);
       dFree(data);
-      len = 0;
+      Start_Ofs += len;
    }
-   Buf_Consumed = BufSize - len;
-   dStr_free(new_text, 1);
 
    DW2TB(dw)->flush(Eof ? true : false);
+
    if (bw)
-      a_UIcmd_set_page_prog(bw, Buf_Consumed, 1);
+      a_UIcmd_set_page_prog(bw, Start_Ofs, 1);
 }
 
 /*
  * Set callback function and callback data for "text/" MIME major-type.
  */
-void *a_Plain_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
+void *a_Plain_text(const char *type, void *P, CA_Callback_t *Call, void **Data)
 {
    DilloWeb *web = (DilloWeb*)P;
-   DilloPlain *plain = new DilloPlain(web->bw, web->url, Type);
+   DilloPlain *plain = new DilloPlain(web->bw, web->url);
 
    *Call = (CA_Callback_t)Plain_callback;
    *Data = (void*)plain;
--- a/src/uicmd.cc	Mon Jun 02 02:42:24 2008 +0200
+++ b/src/uicmd.cc	Mon Jun 02 04:27:43 2008 +0200
@@ -495,7 +495,9 @@
    int buf_size;
 
    if (a_Nav_get_buf(url, &buf, &buf_size)) {
-      a_Dialog_text_window(buf, "View Page source");
+      void *vWindow = a_Dialog_make_text_window(buf, "View Page source");
+      a_Nav_unref_buf(url);
+      a_Dialog_show_text_window(vWindow);
    }
 }
 
@@ -507,7 +509,9 @@
    BrowserWindow *bw = (BrowserWindow*)vbw;
 
    if (bw->num_page_bugs > 0) {
-      a_Dialog_text_window(bw->page_bugs->str, "Detected HTML errors");
+      void *vWindow = a_Dialog_make_text_window(bw->page_bugs->str,
+                                                "Detected HTML errors");
+      a_Dialog_show_text_window(vWindow);
    } else {
       a_Dialog_msg("Zero detected HTML errors!");
    }