changeset 1644:2650456b8199

Filter automatic requests: add a filter_auto_requests preference with a same_domain option so that spying on users isn't so easy.
author corvid <corvid@lavabit.com>
date Tue, 06 Apr 2010 02:27:54 +0000
parents c5638daea2fc
children 777facee9e6e
files dillorc src/cache.c src/capi.c src/html.cc src/nav.c src/nav.h src/prefs.c src/prefs.h src/prefsparser.cc src/uicmd.cc src/url.c src/url.h src/web.cc src/web.hh
diffstat 14 files changed, 247 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/dillorc	Mon Apr 05 23:36:34 2010 +0200
+++ b/dillorc	Tue Apr 06 02:27:54 2010 +0000
@@ -26,6 +26,13 @@
 # (While browsing, this can be changed from the tools/settings menu.)
 #parse_embedded_css=YES
 
+# How should Dillo restrict automatic requests (e.g., redirections,
+# pages containing images or stylesheets)?
+# allow_all   : Permit every automatic request, whatever its destination.
+# same_domain : Permit www.example.org to load an image from img.example.org,
+#               but not from the unrelated ad.doubleclick.net.
+#filter_auto_requests=same_domain
+
 # Change the buffering scheme for drawing
 # 0 no double buffering - useful for debugging
 # 1 light buffering using a single back buffer for all windows
--- a/src/cache.c	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/cache.c	Tue Apr 06 02:27:54 2010 +0000
@@ -941,7 +941,7 @@
          NewUrl = a_Url_new(URL_STR_(entry->Location), URL_STR_(entry->Url));
          if (entry->Flags & CA_TempRedirect)
             a_Url_set_flags(NewUrl, URL_FLAGS(NewUrl) | URL_E2EQuery);
-         a_Nav_push(bw, NewUrl);
+         a_Nav_push(bw, NewUrl, entry->Url);
          a_Url_free(NewUrl);
       } else {
          /* Sub entity redirection (most probably an image) */
--- a/src/capi.c	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/capi.c	Tue Apr 06 02:27:54 2010 +0000
@@ -354,6 +354,61 @@
 }
 
 /*
+ * When dillo wants to open a URL, this can be either due to user action
+ * (e.g., typing in a URL, clicking a link), or automatic (HTTP header
+ * indicates redirection, META HTML tag with refresh attribute and 0 delay,
+ * and images and stylesheets on an HTML page when autoloading is enabled).
+ *
+ * For a user request (requester == NULL), the action will be permitted.
+ * For an automatic request, permission to load depends on the filter set
+ * by the user (prefs.filter_auto_requests).
+ */
+static bool_t Capi_filters_allow(const DilloUrl *wanted,
+                                 const DilloUrl *requester)
+{
+   bool_t ret;
+
+   if (requester == NULL) {
+      /* request made by user */
+      ret = TRUE;
+   } else {
+      switch (prefs.filter_auto_requests) {
+         case PREFS_FILTER_SAME_DOMAIN:
+         {
+            const char *req_host = URL_HOST(requester),
+                       *want_host = URL_HOST(wanted),
+                       *req_suffix,
+                       *want_suffix;
+            if (!req_host && !want_host) {
+               ret = TRUE;
+            } else if (!req_host || !want_host) {
+               ret = FALSE;
+            } else {
+               /* This will regard "www.dillo.org" and "www.dillo.org." as
+                * different, but it doesn't seem worth caring about.
+                */
+               req_suffix = a_Url_host_find_public_suffix(req_host);
+               want_suffix = a_Url_host_find_public_suffix(want_host);
+
+               ret = dStrcasecmp(req_suffix, want_suffix) == 0;
+            }
+
+            if (ret)
+               MSG("ALLOW\n");
+            else
+               MSG("DENY\n");
+            break;
+         }
+         case PREFS_FILTER_ALLOW_ALL:
+         default:
+            ret = TRUE;
+            break;
+      }
+   }
+   return ret;
+}
+
+/*
  * Most used function for requesting a URL.
  * TODO: clean up the ad-hoc bindings with an API that allows dynamic
  *       addition of new plugins.
@@ -369,6 +424,9 @@
    const char *scheme = URL_SCHEME(web->url);
    int safe = 0, ret = 0, use_cache = 0;
 
+   dReturn_val_if_fail((a_Capi_get_flags(web->url) & CAPI_IsCached) ||
+                       Capi_filters_allow(web->url, web->requester), 0);
+
    /* reload test */
    reload = (!(a_Capi_get_flags(web->url) & CAPI_IsCached) ||
              (URL_FLAGS(web->url) & URL_E2EQuery));
--- a/src/html.cc	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/html.cc	Tue Apr 06 02:27:54 2010 +0000
@@ -105,8 +105,8 @@
                                   const char *attrname,
                                   int tag_parsing_flags);
 static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
-static void Html_load_image(BrowserWindow *bw, DilloUrl *url,
-                            DilloImage *image);
+static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
+                            const DilloUrl *requester, DilloImage *image);
 static void Html_callback(int Op, CacheClient_t *Client);
 static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
 
@@ -654,12 +654,21 @@
 {
    dReturn_if_fail (bw->nav_expecting == FALSE);
 
+   /* If the user asked for a specific URL, the user (NULL) is the requester,
+    * but if the user just asked for all URLs, use the page URL as the
+    * requester. If the possible patterns become more complex, it might be
+    * good to have the caller supply the requester instead.
+    */
+   const DilloUrl *requester = pattern ? NULL : this->page_url;
+
    for (int i = 0; i < images->size(); i++) {
       if (images->get(i)->image) {
          if ((!pattern) || (!a_Url_cmp(images->get(i)->url, pattern))) {
-            Html_load_image(bw, images->get(i)->url, images->get(i)->image);
-            a_Image_unref (images->get(i)->image);
-            images->get(i)->image = NULL;  // web owns it now
+            if (Html_load_image(bw, images->get(i)->url, requester,
+                                images->get(i)->image)) {
+               a_Image_unref (images->get(i)->image);
+               images->get(i)->image = NULL;  // web owns it now
+            }
          }
       }
    }
@@ -2089,9 +2098,10 @@
 
    load_now = prefs.load_images ||
               (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached);
-   Html_add_new_htmlimage(html, &url, load_now ? NULL : Image);
+   bool loading = false;
    if (load_now)
-      Html_load_image(html->bw, url, Image);
+      loading = Html_load_image(html->bw, url, html->page_url, Image);
+   Html_add_new_htmlimage(html, &url, loading ? NULL : Image);
 
    dFree(tooltip_str);
    dFree(width_ptr);
@@ -2103,13 +2113,13 @@
 /*
  * Tell cache to retrieve image
  */
-static void Html_load_image(BrowserWindow *bw, DilloUrl *url,
-                            DilloImage *Image)
+static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
+                            const DilloUrl *requester, DilloImage *Image)
 {
    DilloWeb *Web;
    int ClientKey;
    /* Fill a Web structure for the cache query */
-   Web = a_Web_new(url);
+   Web = a_Web_new(url, requester);
    Web->bw = bw;
    Web->Image = Image;
    a_Image_ref(Image);
@@ -2119,6 +2129,7 @@
       a_Bw_add_client(bw, ClientKey, 0);
       a_Bw_add_url(bw, url);
    }
+   return ClientKey != 0;
 }
 
 /*
@@ -2938,7 +2949,7 @@
    } else {
       /* Fill a Web structure for the cache query */
       int ClientKey;
-      DilloWeb *Web = a_Web_new(url);
+      DilloWeb *Web = a_Web_new(url, html->page_url);
       Web->bw = html->bw;
       if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
          ++html->bw->NumPendingStyleSheets;
--- a/src/nav.c	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/nav.c	Tue Apr 06 02:27:54 2010 +0000
@@ -191,7 +191,8 @@
  * This function requests the page's root-URL; images and related stuff
  * are fetched directly by the HTML module.
  */
-static void Nav_open_url(BrowserWindow *bw, const DilloUrl *url, int offset)
+static void Nav_open_url(BrowserWindow *bw, const DilloUrl *url,
+                         const DilloUrl *requester, int offset)
 {
    DilloUrl *old_url;
    bool_t MustLoad, ForceReload, Repush, IgnoreScroll;
@@ -232,7 +233,7 @@
 
       // a_Menu_pagemarks_new(bw);
 
-      Web = a_Web_new(url);
+      Web = a_Web_new(url, requester);
       Web->bw = bw;
       Web->flags |= WEB_RootUrl;
       if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
@@ -341,7 +342,8 @@
  * - Set bw to expect the URL data
  * - Ask the cache to feed back the requested URL (via Nav_open_url)
  */
-void a_Nav_push(BrowserWindow *bw, const DilloUrl *url)
+void a_Nav_push(BrowserWindow *bw, const DilloUrl *url,
+                                   const DilloUrl *requester)
 {
    dReturn_if_fail (bw != NULL);
 
@@ -353,7 +355,7 @@
    a_Nav_cancel_expect(bw);
    bw->nav_expect_url = a_Url_dup(url);
    bw->nav_expecting = TRUE;
-   Nav_open_url(bw, url, 0);
+   Nav_open_url(bw, url, requester, 0);
 }
 
 /*
@@ -370,7 +372,7 @@
       a_Url_set_flags(url, URL_FLAGS(url) | URL_ReloadFromCache);
       bw->nav_expect_url = a_Url_dup(url);
       bw->nav_expecting = TRUE;
-      Nav_open_url(bw, url, 0);
+      Nav_open_url(bw, url, NULL, 0);
       a_Url_free(url);
    }
 }
@@ -407,7 +409,7 @@
 
    if (bw->meta_refresh_status == 2) {
       Nav_stack_move_ptr(bw, -1);
-      a_Nav_push(bw, bw->meta_refresh_url);
+      a_Nav_push(bw, bw->meta_refresh_url,a_History_get_url(NAV_TOP_UIDX(bw)));
    }
    a_Url_free(bw->meta_refresh_url);
    bw->meta_refresh_url = NULL;
@@ -441,7 +443,7 @@
    a_Nav_cancel_expect(bw);
    if (--idx >= 0){
       a_UIcmd_set_msg(bw, "");
-      Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), -1);
+      Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), NULL, -1);
    }
 }
 
@@ -455,7 +457,7 @@
    a_Nav_cancel_expect(bw);
    if (++idx < a_Nav_stack_size(bw)) {
       a_UIcmd_set_msg(bw, "");
-      Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), +1);
+      Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), NULL, +1);
    }
 }
 
@@ -464,7 +466,7 @@
  */
 void a_Nav_home(BrowserWindow *bw)
 {
-   a_Nav_push(bw, prefs.home);
+   a_Nav_push(bw, prefs.home, NULL);
 }
 
 /*
@@ -499,7 +501,7 @@
          a_Url_set_flags(r_url, URL_FLAGS(r_url) & ~URL_SpamSafe);
          bw->nav_expect_url = r_url;
          bw->nav_expecting = TRUE;
-         Nav_open_url(bw, r_url, 0);
+         Nav_open_url(bw, r_url, NULL, 0);
       }
    }
 }
@@ -526,7 +528,7 @@
       a_UIcmd_open_url_nw(bw, a_History_get_url(NAV_UIDX(bw,idx)));
    } else {
       a_Nav_cancel_expect(bw);
-      Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), offset);
+      Nav_open_url(bw, a_History_get_url(NAV_UIDX(bw,idx)), NULL, offset);
       a_UIcmd_set_buttons_sens(bw);
    }
 }
@@ -563,7 +565,7 @@
 void a_Nav_save_url(BrowserWindow *bw,
                     const DilloUrl *url, const char *filename)
 {
-   DilloWeb *Web = a_Web_new(url);
+   DilloWeb *Web = a_Web_new(url, NULL);
    Web->bw = bw;
    Web->filename = dStrdup(filename);
    Web->flags |= WEB_Download;
--- a/src/nav.h	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/nav.h	Tue Apr 06 02:27:54 2010 +0000
@@ -14,7 +14,8 @@
 #endif /* __cplusplus */
 
 void a_Nav_redirection0(BrowserWindow *bw, const DilloUrl *new_url);
-void a_Nav_push(BrowserWindow *bw, const DilloUrl *url);
+void a_Nav_push(BrowserWindow *bw, const DilloUrl *url,
+                const DilloUrl *requester);
 void a_Nav_repush(BrowserWindow *bw);
 void a_Nav_back(BrowserWindow *bw);
 void a_Nav_forw(BrowserWindow *bw);
--- a/src/prefs.c	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/prefs.c	Tue Apr 06 02:27:54 2010 +0000
@@ -39,6 +39,7 @@
    prefs.buffered_drawing = 1;
    prefs.contrast_visited_color = TRUE;
    prefs.enterpress_forces_submit = FALSE;
+   prefs.filter_auto_requests = PREFS_FILTER_SAME_DOMAIN;
    prefs.focus_new_tab = TRUE;
    prefs.font_cursive = dStrdup(PREFS_FONT_CURSIVE);
    prefs.font_factor = 1.0;
--- a/src/prefs.h	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/prefs.h	Tue Apr 06 02:27:54 2010 +0000
@@ -26,6 +26,9 @@
 /* Panel sizes */
 enum { P_tiny = 0, P_small, P_medium, P_large };
 
+enum {PREFS_FILTER_ALLOW_ALL,
+      PREFS_FILTER_SAME_DOMAIN};
+
 typedef struct _DilloPrefs DilloPrefs;
 
 struct _DilloPrefs {
@@ -71,6 +74,7 @@
    bool_t load_images;
    bool_t load_stylesheets;
    bool_t parse_embedded_css;
+   int filter_auto_requests;
    int32_t buffered_drawing;
    char *font_serif;
    char *font_sans_serif;
--- a/src/prefsparser.cc	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/prefsparser.cc	Tue Apr 06 02:27:54 2010 +0000
@@ -26,6 +26,7 @@
    PREFS_INT32,
    PREFS_DOUBLE,
    PREFS_GEOMETRY,
+   PREFS_FILTER,
    PREFS_PANEL_SIZE
 } PrefType_t;
 
@@ -50,6 +51,7 @@
       { "contrast_visited_color", &prefs.contrast_visited_color, PREFS_BOOL },
       { "enterpress_forces_submit", &prefs.enterpress_forces_submit,
         PREFS_BOOL },
+      { "filter_auto_requests", &prefs.filter_auto_requests, PREFS_FILTER },
       { "focus_new_tab", &prefs.focus_new_tab, PREFS_BOOL },
       { "font_cursive", &prefs.font_cursive, PREFS_STRING },
       { "font_factor", &prefs.font_factor, PREFS_DOUBLE },
@@ -138,6 +140,15 @@
       a_Misc_parse_geometry(value, &prefs.xpos, &prefs.ypos,
                             &prefs.width, &prefs.height);
       break;
+   case PREFS_FILTER:
+      if (!dStrcasecmp(value, "same_domain"))
+         prefs.filter_auto_requests = PREFS_FILTER_SAME_DOMAIN;
+      else {
+         if (dStrcasecmp(value, "allow_all"))
+            MSG_WARN("prefs: unrecognized value for filter_auto_requests\n");
+         prefs.filter_auto_requests = PREFS_FILTER_ALLOW_ALL;
+      }
+      break;
    case PREFS_PANEL_SIZE:
       if (!dStrcasecmp(value, "tiny"))
          prefs.panel_size = P_tiny;
--- a/src/uicmd.cc	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/uicmd.cc	Tue Apr 06 02:27:54 2010 +0000
@@ -612,7 +612,7 @@
  */
 void a_UIcmd_open_url(BrowserWindow *bw, const DilloUrl *url)
 {
-   a_Nav_push(bw, url);
+   a_Nav_push(bw, url, NULL);
    if (BW2UI(bw)->get_panelmode() == UI_TEMPORARILY_SHOW_PANELS)
       BW2UI(bw)->set_panelmode(UI_HIDDEN);
    a_UIcmd_focus_main_area(bw);
@@ -624,7 +624,7 @@
     * Location if we don't yet have an URL, main otherwise.
     */
    if (url) {
-      a_Nav_push(new_bw, url);
+      a_Nav_push(new_bw, url, NULL);
       BW2UI(new_bw)->focus_main();
    } else {
       BW2UI(new_bw)->focus_location();
--- a/src/url.c	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/url.c	Tue Apr 06 02:27:54 2010 +0000
@@ -634,3 +634,121 @@
    }
    return new_str;
 }
+
+/*
+ * Is the provided hostname an IP address?
+ */
+static bool_t Url_host_is_ip(const char *host)
+{
+   uint_t len;
+
+   if (!host || !*host)
+      return FALSE;
+
+   len = strlen(host);
+
+   if (len == strspn(host, "0123456789.")) {
+      _MSG("an IPv4 address\n");
+      return TRUE;
+   }
+   if (*host == '[' &&
+       (len == strspn(host, "0123456789abcdefABCDEF:.[]"))) {
+      /* The precise format is shown in section 3.2.2 of rfc 3986 */
+      _MSG("an IPv6 address\n");
+      return TRUE;
+   }
+   return FALSE;
+}
+
+/*
+ * How many internal dots are in the public portion of this hostname?
+ * e.g., for "www.dillo.org", it is one because everything under "dillo.org",
+ * as a .org domain, is part of one organization; for a TLD listed below
+ * (e.g., "uk"), it is two, so "example.co.uk" is the public portion.
+ * Of course this is only a simple and imperfect approximation of
+ * organizational boundaries.
+ */
+static uint_t Url_host_public_internal_dots(const char *host)
+{
+   uint_t ret = 1;
+
+   if (host) {
+      int start, after, tld_len;
+
+      /* We may be able to trust the format of the host string more than
+       * I am here. Trailing dots and no dots are real possibilities, though.
+       */
+      after = strlen(host);
+      if (after > 0 && host[after - 1] == '.')
+         after--;
+      start = after;
+      while (start > 0 && host[start - 1] != '.')
+         start--;
+      tld_len = after - start;
+
+      if (tld_len > 0) {
+         /* These TLDs were chosen by examining the current publicsuffix list
+          * in January 2010 and picking out those where it was simplest for
+          * them to describe the situation by beginning with a "*.[tld]" rule.
+          */
+         const char *const tlds[] = {"ar","au","bd","bn","bt","ck","cy","do",
+                                     "eg","er","et","fj","fk","gt","gu","id",
+                                     "il","jm","ke","kh","kw","ml","mm","mt",
+                                     "mz","ni","np","nz","om","pg","py","qa",
+                                     "sv","tr","uk","uy","ve","ye","yu","za",
+                                     "zm","zw"};
+         uint_t i, tld_num = sizeof(tlds) / sizeof(tlds[0]);
+
+         for (i = 0; i < tld_num; i++) {
+            if (strlen(tlds[i]) == (uint_t) tld_len &&
+                !dStrncasecmp(tlds[i], host + start, tld_len)) {
+               MSG("TLD code matched %s\n", tlds[i]);
+               ret++;
+               break;
+            }
+         }
+      }
+   }
+   return ret;
+}
+
+/*
+ * Given a URL host string, return the portion that is public, i.e., the
+ * domain that is in a registry outside the organization: for 'www.dillo.org',
+ * that would be 'dillo.org'. NULL, empty and IP hosts are returned as is.
+ */
+const char *a_Url_host_find_public_suffix(const char *host)
+{
+   const char *s;
+   uint_t dots;
+
+   if (!host || !*host || Url_host_is_ip(host))
+      return host;
+
+   s = host;
+
+   while (s[1])
+      s++;
+
+   if (s > host && *s == '.') {
+      /* don't want to deal with trailing dot */
+      s--;
+   }
+
+   dots = Url_host_public_internal_dots(host);
+
+   /* With a proper host string, we should not be pointing to a dot now. */
+
+   while (s > host) {
+      if (s[-1] == '.') {
+         if (dots == 0)
+            break;
+         else
+            dots--;
+      }
+      s--;
+   }
+
+   MSG("public suffix of %s is %s\n", host, s);
+   return s;
+}
--- a/src/url.h	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/url.h	Tue Apr 06 02:27:54 2010 +0000
@@ -123,7 +123,7 @@
 char *a_Url_decode_hex_str(const char *str);
 char *a_Url_encode_hex_str(const char *str);
 char *a_Url_string_strip_delimiters(const char *str);
-
+const char *a_Url_host_find_public_suffix(const char *host);
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
--- a/src/web.cc	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/web.cc	Tue Apr 06 02:27:54 2010 +0000
@@ -103,12 +103,13 @@
 /*
  * Allocate and set safe values for a DilloWeb structure
  */
-DilloWeb* a_Web_new(const DilloUrl *url)
+DilloWeb* a_Web_new(const DilloUrl *url, const DilloUrl *requester)
 {
    DilloWeb *web= dNew(DilloWeb, 1);
 
    _MSG(" a_Web_new: ValidWebs ==> %d\n", dList_length(ValidWebs));
    web->url = a_Url_dup(url);
+   web->requester = a_Url_dup(requester);
    web->bw = NULL;
    web->flags = 0;
    web->Image = NULL;
@@ -136,6 +137,7 @@
 {
    if (!web) return;
    a_Url_free(web->url);
+   a_Url_free(web->requester);
    a_Image_unref(web->Image);
    dFree(web->filename);
    dList_remove(ValidWebs, (void *)web);
--- a/src/web.hh	Mon Apr 05 23:36:34 2010 +0200
+++ b/src/web.hh	Tue Apr 06 02:27:54 2010 +0000
@@ -22,6 +22,8 @@
 
 struct _DilloWeb {
   DilloUrl *url;              /* Requested URL */
+  DilloUrl *requester;        /* URL that caused this request, or
+                               * NULL if user-initiated. */
   BrowserWindow *bw;          /* The requesting browser window [reference] */
   int flags;                  /* Additional info */
 
@@ -34,7 +36,7 @@
 };
 
 void a_Web_init(void);
-DilloWeb* a_Web_new (const DilloUrl* url);
+DilloWeb* a_Web_new (const DilloUrl* url, const DilloUrl *requester);
 int a_Web_valid(DilloWeb *web);
 void a_Web_free (DilloWeb*);
 int a_Web_dispatch_by_type (const char *Type, DilloWeb *web,