view dpi/datauri.c @ 2030:382160be8c2f

cookies comments I was going to say something in the comments about simplicity and not implementing every little bit of the rfc when we don't know of cookies that make it necessary, but then I suppose that's all implied with dillo.
author corvid <corvid@lavabit.com>
date Tue, 17 May 2011 22:48:50 +0000
parents da16ca154e64
children
line wrap: on
line source
/*
 * File: datauri.c
 *
 * Copyright (C) 2006-2007 Jorge Arellano Cid <jcid@dillo.org>
 *
 * Filter dpi for the "data:" URI scheme (RFC 2397).
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>

#include "../dpip/dpip.h"
#include "dpiutil.h"

/*
 * Debugging macros
 */
#define SILENT 1
#define _MSG(...)
#if SILENT
 #define MSG(...)
#else
 #define MSG(...)  fprintf(stderr, "[datauri dpi]: " __VA_ARGS__)
#endif

/*
 * Global variables
 */
static Dsh *sh = NULL;

static void b64strip_illegal_chars(unsigned char* str)
{
   unsigned char *p, *s = str;

   MSG("len=%d{%s}\n", strlen((char*)str), str);

   for (p = s; (*p = *s); ++s) {
      if (isalnum(*p) || strchr("+/=", *p))
         ++p;
   }

   MSG("len=%d{%s}\n", strlen((char *)str), str);
}

static int b64decode(unsigned char* str)
{
   unsigned char *cur, *start;
   int d, dlast, phase;
   unsigned char c;
   static int table[256] = {
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 00-0F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 10-1F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,  /* 20-2F */
      52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,  /* 30-3F */
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,  /* 40-4F */
      15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,  /* 50-5F */
      -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,  /* 60-6F */
      41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,  /* 70-7F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 80-8F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 90-9F */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* A0-AF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* B0-BF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* C0-CF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* D0-DF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* E0-EF */
      -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* F0-FF */
   };

   d = dlast = phase = 0;
   start = str;
   for (cur = str; *cur != '\0'; ++cur ) {
      // jer: treat line endings as physical breaks.
      //if (*cur == '\n' || *cur == '\r'){phase = dlast = 0; continue;}
      d = table[(int)*cur];
      if (d != -1) {
         switch(phase) {
         case 0:
            ++phase;
            break;
         case 1:
            c = ((dlast << 2) | ((d & 0x30) >> 4));
            *str++ = c;
            ++phase;
            break;
         case 2:
            c = (((dlast & 0xf) << 4) | ((d & 0x3c) >> 2));
            *str++ = c;
            ++phase;
            break;
         case 3:
            c = (((dlast & 0x03 ) << 6) | d);
            *str++ = c;
            phase = 0;
            break;
         }
         dlast = d;
      }
   }
   *str = '\0';
   return str - start;
}

/* Modified from src/url.c --------------------------------------------------*/

/*
 * Given an hex octet (e.g., e3, 2F, 20), return the corresponding
 * character if the octet is valid, and -1 otherwise
 */
static int Url_decode_hex_octet(const char *s)
{
   int hex_value;
   char *tail, hex[3];

   if (s && (hex[0] = s[0]) && (hex[1] = s[1])) {
      hex[2] = 0;
      hex_value = strtol(hex, &tail, 16);
      if (tail - hex == 2)
         return hex_value;
   }
   return -1;
}

/*
 * Parse possible hexadecimal octets in the URI path.
 * Returns a new allocated string.
 */
char *a_Url_decode_hex_str(const char *str, size_t *p_sz)
{
   char *new_str, *dest;
   int i, val;

   if (!str) {
      *p_sz = 0;
      return NULL;
   }

   dest = new_str = dNew(char, strlen(str) + 1);
   for (i = 0; str[i]; i++) {
      *dest++ = (str[i] == '%' && (val = Url_decode_hex_octet(str+i+1)) >= 0) ?
                i+=2, val : str[i];
   }
   *dest = 0;

   new_str = dRealloc(new_str, sizeof(char) * (dest - new_str + 1));
   *p_sz = (size_t)(dest - new_str);
   return new_str;
}

/* end ----------------------------------------------------------------------*/

/*
 * Send decoded data to dillo in an HTTP envelope.
 */
static void send_decoded_data(const char *url, const char *mime_type,
                              unsigned char *data, size_t data_sz)
{
   char *d_cmd;

   /* Send dpip tag */
   d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
   a_Dpip_dsh_write_str(sh, 1, d_cmd);
   dFree(d_cmd);

   /* Send HTTP header. */
   a_Dpip_dsh_write_str(sh, 0, "Content-type: ");
   a_Dpip_dsh_write_str(sh, 0, mime_type);
   a_Dpip_dsh_write_str(sh, 1, "\n\n");

   /* Send message */
   a_Dpip_dsh_write(sh, 0, (char *)data, data_sz);
}

static void send_failure_message(const char *url, const char *mime_type,
                                 unsigned char *data, size_t data_sz)
{
   char *d_cmd;
   char buf[1024];

   const char *msg =
"<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>\n"
"<html><body>\n"
"<hr><h1>Datauri dpi</h1><hr>\n"
"<p><b>Can't parse datauri:</b><br>\n";
   const char *msg_mime_type="text/html";

   /* Send dpip tag */
   d_cmd = a_Dpip_build_cmd("cmd=%s url=%s", "start_send_page", url);
   a_Dpip_dsh_write_str(sh, 1, d_cmd);
   dFree(d_cmd);

   /* Send HTTP header. */
   a_Dpip_dsh_write_str(sh, 0, "Content-type: ");
   a_Dpip_dsh_write_str(sh, 0, msg_mime_type);
   a_Dpip_dsh_write_str(sh, 1, "\n\n");

   /* Send message */
   a_Dpip_dsh_write_str(sh, 0, msg);

   /* send some debug info */
   snprintf(buf, 1024, "mime_type: %s<br>data size: %d<br>data: %s<br>",
            mime_type, (int)data_sz, data);
   a_Dpip_dsh_write_str(sh, 0, buf);

   /* close page */
   a_Dpip_dsh_write_str(sh, 0, "</body></html>");
}

/*
 * Get mime type from the data URI.
 * TODO: there's no point in handling "charset" because current dillo
 * only handles ISO-LATIN-1. The FLTK2 version (utf-8) could use it in the
 * future.
 */
static char *datauri_get_mime(char *url)
{
   char buf[256];
   char *mime_type = NULL, *p;
   size_t len = 0;

   if (dStrncasecmp(url, "data:", 5) == 0) {
      if ((p = strchr(url, ',')) && p - url < 256) {
         url += 5;
         len = p - url;
         strncpy(buf, url, len);
         buf[len] = 0;
         /* strip ";base64" */
         if (len >= 7 && dStrcasecmp(buf + len - 7, ";base64") == 0) {
            len -= 7;
            buf[len] = 0;
         }
      }

      /* that's it, now handle omitted types */
      if (len == 0) {
         mime_type = dStrdup("text/plain;charset=US-ASCII");
      } else if (!dStrncasecmp(buf, "charset", 7)) {
         mime_type = dStrconcat("text/plain", buf, NULL);
      } else {
         mime_type = dStrdup(buf);
      }
   }

   return mime_type;
}

/*
 * Return a decoded data string.
 */
static unsigned char *datauri_get_data(char *url, size_t *p_sz)
{
   char *p;
   int is_base64 = 0;
   unsigned char *data = NULL;

   if ((p = strchr(url, ',')) && p - url >= 12 &&  /* "data:;base64" */
       dStrncasecmp(p - 7, ";base64", 7) == 0) {
      is_base64 = 1;
   }

   if (p) {
      ++p;
      if (is_base64) {
         data = (unsigned char *)Unescape_uri_str(p);
         b64strip_illegal_chars(data);
         *p_sz = (size_t) b64decode(data);
      } else {
         data = (unsigned char *)a_Url_decode_hex_str(p, p_sz);
      }
   } else {
      data = (unsigned char *)dStrdup("");
      *p_sz = 0;
   }

   return data;
}

/*
 *
 */
int main(void)
{
   char *dpip_tag = NULL, *cmd = NULL, *url = NULL, *mime_type;
   unsigned char *data;
   int rc;
   size_t data_size = 0;

   /* Initialize the SockHandler */
   sh = a_Dpip_dsh_new(STDIN_FILENO, STDOUT_FILENO, 8*1024);

   rc = chdir("/tmp");
   if (rc == -1) {
      MSG("paths: error changing directory to /tmp: %s\n",
          dStrerror(errno));
   }

   /* Authenticate our client... */
   if (!(dpip_tag = a_Dpip_dsh_read_token(sh, 1)) ||
       a_Dpip_check_auth(dpip_tag) < 0) {
      MSG("can't authenticate request: %s\n", dStrerror(errno));
      a_Dpip_dsh_close(sh);
      return 1;
   }
   dFree(dpip_tag);

   /* Read the dpi command from STDIN */
   dpip_tag = a_Dpip_dsh_read_token(sh, 1);
   MSG("[%s]\n", dpip_tag);

   cmd = a_Dpip_get_attr(dpip_tag, "cmd");
   url = a_Dpip_get_attr(dpip_tag, "url");
   if (!cmd || !url) {
      MSG("Error, cmd=%s, url=%s\n", cmd, url);
      exit (EXIT_FAILURE);
   }

   /* Parse the data URI */
   mime_type = datauri_get_mime(url);
   data = datauri_get_data(url, &data_size);

   MSG("mime_type: %s\n", mime_type);
   MSG("data_size: %d\n", (int)data_size);
   MSG("data: {%s}\n", data);

   if (mime_type && data) {
      /* good URI */
      send_decoded_data(url, mime_type, data, data_size);
   } else {
      /* malformed URI */
      send_failure_message(url, mime_type, data, data_size);
   }

   dFree(data);
   dFree(mime_type);
   dFree(url);
   dFree(cmd);
   dFree(dpip_tag);

   /* Finish the SockHandler */
   a_Dpip_dsh_close(sh);
   a_Dpip_dsh_free(sh);

   return 0;
}