diff src/misc.c @ 0:6ee11bf9e3ea

Initial revision
author jcid
date Sun, 07 Oct 2007 00:36:34 +0200
parents
children d9e7b35430de
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/misc.c	Sun Oct 07 00:36:34 2007 +0200
@@ -0,0 +1,271 @@
+/*
+ * File: misc.c
+ *
+ * Copyright (C) 2000 Jorge Arellano Cid <jcid@dillo.org>,
+ *                    Jörgen Viksell <vsksga@hotmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "msg.h"
+#include "misc.h"
+
+
+/*
+ * Escape characters as %XX sequences.
+ *
+ * A byte is escaped when it is a control character (0x00-0x1F), DEL (0x7F),
+ * a non-ASCII byte (0x80-0xFF) or when it appears in 'esc_set'.
+ *
+ * str:     NUL-terminated string to escape.
+ * esc_set: NUL-terminated set of extra characters that must be escaped.
+ *
+ * Return value: New string. It is the character buffer of a temporary Dstr
+ * whose wrapper is released here with dStr_free(dstr, FALSE).
+ */
+char *a_Misc_escape_chars(const char *str, char *esc_set)
+{
+   static const char *hex = "0123456789ABCDEF";
+   char *p = NULL;
+   Dstr *dstr;
+   int i;
+
+   dstr = dStr_sized_new(64);
+   for (i = 0; str[i]; ++i) {
+      /* Classify the byte value, not the plain 'char': whether 'char' is
+       * signed is implementation-defined, and comparing it directly made
+       * the handling of bytes >= 0x80 differ between platforms. */
+      unsigned char c = (unsigned char) str[i];
+
+      if (c <= 0x1F || c >= 0x7F || strchr(esc_set, str[i])) {
+         dStr_append_c(dstr, '%');
+         dStr_append_c(dstr, hex[c >> 4]);
+         dStr_append_c(dstr, hex[c & 15]);
+      } else {
+         dStr_append_c(dstr, str[i]);
+      }
+   }
+   p = dstr->str;
+   dStr_free(dstr, FALSE);
+
+   return p;
+}
+
+
+#define TAB_SIZE 8
+/*
+ * Build a copy of 'str' in which every tab is replaced by the number of
+ * spaces needed to reach the next multiple of TAB_SIZE columns.
+ *
+ * The column counter starts at zero and is never reset; every character
+ * other than a tab (newlines included) advances it by one.
+ *
+ * Return value: the character buffer of a temporary Dstr, whose wrapper
+ * is released here with dStr_free(..., FALSE).
+ */
+char *a_Misc_expand_tabs(const char *str)
+{
+   Dstr *out = dStr_new("");
+   const char *s;
+   char *result;
+   int col = 0, pad;
+
+   for (s = str; *s != '\0'; ++s) {
+      if (*s != '\t') {
+         dStr_append_c(out, *s);
+         ++col;
+         continue;
+      }
+      /* A tab: pad with blanks up to the next tab stop. */
+      pad = TAB_SIZE - (col % TAB_SIZE);
+      col += pad;
+      while (pad-- > 0)
+         dStr_append_c(out, ' ');
+   }
+
+   result = out->str;
+   dStr_free(out, FALSE);
+   return result;
+}
+
+/* TODO: could use dStr ADT! */
+/* A MIME type name together with its length (terminating NUL excluded). */
+typedef struct ContentType_ {
+   const char *str;
+   int len;
+} ContentType_t;
+
+/* Build a table entry from a string literal, deriving 'len' from it. */
+#define MIME_ENTRY(s) { s, (int) (sizeof(s) - 1) }
+
+/*
+ * MIME types handled in this file. Entries are referred to by their
+ * position, so the order must be kept. An entry with a NULL 'str' ends
+ * the table.
+ */
+static const ContentType_t MimeTypes[] = {
+   MIME_ENTRY("application/octet-stream"),
+   MIME_ENTRY("text/html"),
+   MIME_ENTRY("text/plain"),
+   MIME_ENTRY("image/gif"),
+   MIME_ENTRY("image/png"),
+   MIME_ENTRY("image/jpeg"),
+   { NULL, 0 }
+};
+
+#undef MIME_ENTRY
+
+/*
+ * Detects 'Content-Type' from a data stream sample.
+ *
+ * It uses the magic(5) logic from file(1). Currently, it
+ * only checks the few mime types that Dillo supports.
+ *
+ * 'Data' is a pointer to the first bytes of the raw data.
+ * 'Size' is the number of bytes available at 'Data'.
+ * 'PT'   receives a pointer to the detected type name; it points into
+ *        the MimeTypes table.
+ *
+ * Return value: 0 on success, 1 on doubt.
+ * (This function never returns 2, "lack of data".)
+ */
+int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT)
+{
+   /* Positions of the entries in MimeTypes[] */
+   enum {
+      T_OCTET_STREAM = 0, T_HTML = 1, T_PLAIN = 2,
+      T_GIF = 3, T_PNG = 4, T_JPEG = 5
+   };
+   int st = 1;                  /* default to "doubt" */
+   int Type = T_OCTET_STREAM;   /* default to "application/octet-stream" */
+   char *p = Data;
+   size_t i, non_ascii;
+
+   /* HTML try: skip leading whitespace, then look for a known opening.
+    * The cast is required: passing a negative char to isspace() is
+    * undefined behaviour. */
+   for (i = 0; i < Size && isspace((unsigned char) p[i]); ++i);
+   if ((Size - i >= 5  && !dStrncasecmp(p+i, "<html", 5)) ||
+       (Size - i >= 5  && !dStrncasecmp(p+i, "<head", 5)) ||
+       (Size - i >= 6  && !dStrncasecmp(p+i, "<title", 6)) ||
+       (Size - i >= 14 && !dStrncasecmp(p+i, "<!doctype html", 14)) ||
+       /* this line is workaround for FTP through the Squid proxy */
+       (Size - i >= 17 && !dStrncasecmp(p+i, "<!-- HTML listing", 17))) {
+
+      Type = T_HTML;
+      st = 0;
+
+   /* Images: magic numbers are binary signatures, so they are compared
+    * byte by byte (case-sensitive) with memcmp. */
+   } else if (Size >= 4 && memcmp(p, "GIF8", 4) == 0) {
+      Type = T_GIF;
+      st = 0;
+   } else if (Size >= 4 && memcmp(p, "\x89PNG", 4) == 0) {
+      Type = T_PNG;
+      st = 0;
+   } else if (Size >= 2 && memcmp(p, "\xff\xd8", 2) == 0) {
+      /* JPEG has the first 2 bytes set to 0xffd8 in BigEndian - looking
+       * at the character representation should be machine independent. */
+      Type = T_JPEG;
+      st = 0;
+
+   /* Text */
+   } else {
+      /* We'll assume "text/plain" if the set of chars above 127 is <= 10
+       * in a 256-bytes sample.  Better heuristics are welcomed! :-) */
+      non_ascii = 0;
+      if (Size > 256)
+         Size = 256;
+      for (i = 0; i < Size; i++)
+         if ((unsigned char) p[i] > 127)
+            ++non_ascii;
+      if (Size == 256) {
+         /* Full sample: the verdict is considered reliable */
+         Type = (non_ascii > 10) ? T_OCTET_STREAM : T_PLAIN;
+         st = 0;
+      } else {
+         /* Short sample: any non-ASCII byte means binary; 'st' stays
+          * at "doubt" */
+         Type = (non_ascii > 0) ? T_OCTET_STREAM : T_PLAIN;
+      }
+   }
+
+   *PT = MimeTypes[Type].str;
+   return st;
+}
+
+/*
+ * Check the server-supplied 'Content-Type' against our detected type.
+ * (some servers seem to default to "text/plain").
+ *
+ * 'EntryType' is the type sent by the server (may be NULL).
+ * 'DetectedType' is the type detected from the data.
+ *
+ * Return value:
+ *  0,  if they match
+ *  -1, if a mismatch is detected
+ *
+ * There're many MIME types Dillo doesn't know, they're handled
+ * as "application/octet-stream" (as the SPEC says).
+ *
+ * A mismatch happens when receiving a binary stream as
+ * "text/plain" or "text/html", or an image that's not an image of its kind.
+ *
+ * Note: this is a basic security procedure.
+ *
+ */
+int a_Misc_content_type_check(const char *EntryType, const char *DetectedType)
+{
+   int i;
+   int st = -1;
+
+   _MSG("Type check:  [Srv: %s  Det: %s]\n", EntryType, DetectedType);
+
+   if (!EntryType)
+      return 0; /* there's no mismatch without server type */
+
+   /* Look the server type up among the known ones (entry 0,
+    * "application/octet-stream", is skipped). Only the first 'len' chars
+    * are compared, so trailing parameters don't prevent a hit. */
+   for (i = 1; MimeTypes[i].str; ++i)
+      if (dStrncasecmp(EntryType, MimeTypes[i].str, MimeTypes[i].len) == 0)
+         break;
+
+   if (!MimeTypes[i].str) {
+      /* type not found, no mismatch */
+      st = 0;
+   } else if (dStrncasecmp(EntryType, "image/", 6) == 0) {
+      /* An image: the detected type must be exactly the announced one.
+       * Any other detected type is a mismatch ('st' stays -1); this case
+       * must not fall through to the text check below, which would
+       * accept every image. */
+      if (!dStrncasecmp(DetectedType,MimeTypes[i].str,MimeTypes[i].len))
+         st = 0;
+   } else if (dStrncasecmp(EntryType, "text/", 5) ||
+              dStrncasecmp(DetectedType, "application/", 12)) {
+      /* Not an application sent as text */
+      st = 0;
+   }
+
+   return st;
+}
+
+/*
+ * Parse a geometry string: two integers separated by 'x' (or 'X'),
+ * optionally followed by two more integers, e.g. "640x480+10+20".
+ *
+ * 'w' and 'h' are written when both leading numbers are found.
+ * 'x' and 'y' are written only when, in addition, both trailing numbers
+ * are found; otherwise they are left untouched.
+ *
+ * Return value: 1 if width and height were parsed, 0 otherwise.
+ */
+int a_Misc_parse_geometry(char *str, int *x, int *y, int *w, int *h)
+{
+   char *sep, *rest, *end1, *end2;
+   int first, second;
+   int found = 0;
+
+   /* A lowercase separator is looked for first, then an uppercase one */
+   sep = strchr(str, 'x');
+   if (sep == NULL)
+      sep = strchr(str, 'X');
+
+   if (sep != NULL) {
+      rest = sep + 1;
+      first = strtol(str, &end1, 10);
+      second = strtol(rest, &end2, 10);
+      /* strtol() leaves the end pointer at the start when no digits
+       * were consumed */
+      if (end1 != str && end2 != rest) {
+         *w = first;
+         *h = second;
+         found = 1;
+         /* the offsets, if any, follow the height */
+         rest = end2;
+         first = strtol(rest, &end1, 10);
+         second = strtol(end1, &end2, 10);
+         if (end1 != rest && end2 != end1) {
+            *x = first;
+            *y = second;
+         }
+      }
+   }
+   _MSG("geom: w,h,x,y = (%d,%d,%d,%d)\n", *w, *h, *x, *y);
+   return found;
+}
+
+/*
+ * Encodes string using base64 encoding.
+ *
+ * 'in' is a NUL-terminated string; its bytes up to the NUL are encoded.
+ *
+ * Return value: new NUL-terminated string allocated with dMalloc, or NULL
+ * if 'in' is NULL. An empty input yields an empty string.
+ */
+char *a_Misc_encode_base64(const char *in)
+{
+   static const char *base64_hex = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                   "abcdefghijklmnopqrstuvwxyz"
+                                   "0123456789+/";
+   const unsigned char *src;
+   char *out = NULL;
+   size_t len, i = 0;
+
+   if (in == NULL) return NULL;
+   /* Read the input as unsigned bytes: with a signed 'char', a byte
+    * >= 0x80 shifted right stays negative and would index outside
+    * the alphabet. */
+   src = (const unsigned char *) in;
+   len = strlen(in);
+
+   /* 4 output chars per (possibly partial) group of 3 bytes, plus NUL */
+   out = (char *)dMalloc((len + 2) / 3 * 4 + 1);
+
+   for (; len >= 3; len -= 3) {
+      out[i++] = base64_hex[src[0] >> 2];
+      out[i++] = base64_hex[((src[0]<<4) & 0x30) | (src[1]>>4)];
+      out[i++] = base64_hex[((src[1]<<2) & 0x3c) | (src[2]>>6)];
+      out[i++] = base64_hex[src[2] & 0x3f];
+      src += 3;
+   }
+
+   /* One or two bytes left: emit a final group padded with '=' */
+   if (len > 0) {
+      unsigned char fragment;
+      out[i++] = base64_hex[src[0] >> 2];
+      fragment = (src[0] << 4) & 0x30;
+      if (len > 1) fragment |= src[1] >> 4;
+      out[i++] = base64_hex[fragment];
+      out[i++] = (len < 2) ? '=' : base64_hex[(src[1] << 2) & 0x3c];
+      out[i++] = '=';
+   }
+   out[i] = '\0';
+   return out;
+}