changeset 46:af8ce3cdb9be

Added the missing decode.[ch]
author jcid
date Wed, 14 Nov 2007 12:40:05 +0100
parents 1680e6f679f6
children 1b7243d62913
files src/decode.c src/decode.h
diffstat 2 files changed, 257 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/decode.c	Wed Nov 14 12:40:05 2007 +0100
@@ -0,0 +1,228 @@
+
+#include <zlib.h>
+#include <iconv.h>
+#include <errno.h>
+
+#include "decode.h"
+#include "msg.h"
+
+
+/*
+ * Size in bytes of the scratch buffer (Decode.buffer) that the gzip and
+ * charset decoders write each chunk of output into. It is only referenced
+ * from this file, so it gets internal linkage.
+ */
+static const int bufsize = 8*1024;
+
+
+/*
+ * Pass-through decoder: returns a new Dstr holding an unmodified copy of
+ * the inLen bytes found at inData. 'dc' is not consulted.
+ */
+static Dstr *Decode_null(Decode *dc, const char *inData, int inLen)
+{
+   Dstr *copy;
+
+   (void)dc;
+
+   copy = dStr_new("");
+   dStr_append_l(copy, inData, inLen);
+
+   return copy;
+}
+
+/*
+ * Cleanup hook paired with Decode_null(): the pass-through decoder
+ * allocates nothing, so there is nothing to release.
+ */
+static void Decode_null_free(Decode *dc)
+{
+   (void)dc;
+}
+
+
+/*
+ * Decode one chunk of gzip-compressed data.
+ *
+ * dc->state is the z_stream set up by a_Decode_content_init() and
+ * dc->buffer is a scratch area of 'bufsize' bytes. The z_stream keeps the
+ * decompression state between calls, so the compressed stream may be fed
+ * in pieces.
+ *
+ * Returns a new Dstr with everything that could be inflated from this
+ * chunk (possibly empty). Inflation stops at the end of the gzip stream
+ * or at the first zlib error; output of a failing inflate() call is not
+ * appended.
+ */
+static Dstr *Decode_gzip(Decode *dc, const char *inData, int inLen)
+{
+   z_stream *zs = (z_stream *)dc->state;
+   Dstr *output = dStr_new("");
+   int rc;
+
+   if (inLen <= 0)
+      return output;
+
+   /* zlib advances next_in/avail_in itself: hand over the chunk once */
+   zs->next_in = (Bytef *)inData;
+   zs->avail_in = inLen;
+
+   do {
+      zs->next_out = (Bytef *)dc->buffer;
+      zs->avail_out = bufsize;
+
+      rc = inflate(zs, Z_SYNC_FLUSH);
+
+      /* Z_STREAM_END is reported together with the last bytes of output */
+      if ((rc == Z_OK) || (rc == Z_STREAM_END))
+         dStr_append_l(output, dc->buffer, bufsize - (int)zs->avail_out);
+
+      /*
+       * A completely filled buffer means zlib may still hold pending
+       * output even though all the input was consumed, so keep calling
+       * until a round leaves room in the buffer. If nothing was pending,
+       * the extra call returns Z_BUF_ERROR (not fatal) and ends the loop.
+       */
+   } while ((rc == Z_OK) && ((zs->avail_in > 0) || (zs->avail_out == 0)));
+
+   return output;
+}
+
+/*
+ * Release everything the gzip decoder owns: zlib's internal state, the
+ * z_stream structure itself (allocated with dMalloc() in
+ * a_Decode_content_init()) and the output buffer.
+ * The Decode structure itself is not freed here.
+ */
+static void Decode_gzip_free(Decode *dc)
+{
+   (void)inflateEnd((z_stream *)dc->state);
+
+   /* inflateEnd() only frees zlib's internals, not the z_stream we own */
+   dFree(dc->state);
+   dFree(dc->buffer);
+}
+
+
+/*
+ * Convert one chunk of text to UTF-8 with the iconv descriptor kept in
+ * dc->state.
+ *
+ * The new data is appended to dc->leftover (bytes that a previous call
+ * could not convert yet) and the conversion runs over the whole of it,
+ * 'bufsize' bytes of output at a time through dc->buffer.
+ *
+ *  - An illegal input byte is skipped and replaced by U+FFFD.
+ *  - A multibyte character cut short by the end of the input stays in
+ *    dc->leftover, waiting for the next call.
+ *  - Any other unexpected iconv() error stops the conversion; the
+ *    unconverted input also stays in dc->leftover.
+ *
+ * Returns a new Dstr with the UTF-8 output produced by this call.
+ */
+static Dstr *Decode_charset(Decode *dc, const char *inData, int inLen)
+{
+   Dstr *input = dc->leftover;
+   Dstr *output = dStr_new("");
+   char *inPtr;
+   size_t inLeft;      /* iconv() takes size_t counters, not int */
+   int done = 0;
+
+   dStr_append_l(input, inData, inLen);
+   inPtr = input->str;
+   inLeft = (size_t)input->len;
+
+   while (!done && (inLeft > 0)) {
+      char *outPtr = dc->buffer;
+      size_t outRoom = bufsize;
+      size_t rc;
+      int err;
+
+      rc = iconv((iconv_t)dc->state, &inPtr, &inLeft, &outPtr, &outRoom);
+
+      /*
+       * iconv() returns (size_t)-1 on failure. Save errno right away,
+       * before any other library call has a chance to overwrite it, and
+       * keep it apart from the success count so the two can't be mixed up.
+       */
+      err = (rc == (size_t)-1) ? errno : 0;
+
+      dStr_append_l(output, dc->buffer, (int)(bufsize - outRoom));
+
+      if (err == EILSEQ) {
+         /* illegal byte sequence: drop one byte and mark the spot */
+         inPtr++;
+         inLeft--;
+         /*
+          * U+FFFD: "used to replace an incoming character whose value is
+          *        unknown or unrepresentable in Unicode."
+          * (emitted as its UTF-8 encoding)
+          */
+         dStr_append_c(output, 0xEF);
+         dStr_append_c(output, 0xBF);
+         dStr_append_c(output, 0xBD);
+      } else if (err == EINVAL) {
+         /* partial character ends the input: wait for more data */
+         done = 1;
+      } else if ((err != 0) && (err != E2BIG)) {
+         /* unexpected failure: bail out instead of spinning forever */
+         done = 1;
+      }
+      /* E2BIG only means dc->buffer got full: go around again */
+   }
+
+   /* keep only the bytes iconv() did not consume */
+   dStr_erase(input, 0, input->len - (int)inLeft);
+
+   return output;
+}
+
+/*
+ * Release what the charset decoder owns: the leftover-input Dstr, the
+ * conversion buffer and the iconv descriptor stored in dc->state.
+ * The Decode structure itself is not freed here.
+ */
+static void Decode_charset_free(Decode *dc)
+{
+   iconv_t cd = (iconv_t)(dc->state);
+
+   dStr_free(dc->leftover, 1);
+   dFree(dc->buffer);
+
+   (void)iconv_close(cd);
+}
+
+
+/*
+ * Create a decoder for the given content encoding.
+ *
+ * format: name of the encoding; may be NULL. Only "gzip" (compared
+ *         case-insensitively) selects a real decoder.
+ *
+ * Returns a newly allocated Decode, to be used with a_Decode_process()
+ * and released with a_Decode_free(). For any other format, or when zlib
+ * can't be initialized, the decoder just copies its input through.
+ */
+Decode *a_Decode_content_init(const char *format)
+{
+   Decode *dc = (Decode *)dMalloc(sizeof(Decode));
+
+   /* start out as a pass-through decoder */
+   dc->buffer = NULL;
+   dc->state = NULL;
+   dc->leftover = NULL;   /* not used by content decoders */
+   dc->decode = Decode_null;
+   dc->free = Decode_null_free;
+
+   if (format && !dStrcasecmp(format, "gzip")) {
+      z_stream *zs = (z_stream *)dMalloc(sizeof(z_stream));
+
+      MSG("compressed data! : %s\n", format);
+
+      /* zlib wants these five fields set before inflateInit2() */
+      zs->zalloc = NULL;
+      zs->zfree = NULL;
+      zs->opaque = NULL;
+      zs->next_in = NULL;
+      zs->avail_in = 0;
+
+      /* adding 16 to windowBits makes zlib decode the gzip format */
+      if (inflateInit2(zs, MAX_WBITS+16) == Z_OK) {
+         dc->buffer = (char *)dMalloc(bufsize);
+         dc->state = zs;
+         dc->decode = Decode_gzip;
+         dc->free = Decode_gzip_free;
+      } else {
+         /* never hand an uninitialized stream to inflate()/inflateEnd() */
+         MSG("Unable to initialize the gzip decoder\n");
+         dFree(zs);
+      }
+   }
+   return dc;
+}
+
+/*
+ * Tell whether 'str' is one of the known names for ISO-8859-1.
+ * The comparison ignores case.
+ *
+ * Returns 1 if it matches any alias, 0 otherwise. 'str' must not be NULL.
+ */
+static int Decode_is_latin1(const char *str)
+{
+   static const char *const aliases[] = {
+      "ISO-8859-1",
+      "latin1",
+      "ISO_8859-1:1987",
+      "ISO_8859-1",
+      "iso-ir-100",
+      "l1",
+      "IBM819",
+      "CP819",
+      "csISOLatin1"
+   };
+   size_t i;
+
+   /* dStrcasecmp() yields 0 on equality: a single match is enough */
+   for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
+      if (dStrcasecmp(str, aliases[i]) == 0)
+         return 1;
+   }
+   return 0;
+}
+
+/*
+ * Tell whether 'str' is one of the known names for US-ASCII.
+ * The comparison ignores case.
+ *
+ * Returns 1 if it matches any alias, 0 otherwise. 'str' must not be NULL.
+ */
+static int Decode_is_ascii(const char *str)
+{
+   static const char *const aliases[] = {
+      "ASCII",
+      "US-ASCII",
+      "us",
+      "IBM367",
+      "cp367",
+      "csASCII",
+      "ANSI_X3.4-1968",
+      "iso-ir-6",
+      "ANSI_X3.4-1986",
+      "ISO_646.irv:1991",
+      "ISO646-US"
+   };
+   size_t i;
+
+   /* dStrcasecmp() yields 0 on equality: a single match is enough */
+   for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
+      if (dStrcasecmp(str, aliases[i]) == 0)
+         return 1;
+   }
+   return 0;
+}
+
+/*
+ * Create a decoder that converts text from the character encoding
+ * 'format' to UTF-8.
+ *
+ * format: name of the source encoding as understood by iconv_open();
+ *         may be NULL.
+ *
+ * Returns a newly allocated Decode, to be used with a_Decode_process()
+ * and released with a_Decode_free(). The decoder just copies its input
+ * through when format is NULL, empty, "UTF-8", accepted by
+ * Decode_is_latin1() or Decode_is_ascii(), or unknown to iconv.
+ *
+ * NOTE(review): skipping the conversion for Latin-1 is only right if the
+ * consumer of the output copes with raw Latin-1 bytes -- to be confirmed.
+ */
+Decode *a_Decode_charset_init(const char *format)
+{
+   Decode *dc = (Decode *)dMalloc(sizeof(Decode));
+
+   /* start out as a pass-through decoder with every field defined */
+   dc->buffer = NULL;
+   dc->leftover = NULL;
+   dc->state = NULL;
+   dc->decode = Decode_null;
+   dc->free = Decode_null_free;
+
+   if (format &&
+       *format &&
+       dStrcasecmp(format,"UTF-8") &&
+       !Decode_is_latin1(format) &&
+       !Decode_is_ascii(format)) {
+
+      iconv_t ic = iconv_open("UTF-8", format);
+
+      if (ic != (iconv_t) -1) {
+         dc->state = ic;
+         dc->buffer = (char *)dMalloc(bufsize);
+         dc->leftover = dStr_new("");
+
+         dc->decode = Decode_charset;
+         dc->free = Decode_charset_free;
+      } else {
+         MSG("Unable to convert from character encoding: '%s'\n", format);
+      }
+   }
+   return dc;
+}
+
+/*
+ * Run one chunk of input (inLen bytes at inData) through the decoder and
+ * return the Dstr that the decoder's 'decode' function produces.
+ */
+Dstr *a_Decode_process(Decode *dc, const char *inData, int inLen)
+{
+   Dstr *decoded = dc->decode(dc, inData, inLen);
+
+   return decoded;
+}
+
+/*
+ * Destroy a decoder: let it release its own resources through its 'free'
+ * function, then free the Decode structure itself.
+ * A NULL 'dc' is accepted and ignored.
+ */
+void a_Decode_free(Decode *dc)
+{
+   if (dc == NULL)
+      return;
+
+   dc->free(dc);
+   dFree(dc);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/decode.h	Wed Nov 14 12:40:05 2007 +0100
@@ -0,0 +1,29 @@
+#ifndef __DECODE_H__
+#define __DECODE_H__
+
+#include "../dlib/dlib.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * A decoder instance. It is created by one of the a_Decode_*_init()
+ * functions, fed with a_Decode_process() and destroyed with
+ * a_Decode_free().
+ */
+typedef struct _Decode    Decode;
+
+struct _Decode {
+   /* Scratch output buffer used by the gzip and charset decoders;
+    * NULL for the pass-through decoder. */
+   char *buffer;
+   /* Charset decoder only: input bytes not converted yet; NULL otherwise. */
+   Dstr *leftover;
+   /* Decoder private state: a z_stream pointer for gzip, the iconv
+    * descriptor for charset conversion. */
+   void *state;
+   /* Decodes inLen bytes at inData and returns the result in a new Dstr. */
+   Dstr *(*decode) (Decode *dc, const char *inData, int inLen);
+   /* Releases what the decoder allocated, but not the Decode itself. */
+   void (*free) (Decode *dc);
+};
+
+/* New decoder for a content encoding: "gzip" is inflated, anything else
+ * (including NULL) is passed through unchanged. */
+Decode *a_Decode_content_init(const char *format);
+/* New decoder converting from the character encoding 'format' to UTF-8;
+ * passes data through unchanged when no conversion is set up. */
+Decode *a_Decode_charset_init(const char *format);
+/* Decode inLen bytes at inData; the result comes in a newly created Dstr
+ * (presumably to be freed by the caller -- confirm against the callers). */
+Dstr *a_Decode_process(Decode *dc, const char *inData, int inLen);
+/* Release the decoder's resources and the Decode structure itself. */
+void a_Decode_free(Decode *dc);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __DECODE_H__ */