jcid@0: /* jcid@0: * File: misc.c jcid@0: * jcid@35: * Copyright (C) 2000-2007 Jorge Arellano Cid , jcid@0: * jcid@0: * This program is free software; you can redistribute it and/or modify jcid@0: * it under the terms of the GNU General Public License as published by jcid@0: * the Free Software Foundation; either version 3 of the License, or jcid@0: * (at your option) any later version. jcid@0: */ jcid@0: jcid@0: #include jcid@0: #include jcid@0: #include jcid@0: #include Johannes@1453: #include jcid@0: Johannes@1128: #include "utf8.hh" jcid@0: #include "msg.h" jcid@0: #include "misc.h" jcid@0: jcid@0: /* jcid@0: * Escape characters as %XX sequences. jcid@0: * Return value: New string. jcid@0: */ jcid@306: char *a_Misc_escape_chars(const char *str, const char *esc_set) jcid@0: { corvid@1141: static const char *const hex = "0123456789ABCDEF"; jcid@0: char *p = NULL; jcid@0: Dstr *dstr; jcid@0: int i; jcid@0: jcid@0: dstr = dStr_sized_new(64); jcid@0: for (i = 0; str[i]; ++i) { jcid@0: if (str[i] <= 0x1F || str[i] == 0x7F || strchr(esc_set, str[i])) { jcid@0: dStr_append_c(dstr, '%'); jcid@0: dStr_append_c(dstr, hex[(str[i] >> 4) & 15]); jcid@0: dStr_append_c(dstr, hex[str[i] & 15]); jcid@0: } else { jcid@0: dStr_append_c(dstr, str[i]); jcid@0: } jcid@0: } jcid@0: p = dstr->str; jcid@0: dStr_free(dstr, FALSE); jcid@0: jcid@0: return p; jcid@0: } jcid@0: jcid@0: #define TAB_SIZE 8 jcid@0: /* jcid@0: * Takes a string and converts any tabs to spaces. jcid@0: */ Johannes@1453: int Johannes@1453: a_Misc_expand_tabs(char **start, char *end, char *buf, int buflen) jcid@0: { Johannes@1453: int j, pos = 0, written = 0, old_pos, char_len; Johannes@1128: uint_t code; Johannes@1453: static const int combining_char_space = 32; Johannes@1128: Johannes@1453: while (*start < end && written < buflen - TAB_SIZE - combining_char_space) { Johannes@1453: code = a_Utf8_decode(*start, end, &char_len); corvid@1160: Johannes@1453: if (code == '\t') { Johannes@1453: /* Fill with whitespaces until the next tab. */ Johannes@1453: old_pos = pos; Johannes@1453: pos += TAB_SIZE - (pos % TAB_SIZE); Johannes@1453: for (j = old_pos; j < pos; j++) Johannes@1453: buf[written++] = ' '; Johannes@1453: } else { Johannes@1453: assert(char_len <= 4); Johannes@1453: for (j = 0; j < char_len; j++) Johannes@1453: buf[written++] = (*start)[j]; Johannes@1453: pos++; jcid@0: } Johannes@1128: Johannes@1453: *start += char_len; jcid@0: } Johannes@1453: Johannes@1453: /* If following chars are combining chars (e.g. accents) add them to the Johannes@1453: * buffer. We have reserved combining_char_space bytes for this. Johannes@1453: * If there should be more combining chars, we split nevertheless. Johannes@1453: */ Johannes@1453: while (*start < end && written < buflen - 4) { Johannes@1453: code = a_Utf8_decode(*start, end, &char_len); Johannes@1453: Johannes@1453: if (! a_Utf8_combining_char(code)) Johannes@1453: break; Johannes@1453: Johannes@1453: assert(char_len <= 4); Johannes@1453: for (j = 0; j < char_len; j++) Johannes@1453: buf[written++] = (*start)[j]; corvid@1591: Johannes@1453: *start += char_len; Johannes@1453: } Johannes@1453: Johannes@1453: return written; jcid@0: } jcid@0: jcid@0: /* TODO: could use dStr ADT! */ jcid@0: typedef struct ContentType_ { jcid@0: const char *str; jcid@0: int len; jcid@0: } ContentType_t; jcid@0: jcid@0: static const ContentType_t MimeTypes[] = { jcid@0: { "application/octet-stream", 24 }, jcid@1707: { "application/xhtml+xml", 21 }, jcid@0: { "text/html", 9 }, jcid@0: { "text/plain", 10 }, jcid@0: { "image/gif", 9 }, jcid@0: { "image/png", 9 }, jcid@0: { "image/jpeg", 10 }, jcid@0: { NULL, 0 } jcid@0: }; jcid@0: jcid@250: typedef enum { jcid@250: DT_OCTET_STREAM = 0, jcid@250: DT_TEXT_HTML, jcid@250: DT_TEXT_PLAIN, jcid@250: DT_IMAGE_GIF, jcid@250: DT_IMAGE_PNG, jcid@250: DT_IMAGE_JPG, jcid@250: } DetectedContentType; jcid@250: jcid@0: /* jcid@0: * Detects 'Content-Type' from a data stream sample. jcid@0: * jcid@0: * It uses the magic(5) logic from file(1). Currently, it jcid@0: * only checks the few mime types that Dillo supports. jcid@0: * jcid@0: * 'Data' is a pointer to the first bytes of the raw data. jcid@0: * jcid@0: * Return value: (0 on success, 1 on doubt, 2 on lack of data). jcid@0: */ jcid@0: int a_Misc_get_content_type_from_data(void *Data, size_t Size, const char **PT) jcid@0: { jcid@250: size_t i, non_ascci, non_ascci_text, bin_chars; jcid@250: char *p = Data; jcid@0: int st = 1; /* default to "doubt' */ jcid@250: DetectedContentType Type = DT_OCTET_STREAM; /* default to binary */ jcid@0: jcid@0: /* HTML try */ jcid@1221: for (i = 0; i < Size && dIsspace(p[i]); ++i); jcid@0: if ((Size - i >= 5 && !dStrncasecmp(p+i, "= 5 && !dStrncasecmp(p+i, "= 6 && !dStrncasecmp(p+i, "= 14 && !dStrncasecmp(p+i, "= 17 && !dStrncasecmp(p+i, "