Mercurial > dillo_port1.3
changeset 1128:fbe2ce98a4b8
make tab expansion for plain text utf8 aware
In discussion with corvid <corvid@lavabit.com>.
author | Johannes Hofmann <Johannes.Hofmann@gmx.de> |
---|---|
date | Mon, 25 May 2009 18:42:24 +0200 |
parents | 65c7e33e4466 |
children | 205e83fe66c0 |
files | src/misc.c src/utf8.cc src/utf8.hh |
diffstat | 3 files changed, 29 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/src/misc.c Sun May 24 21:52:59 2009 +0000 +++ b/src/misc.c Mon May 25 18:42:24 2009 +0200 @@ -16,6 +16,7 @@ #include <string.h> #include <ctype.h> +#include "utf8.hh" #include "msg.h" #include "misc.h" @@ -47,7 +48,6 @@ return p; } - #define TAB_SIZE 8 /* * Takes a string and converts any tabs to spaces. @@ -55,23 +55,27 @@ char *a_Misc_expand_tabs(const char *str, int len) { Dstr *New = dStr_new(""); - int i, j, pos, old_pos; + int i = 0, j, pos = 0, old_pos, char_len; + uint_t code; char *val; - if (len) { - for (pos = 0, i = 0; i < len; i++) { - if (str[i] == '\t') { - /* Fill with whitespaces until the next tab. */ - old_pos = pos; - pos += TAB_SIZE - (pos % TAB_SIZE); - for (j = old_pos; j < pos; j++) - dStr_append_c(New, ' '); - } else { - dStr_append_c(New, str[i]); - pos++; - } + while (i < len) { + code = a_Utf8_decode(&str[i], str + len, &char_len); + + if (code == '\t') { + /* Fill with whitespaces until the next tab. */ + old_pos = pos; + pos += TAB_SIZE - (pos % TAB_SIZE); + for (j = old_pos; j < pos; j++) + dStr_append_c(New, ' '); + } else { + dStr_append_l(New, &str[i], char_len); + pos++; } + + i += char_len; } + val = New->str; dStr_free(New, FALSE); return val;
--- a/src/utf8.cc Sun May 24 21:52:59 2009 +0000 +++ b/src/utf8.cc Mon May 25 18:42:24 2009 +0200 @@ -36,6 +36,16 @@ } /* + * Decode a single UTF-8-encoded character starting at str. + * The resulting Unicode value (in the range 0-0x10ffff) is returned, + * and len is set to the number of bytes in the UTF-8 encoding. + */ +uint_t a_Utf8_decode(const char* str, const char* end, int* len) +{ + return utf8decode(str, end, len); +} + +/* * Write UTF-8 encoding of ucs into buf and return number of bytes written. */ int a_Utf8_encode(unsigned int ucs, char *buf)
--- a/src/utf8.hh Sun May 24 21:52:59 2009 +0000 +++ b/src/utf8.hh Mon May 25 18:42:24 2009 +0200 @@ -9,6 +9,7 @@ #include "d_size.h" uint_t a_Utf8_end_of_char(const char *str, uint_t i); +uint_t a_Utf8_decode(const char*, const char* end, int* len); int a_Utf8_encode(unsigned int ucs, char *buf); int a_Utf8_test(const char* src, unsigned int srclen);