changeset 1131:276e290accca

Minor utf8 refactor
author corvid <corvid@lavabit.com>
date Tue, 26 May 2009 20:25:00 -0400
parents cdcb6c1fb148
children d79bbfd60a5b
files src/decode.c src/utf8.hh
diffstat 2 files changed, 10 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/decode.c	Mon May 25 23:37:12 2009 +0000
+++ b/src/decode.c	Tue May 26 20:25:00 2009 -0400
@@ -15,6 +15,7 @@
 #include <stdlib.h>     /* strtol */
 
 #include "decode.h"
+#include "utf8.hh"
 #include "msg.h"
 
 static const int bufsize = 8*1024;
@@ -164,15 +165,8 @@
       if (rc == EILSEQ){
          inPtr++;
          inLeft--;
-         /*
-          * U+FFFD: "used to replace an incoming character whose value is
-          *        unknown or unrepresentable in Unicode."
-          */
-          //dStr_append(output, "\ufffd");
-          // \uxxxx is C99. UTF-8-specific:
-          dStr_append_c(output, 0xEF);
-          dStr_append_c(output, 0xBF);
-          dStr_append_c(output, 0xBD);
+         dStr_append_l(output, utf8_replacement_char,
+                       sizeof(utf8_replacement_char) - 1);
       }
    }
    dStr_erase(dc->leftover, 0, dc->leftover->len - inLeft);
--- a/src/utf8.hh	Mon May 25 23:37:12 2009 +0000
+++ b/src/utf8.hh	Tue May 26 20:25:00 2009 -0400
@@ -8,6 +8,13 @@
 
 #include "d_size.h"
 
+/*
+ * UTF-8 encoding of the Unicode replacement character U+FFFD, which is
+ * "used to replace an incoming character whose value is unknown or otherwise
+ * unrepresentable in Unicode". static: one private copy per including file.
+ */
+static const char utf8_replacement_char[] = "\xEF\xBF\xBD";
+
 uint_t a_Utf8_end_of_char(const char *str, uint_t i);
 uint_t a_Utf8_decode(const char*, const char* end, int* len);
 int a_Utf8_encode(unsigned int ucs, char *buf);