comparison src/utf8.cc @ 1103:94b9265663f6

New utility function: a_Utf8_end_of_char()
author corvid <corvid@lavabit.com>
date Sun, 17 May 2009 12:04:13 -0400
parents 614b1d02e6c3
children fbe2ce98a4b8
comparison
equal deleted inserted replaced
1102:c44c405a772f 1103:94b9265663f6
14 #include "utf8.hh" 14 #include "utf8.hh"
15 15
16 // C++ functions with C linkage ---------------------------------------------- 16 // C++ functions with C linkage ----------------------------------------------
17 17
18 /* 18 /*
19 * Return index of the last byte of the UTF-8-encoded character that str + i
20 * points to or into.
21 */
22 uint_t a_Utf8_end_of_char(const char *str, uint_t i)
23 {
24 /* We can almost get what we want from utf8fwd(p+1,...)-1, but that
25 * does not work for the last character in a string, and the fn makes some
26 * assumptions that do not suit us.
27 * Here's something very simpleminded instead:
28 */
29 if (str && *str && (str[i] & 0x80)) {
30 int internal_bytes = (str[i] & 0x40) ? 0 : 1;
31
32 while (((str[i + 1] & 0xc0) == 0x80) && (++internal_bytes < 4))
33 i++;
34 }
35 return i;
36 }
37
38 /*
19 * Write UTF-8 encoding of ucs into buf and return number of bytes written. 39 * Write UTF-8 encoding of ucs into buf and return number of bytes written.
20 */ 40 */
21 int a_Utf8_encode(unsigned int ucs, char *buf) 41 int a_Utf8_encode(unsigned int ucs, char *buf)
22 { 42 {
23 return utf8encode(ucs, buf); 43 return utf8encode(ucs, buf);