view src/utf8.hh @ 1453:328111d18d57

respect UTF-8 when splitting long lines in plain text (noticed by corvid). When splitting long lines in plain text to avoid X11 coordinate overflows, we need to make sure that multibyte UTF-8 chars are not split. Additionally, combining chars like accents should stay together with their base char.
author Johannes Hofmann <>
date Sun, 29 Nov 2009 21:40:02 +0100
parents 5d6869b28e4d
children af9ec59638e0
line wrap: on
line source
#ifndef __UTF8_HH__
#define __UTF8_HH__

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

#include "d_size.h"

 * Unicode replacement character U+FFFD
 * "used to replace an incoming character whose value is unknown or otherwise
 * unrepresentable in Unicode"
static const char utf8_replacement_char[] = "\xEF\xBF\xBD";

/*
 * Public UTF-8 helper routines.
 *
 * Only the prototypes are visible in this header; the per-function notes
 * below are inferred from the names and signatures alone.
 * NOTE(review): confirm each one against the implementation file.
 */

/* Presumably locates the end of the character that contains byte str[i]. */
uint_t a_Utf8_end_of_char(const char *str, uint_t i);

/* Presumably decodes one character from [str, end) and reports the number
 * of bytes consumed through *len. */
uint_t a_Utf8_decode(const char *str, const char *end, int *len);

/* Presumably writes the UTF-8 encoding of code point ucs into buf;
 * the meaning of the int result is not visible here. */
int a_Utf8_encode(unsigned int ucs, char *buf);

/* Presumably examines the first srclen bytes of src for UTF-8 content;
 * the meaning of the int result is not visible here. */
int a_Utf8_test(const char *src, unsigned int srclen);

/* Presumably tells whether the character starting at s (bounded by end)
 * is ideographic, reporting its byte length through *len. */
bool_t a_Utf8_ideographic(const char *s, const char *end, int *len);

/* Presumably tells whether the given code point is a combining character
 * (e.g. an accent attached to a base character). */
bool_t a_Utf8_combining_char(int unicode);

#ifdef __cplusplus
} /* closes the extern "C" block opened near the top of this header */
#endif /* __cplusplus */

#endif /* __UTF8_HH__ */