utf8.c (1844B)
1 #include "termbox.h" 2 3 static const unsigned char utf8_length[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 13 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 14 5, 6, 6, 1, 1 }; 15 16 static const unsigned char utf8_mask[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 17 0x01 }; 18 19 int 20 tb_utf8_char_length(char c) 21 { 22 return utf8_length[(unsigned char)c]; 23 } 24 25 int 26 tb_utf8_char_to_unicode(uint32_t *out, const char *c) 27 { 28 if (*c == 0) 29 return TB_EOF; 30 31 int i; 32 unsigned char len = tb_utf8_char_length(*c); 33 unsigned char mask = utf8_mask[len - 1]; 34 uint32_t result = c[0] & mask; 35 for (i = 1; i < len; ++i) { 36 result <<= 6; 37 result |= c[i] & 0x3f; 38 } 39 40 *out = result; 41 return (int)len; 42 } 43 44 int 45 tb_utf8_unicode_to_char(char *out, uint32_t c) 46 { 47 int len = 0; 48 int first; 49 int i; 50 51 if (c < 0x80) { 52 first = 0; 53 len = 1; 54 } else if (c < 0x800) { 55 first = 0xc0; 56 len = 2; 57 } else if (c < 0x10000) { 58 first = 0xe0; 59 len = 3; 60 } else if (c < 0x200000) { 61 first = 0xf0; 62 len = 4; 63 } else if (c < 0x4000000) { 64 first = 0xf8; 65 len = 5; 66 } else { 67 first = 0xfc; 68 len = 6; 69 } 70 71 for (i = len - 1; i > 0; --i) { 72 out[i] = (c & 0x3f) | 0x80; 73 c >>= 6; 74 } 75 out[0] = c | first; 76 77 return len; 78 }