Last active
June 19, 2022 17:08
-
-
Save ske2004/88dc140ca3641d5ef1b595d7f05c3ea2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Utf8_Fetch: decode a single code point from a NUL-terminated UTF-8 string.
//
//   out  receives the decoded code point, or the raw value of the first byte
//        when the bytes at s_ do not form a well-formed sequence.
//   s_   NUL-terminated input; must not be NULL.
//
// Returns the number of bytes consumed:
//   0     the first byte is the terminating NUL (*out is set to 0),
//   2..4  a well-formed multi-byte sequence was decoded,
//   1     otherwise: ASCII, or a byte passed through raw (stray continuation
//         byte, truncated sequence, overlong encoding, UTF-16 surrogate,
//         value above U+10FFFF, or a 0xF8..0xFF lead byte).
static inline size_t Utf8_Fetch(uint32_t *out, const char *s_)
{
    const unsigned char *s = (const unsigned char *)s_;

    // Anything that is not a lead byte (11xxxxxx) is handed back as-is:
    // ASCII, the terminating NUL, and stray continuation bytes.
    if ((s[0] & 0xC0) != 0xC0) {
        *out = s[0];
        return s[0] > 0;
    }

    // Sequence length keyed by the top five bits of the lead byte. Only
    // entries 24..31 are reachable here because of the check above.
    static const size_t clas[32] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2,3,3,4,5};
    size_t cl = clas[s[0] >> 3];

    // Every trailing byte must be 10xxxxxx. The scan stops at the first byte
    // that is not, which includes the NUL terminator, so it never reads past
    // the end of the string.
    for (size_t i = 1; i < cl; ++i) {
        if ((s[i] & 0xC0) != 0x80) {
            *out = s[0];
            return 1;
        }
    }

    uint32_t cp;
    uint32_t min_cp; // smallest code point that legitimately needs cl bytes
    switch (cl) {
    case 2:
        cp = ((uint32_t)(s[0] & 0x1F) << 6) | (uint32_t)(s[1] & 0x3F);
        min_cp = 0x80;
        break;
    case 3:
        cp = ((uint32_t)(s[0] & 0x0F) << 12) | ((uint32_t)(s[1] & 0x3F) << 6) |
             (uint32_t)(s[2] & 0x3F);
        min_cp = 0x800;
        break;
    case 4:
        cp = ((uint32_t)(s[0] & 0x07) << 18) | ((uint32_t)(s[1] & 0x3F) << 12) |
             ((uint32_t)(s[2] & 0x3F) << 6) | (uint32_t)(s[3] & 0x3F);
        min_cp = 0x10000;
        break;
    default:
        // NOTE(skejeton): class 5 invalid, even if possible, just return it raw and map to class 1 (ascii)
        *out = s[0];
        return 1;
    }

    // Reject overlong encodings, UTF-16 surrogates and values beyond the
    // Unicode range; they get the same raw-byte treatment as any other
    // malformed input.
    if (cp < min_cp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
        *out = s[0];
        return 1;
    }

    *out = cp;
    return cl;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment