Improved Unicode surrogate pair parsing in xs_json.

This commit is contained in:
default 2022-09-20 07:15:39 +02:00
parent 5e438f8353
commit b97f4c7a90

View File

@ -229,7 +229,7 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t)
while ((c = *s) != '"' && c != '\0') { while ((c = *s) != '"' && c != '\0') {
char tmp[5]; char tmp[5];
int i; int cp, i;
if (c == '\\') { if (c == '\\') {
s++; s++;
@ -240,15 +240,30 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t)
case 't': c = '\t'; break; case 't': c = '\t'; break;
case 'u': /* Unicode codepoint as an hex char */ case 'u': /* Unicode codepoint as an hex char */
s++; s++;
tmp[0] = (char)*s; s++; memcpy(tmp, s, 4);
tmp[1] = (char)*s; s++; s += 3;
tmp[2] = (char)*s; s++;
tmp[3] = (char)*s;
tmp[4] = '\0'; tmp[4] = '\0';
xs_debug();
sscanf(tmp, "%04x", &i); sscanf(tmp, "%04x", &i);
v = xs_utf8_enc(v, i); if (i >= 0xd800 && i <= 0xdfff) {
/* it's a surrogate pair */
cp = (i & 0x3ff) << 10;
/* skip to the next value */
s += 3;
memcpy(tmp, s, 4);
s += 3;
sscanf(tmp, "%04x", &i);
cp |= (i & 0x3ff);
cp += 0x10000;
}
else
cp = i;
v = xs_utf8_enc(v, cp);
c = '\0'; c = '\0';
break; break;