From 2137d2f13310aca3cef6a0fc7735fdf4aac53e8c Mon Sep 17 00:00:00 2001 From: default Date: Thu, 3 Aug 2023 08:42:38 +0200 Subject: [PATCH] Backport from xs. --- xs_json.h | 14 ++++++ xs_unicode.h | 131 ++++++++++++++++++++++++++++++++++++++++++++++----- xs_version.h | 2 +- 3 files changed, 134 insertions(+), 13 deletions(-) diff --git a/xs_json.h b/xs_json.h index 388cfe0..d702a85 100644 --- a/xs_json.h +++ b/xs_json.h @@ -5,6 +5,7 @@ #define _XS_JSON_H xs_str *xs_json_dumps_pp(const xs_val *data, int indent); +int xs_json_dump_pp(const xs_val *data, int indent, FILE *f); #define xs_json_dumps(data) xs_json_dumps_pp(data, 0) xs_val *xs_json_loads(const xs_str *json); @@ -165,6 +166,19 @@ xs_str *xs_json_dumps_pp(const xs_val *data, int indent) } +int xs_json_dump_pp(const xs_val *data, int indent, FILE *f) +/* serializes data as JSON through xs_json_dumps_pp() and writes the resulting string to f; returns 0 if the serialization yields NULL, 1 otherwise */ +{ + xs *j = xs_json_dumps_pp(data, indent); + + if (j == NULL) + return 0; + + fwrite(j, strlen(j), 1, f); /* NOTE(review): the fwrite() result is not checked, so 1 is returned even if the write fails */ + return 1; +} + + /** JSON loads **/ /* this code comes mostly from the Minimum Profit Text Editor (MPDM) */ diff --git a/xs_unicode.h b/xs_unicode.h index d45b52e..48cd660 100644 --- a/xs_unicode.h +++ b/xs_unicode.h @@ -5,8 +5,15 @@ #define _XS_UNICODE_H xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); - char *xs_utf8_dec(const char *str, unsigned int *cpoint); - + unsigned int xs_utf8_dec(char **str); + unsigned int *_xs_unicode_upper_search(unsigned int cpoint); + unsigned int *_xs_unicode_lower_search(unsigned int cpoint); + #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint)) + #define xs_unicode_is_lower(cpoint) (!!_xs_unicode_lower_search(cpoint)) + unsigned int xs_unicode_to_upper(unsigned int cpoint); + unsigned int xs_unicode_to_lower(unsigned int cpoint); + int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac); + int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint); #ifdef XS_IMPLEMENTATION @@ -50,46 +57,146 @@ xs_str 
*xs_utf8_enc(xs_str *str, unsigned int cpoint) } -char *xs_utf8_dec(const char *str, unsigned int *cpoint) -/* decodes an utf-8 char inside str into cpoint and returns the next position */ +unsigned int xs_utf8_dec(char **str) +/* decodes one UTF-8 sequence starting at *str, advances *str past the bytes consumed and returns the code point; returns 0xfffd when an expected continuation byte is missing (that byte is not consumed) */ { - unsigned char *p = (unsigned char *)str; + unsigned char *p = (unsigned char *)*str; + unsigned int cpoint = 0; /* NOTE(review): stays 0 when the lead byte matches none of the patterns below; confirm that 0 rather than 0xfffd is intended for that case */ int c = *p++; int cb = 0; if ((c & 0x80) == 0) { /* 1 byte char */ - *cpoint = c; + cpoint = c; } else if ((c & 0xe0) == 0xc0) { /* 2 byte char */ - *cpoint = (c & 0x1f) << 6; + cpoint = (c & 0x1f) << 6; cb = 1; } else if ((c & 0xf0) == 0xe0) { /* 3 byte char */ - *cpoint = (c & 0x0f) << 12; + cpoint = (c & 0x0f) << 12; cb = 2; } else if ((c & 0xf8) == 0xf0) { /* 4 byte char */ - *cpoint = (c & 0x07) << 18; + cpoint = (c & 0x07) << 18; cb = 3; } /* process the continuation bytes */ while (cb--) { if ((*p & 0xc0) == 0x80) - *cpoint |= (*p++ & 0x3f) << (cb * 6); + cpoint |= (*p++ & 0x3f) << (cb * 6); /* cb was already decremented by the loop test, so the final byte is shifted by 0 */ else { - *cpoint = 0xfffd; + cpoint = 0xfffd; break; } } - return (char *)p; + *str = (char *)p; /* hand the advanced position back to the caller */ + return cpoint; } +#ifdef _XS_UNICODE_TBL_H + +/* the functions below are only compiled when _XS_UNICODE_TBL_H is defined; include xs_unicode_tbl.h before this header to use them */ + +static int int_cmp(const void *p1, const void *p2) /* bsearch() comparator: orders by the first unsigned int found at each pointer */ +{ + const unsigned int *a = p1; + const unsigned int *b = p2; + + return *a < *b ? -1 : *a > *b ? 
1 : 0; +} + + +unsigned int *_xs_unicode_upper_search(unsigned int cpoint) +/* searches for an uppercase codepoint in the case fold table: binary search over entries of two unsigned ints keyed on the first one; returns a pointer to the matching entry or NULL (assumes the table is sorted by that first element, as bsearch() requires) */ +{ + return bsearch(&cpoint, xs_unicode_case_fold_table, + sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2), + sizeof(unsigned int) * 2, + int_cmp); +} + + +unsigned int *_xs_unicode_lower_search(unsigned int cpoint) +/* searches for a lowercase codepoint in the case fold table: linear scan over the second element of every two-int entry; returns a pointer to that second element or NULL */ +{ + unsigned int *p = xs_unicode_case_fold_table + 1; + unsigned int *e = xs_unicode_case_fold_table + + sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int); + + while (p < e) { + if (cpoint == *p) + return p; + + p += 2; + } + + return NULL; +} + + +unsigned int xs_unicode_to_upper(unsigned int cpoint) +/* returns cpoint converted to uppercase, or cpoint unchanged if it is not found by _xs_unicode_lower_search() */ +{ + unsigned int *p = _xs_unicode_lower_search(cpoint); + + return p == NULL ? cpoint : p[-1]; /* p points at the second element of an entry, so p[-1] is the first element of the same entry */ +} + + +unsigned int xs_unicode_to_lower(unsigned int cpoint) +/* returns cpoint converted to lowercase, or cpoint unchanged if it is not found by _xs_unicode_upper_search() */ +{ + unsigned int *p = _xs_unicode_upper_search(cpoint); + + return p == NULL ? 
cpoint : p[1]; +} + + +int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac) +/* applies unicode Normalization Form D: binary-searches xs_unicode_nfd_table (entries of three unsigned ints keyed on the first one); on a hit stores the second and third elements in *base and *diac and returns 1, otherwise returns 0 and leaves both untouched */ +{ + unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table, + sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3), + sizeof(unsigned int) * 3, + int_cmp); + + if (r != NULL) { + *base = r[1]; + *diac = r[2]; + } + + return !!r; +} + + +int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint) +/* applies unicode Normalization Form C: linear scan of xs_unicode_nfd_table for an entry whose second and third elements equal base and diac; on a hit stores the first element in *cpoint and returns 1, otherwise returns 0 and leaves *cpoint untouched */ +{ + unsigned int *p = xs_unicode_nfd_table; + unsigned int *e = xs_unicode_nfd_table + + sizeof(xs_unicode_nfd_table) / sizeof(unsigned int); + + while (p < e) { + if (p[1] == base && p[2] == diac) { + *cpoint = p[0]; + return 1; + } + + p += 3; + } + + return 0; +} + + +#endif /* _XS_UNICODE_TBL_H */ + #endif /* XS_IMPLEMENTATION */ #endif /* _XS_UNICODE_H */ diff --git a/xs_version.h b/xs_version.h index bc84175..6f137ea 100644 --- a/xs_version.h +++ b/xs_version.h @@ -1 +1 @@ -/* 4c151c4cc5b7c6980c1f0dd733a3fab0a30f9695 */ +/* 5ad148b1c1dbbf7b4550c9fcd13d96ac6def2d21 */