From bf435af788d387b3d97fd744e3b1f6a73795beb8 Mon Sep 17 00:00:00 2001 From: default Date: Wed, 27 Dec 2023 12:54:38 +0100 Subject: [PATCH] Backport from xs. --- xs_hex.h | 140 ++++++++++++++++++++++++++--------- xs_socket.h | 43 +++++++---- xs_unicode.h | 202 ++++++++++++++++++++++++++++++--------------------- xs_version.h | 2 +- 4 files changed, 253 insertions(+), 134 deletions(-) diff --git a/xs_hex.h b/xs_hex.h index 2d87a65..21183f4 100644 --- a/xs_hex.h +++ b/xs_hex.h @@ -4,32 +4,111 @@ #define _XS_HEX_H -xs_str *xs_hex_enc(const xs_val *data, int size); -xs_val *xs_hex_dec(const xs_str *hex, int *size); -int xs_is_hex(const char *str); + int xs_is_hex_digit(char str); + void xs_hex_enc_1(char **dst, const char **src); + int xs_hex_dec_1(char **dst, const char **src); + char *_xs_hex_enc(char *dst, const char *src, int src_size); + char *_xs_hex_dec(char *dst, const char *src, int src_size); + +#ifdef _XS_H + xs_str *xs_hex_enc(const xs_val *data, int size); + xs_val *xs_hex_dec(const xs_str *hex, int *size); + int xs_is_hex(const char *str); +#endif /* _XS_H */ + #ifdef XS_IMPLEMENTATION +#include <string.h> + /** hex **/ static char rev_hex_digits[] = "fedcba9876543210FEDCBA"; +int xs_is_hex_digit(char str) +/* checks if the char is an hex digit (the NUL terminator is not one) */ +{ + return str != '\0' && strchr(rev_hex_digits, str) != NULL; +} + + +void xs_hex_enc_1(char **dst, const char **src) +/* encodes one character into two hex digits */ +{ + const char *i = *src; + char *o = *dst; + + *o++ = rev_hex_digits[0xf - (*i >> 4 & 0xf)]; + *o++ = rev_hex_digits[0xf - (*i & 0xf)]; + + *src = i + 1; + *dst = o; +} + + +int xs_hex_dec_1(char **dst, const char **src) +/* decodes two hex digits (returns 0 on error) */ +{ + const char *i = *src; + char *o = *dst; + + char *d1 = strchr(rev_hex_digits, *i++); + char *d2 = strchr(rev_hex_digits, *i++); + + if (!d1 || !d2 || !*d1 || !*d2) { + /* decoding error (strchr also matches the NUL terminator) */ + return 0; + } + + *o++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 | + (0xf - ((d2 - rev_hex_digits) & 0xf)); + + *src = i; 
+ *dst = o; + return 1; +} + + +char *_xs_hex_enc(char *dst, const char *src, int src_size) +/* hex-encodes the src buffer into dst, which has enough size */ +{ + const char *e = src + src_size; + + while (src < e) + xs_hex_enc_1(&dst, &src); + + return dst; +} + + +char *_xs_hex_dec(char *dst, const char *src, int src_size) +/* hex-decodes the src string into dst, which has enough size. + returns NULL on decoding errors or the final position of dst */ +{ + if (src_size % 2) + return NULL; + + const char *e = src + src_size; + + while (src < e) { + if (!xs_hex_dec_1(&dst, &src)) + return NULL; + } + + return dst; +} + + +#ifdef _XS_H + xs_str *xs_hex_enc(const xs_val *data, int size) /* returns an hexdump of data */ { - xs_str *s; - char *p; - int n; + xs_str *s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); - p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); + char *q = _xs_hex_enc(s, data, size); - for (n = 0; n < size; n++) { - *p++ = rev_hex_digits[0xf - (*data >> 4 & 0xf)]; - *p++ = rev_hex_digits[0xf - (*data & 0xf)]; - data++; - } - - *p = '\0'; + *q = '\0'; return s; } @@ -40,29 +119,14 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size) { int sz = strlen(hex); xs_val *s = NULL; - char *p; - int n; - if (sz % 2) - return NULL; - - p = s = xs_realloc(NULL, _xs_blk_size(sz / 2 + 1)); - - for (n = 0; n < sz; n += 2) { - char *d1 = strchr(rev_hex_digits, *hex++); - char *d2 = strchr(rev_hex_digits, *hex++); - - if (!d1 || !d2) { - /* decoding error */ - return xs_free(s); - } - - *p++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 | - (0xf - ((d2 - rev_hex_digits) & 0xf)); - } - - *p = '\0'; *size = sz / 2; + s = xs_realloc(NULL, _xs_blk_size(*size + 1)); + + if (!_xs_hex_dec(s, hex, sz)) + return xs_free(s); + + s[*size] = '\0'; return s; } @@ -71,14 +135,18 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size) int xs_is_hex(const char *str) /* returns 1 if str is an hex string */ { + if (strlen(str) % 2) + return 0; + while (*str) { - if 
(strchr(rev_hex_digits, *str++) == NULL) + if (!xs_is_hex_digit(*str++)) return 0; } return 1; } +#endif /* _XS_H */ #endif /* XS_IMPLEMENTATION */ diff --git a/xs_socket.h b/xs_socket.h index eea2f2d..ab67a6b 100644 --- a/xs_socket.h +++ b/xs_socket.h @@ -7,9 +7,13 @@ int xs_socket_timeout(int s, double rto, double sto); int xs_socket_server(const char *addr, const char *serv); FILE *xs_socket_accept(int rs); -xs_str *xs_socket_peername(int s); +int _xs_socket_peername(int s, char *buf, int buf_size); int xs_socket_connect(const char *addr, const char *serv); +#ifdef _XS_H +xs_str *xs_socket_peername(int s); +#endif + #ifdef XS_IMPLEMENTATION @@ -17,6 +21,9 @@ int xs_socket_connect(const char *addr, const char *serv); #include #include #include +#include +#include +#include int xs_socket_timeout(int s, double rto, double sto) @@ -100,34 +107,28 @@ FILE *xs_socket_accept(int rs) } -xs_str *xs_socket_peername(int s) -/* returns the remote address as a string */ +int _xs_socket_peername(int s, char *buf, int buf_size) +/* fills buf with the socket peer address as text (returns 0 on failure) */ { - xs_str *ip = NULL; struct sockaddr_storage addr; socklen_t slen = sizeof(addr); + const char *p = NULL; if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) { - char buf[1024]; - const char *p = NULL; - if (addr.ss_family == AF_INET) { struct sockaddr_in *sa = (struct sockaddr_in *)&addr; - p = inet_ntop(AF_INET, &sa->sin_addr, buf, sizeof(buf)); + p = inet_ntop(AF_INET, &sa->sin_addr, buf, buf_size); } else if (addr.ss_family == AF_INET6) { struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr; - p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, sizeof(buf)); + p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, buf_size); } - - if (p != NULL) - ip = xs_str_new(p); } - return ip; + return p != NULL; } @@ -195,6 +196,22 @@ int xs_socket_connect(const char *addr, const char *serv) } +#ifdef _XS_H + +xs_str *xs_socket_peername(int s) +/* returns the remote address as a string */ +{ + char buf[2028]; + xs_str 
*p = NULL; + + if (_xs_socket_peername(s, buf, sizeof(buf))) + p = xs_str_new(buf); + + return p; +} + +#endif /* _XS_H */ + #endif /* XS_IMPLEMENTATION */ #endif /* _XS_SOCKET_H */ diff --git a/xs_unicode.h b/xs_unicode.h index f5880f0..c666479 100644 --- a/xs_unicode.h +++ b/xs_unicode.h @@ -5,7 +5,6 @@ #define _XS_UNICODE_H int _xs_utf8_enc(char buf[4], unsigned int cpoint); - xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); unsigned int xs_utf8_dec(char **str); int xs_unicode_width(unsigned int cpoint); int xs_is_surrogate(unsigned int cpoint); @@ -21,13 +20,20 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint); int xs_unicode_is_alpha(unsigned int cpoint); +#ifdef _XS_H + xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); +#endif + #ifdef XS_IMPLEMENTATION +#ifndef countof +#define countof(a) (sizeof((a)) / sizeof(*(a))) +#endif int _xs_utf8_enc(char buf[4], unsigned int cpoint) /* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */ { - unsigned char *p = (unsigned char *)buf; + char *p = buf; if (cpoint < 0x80) /* 1 byte char */ *p++ = cpoint & 0xff; @@ -48,27 +54,16 @@ int _xs_utf8_enc(char buf[4], unsigned int cpoint) *p++ = 0x80 | (cpoint & 0x3f); } - return p - (unsigned char *)buf; -} - - -xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) -/* encodes an Unicode codepoint to utf-8 into str */ -{ - char tmp[4]; - - int c = _xs_utf8_enc(tmp, cpoint); - - return xs_append_m(str, tmp, c); + return p - buf; } unsigned int xs_utf8_dec(char **str) /* decodes an utf-8 char inside str and updates the pointer */ { - unsigned char *p = (unsigned char *)*str; + char *p = *str; unsigned int cpoint = 0; - int c = *p++; + unsigned char c = *p++; int cb = 0; if ((c & 0x80) == 0) { /* 1 byte char */ @@ -91,30 +86,19 @@ unsigned int xs_utf8_dec(char **str) } /* process the continuation bytes */ - while (cb--) { - if ((*p & 0xc0) == 0x80) - cpoint |= (*p++ & 0x3f) << (cb * 6); - else { - cpoint = 
0xfffd; - break; - } - } + while (cb > 0 && *p && (*p & 0xc0) == 0x80) + cpoint |= (*p++ & 0x3f) << (--cb * 6); - *str = (char *)p; + /* incomplete or broken? */ + if (cb) + cpoint = 0xfffd; + + *str = p; return cpoint; } -static int int_range_cmp(const void *p1, const void *p2) -{ - const unsigned int *a = p1; - const unsigned int *b = p2; - - return *a < b[0] ? -1 : *a > b[1] ? 1 : 0; -} - - -/* intentionally dead simple */ +/** Unicode character width: intentionally dead simple **/ static unsigned int xs_unicode_width_table[] = { 0x300, 0x36f, 0, /* diacritics */ @@ -132,12 +116,23 @@ static unsigned int xs_unicode_width_table[] = { int xs_unicode_width(unsigned int cpoint) /* returns the width in columns of a Unicode codepoint (somewhat simplified) */ { - unsigned int *r = bsearch(&cpoint, xs_unicode_width_table, - sizeof(xs_unicode_width_table) / (sizeof(unsigned int) * 3), - sizeof(unsigned int) * 3, - int_range_cmp); + int b = 0; + int t = countof(xs_unicode_width_table) / 3 - 1; - return r ? r[2] : 1; + while (t >= b) { + int n = (b + t) / 2; + unsigned int *p = &xs_unicode_width_table[n * 3]; + + if (cpoint < p[0]) + t = n - 1; + else + if (cpoint > p[1]) + b = n + 1; + else + return p[2]; + } + + return 1; } @@ -167,53 +162,62 @@ unsigned int xs_surrogate_enc(unsigned int cpoint) } +#ifdef _XS_H + +xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) +/* encodes an Unicode codepoint to utf-8 into str */ +{ + char tmp[4]; + + int c = _xs_utf8_enc(tmp, cpoint); + + return xs_append_m(str, tmp, c); +} + +#endif /* _XS_H */ + + #ifdef _XS_UNICODE_TBL_H /* include xs_unicode_tbl.h before this one to use these functions */ -static int int_cmp(const void *p1, const void *p2) -{ - const unsigned int *a = p1; - const unsigned int *b = p2; - - return *a < *b ? -1 : *a > *b ? 
1 : 0; -} - - unsigned int *_xs_unicode_upper_search(unsigned int cpoint) /* searches for an uppercase codepoint in the case fold table */ { - return bsearch(&cpoint, xs_unicode_case_fold_table, - sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2), - sizeof(unsigned int) * 2, - int_cmp); -} + int b = 0; + int t = countof(xs_unicode_case_fold_table) / 2 - 1; + while (t >= b) { + int n = (b + t) / 2; + unsigned int *p = &xs_unicode_case_fold_table[n * 2]; -unsigned int *_xs_unicode_lower_search(unsigned int cpoint) -/* searches for a lowercase codepoint in the case fold table */ -{ - unsigned int *p = xs_unicode_case_fold_table + 1; - unsigned int *e = xs_unicode_case_fold_table + - sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int); - - while (p < e) { - if (cpoint == *p) + if (cpoint < p[0]) + t = n - 1; + else + if (cpoint > p[0]) + b = n + 1; + else return p; - - p += 2; } return NULL; } -unsigned int xs_unicode_to_upper(unsigned int cpoint) -/* returns the cpoint to uppercase */ +unsigned int *_xs_unicode_lower_search(unsigned int cpoint) +/* searches for a lowercase codepoint in the case fold table */ { - unsigned int *p = _xs_unicode_lower_search(cpoint); + unsigned int *p = xs_unicode_case_fold_table; + unsigned int *e = p + countof(xs_unicode_case_fold_table); - return p == NULL ? cpoint : p[-1]; + while (p < e) { + if (cpoint == p[1]) + return p; + + p += 2; + } + + return NULL; } @@ -226,20 +230,40 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint) } +unsigned int xs_unicode_to_upper(unsigned int cpoint) +/* returns the cpoint to uppercase */ +{ + unsigned int *p = _xs_unicode_lower_search(cpoint); + + return p == NULL ? 
cpoint : p[0]; +} + + int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac) /* applies unicode Normalization Form D */ { - unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table, - sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3), - sizeof(unsigned int) * 3, - int_cmp); + int b = 0; + int t = countof(xs_unicode_nfd_table) / 3 - 1; - if (r != NULL) { - *base = r[1]; - *diac = r[2]; + while (t >= b) { + int n = (b + t) / 2; + unsigned int *p = &xs_unicode_nfd_table[n * 3]; + + int c = cpoint < p[0] ? -1 : cpoint > p[0]; + + if (c < 0) + t = n - 1; + else + if (c > 0) + b = n + 1; + else { + *base = p[1]; + *diac = p[2]; + return 1; + } } - return !!r; + return 0; } @@ -247,8 +271,7 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint) /* applies unicode Normalization Form C */ { unsigned int *p = xs_unicode_nfd_table; - unsigned int *e = xs_unicode_nfd_table + - sizeof(xs_unicode_nfd_table) / sizeof(unsigned int); + unsigned int *e = p + countof(xs_unicode_nfd_table); while (p < e) { if (p[1] == base && p[2] == diac) { @@ -266,12 +289,23 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint) int xs_unicode_is_alpha(unsigned int cpoint) /* checks if a codepoint is an alpha (i.e. 
a letter) */ { - unsigned int *r = bsearch(&cpoint, xs_unicode_alpha_table, - sizeof(xs_unicode_alpha_table) / (sizeof(unsigned int) * 2), - sizeof(unsigned int) * 2, - int_range_cmp); + int b = 0; + int t = countof(xs_unicode_alpha_table) / 2 - 1; - return !!r; + while (t >= b) { + int n = (b + t) / 2; + unsigned int *p = &xs_unicode_alpha_table[n * 2]; + + if (cpoint < p[0]) + t = n - 1; + else + if (cpoint > p[1]) + b = n + 1; + else + return 1; + } + + return 0; } diff --git a/xs_version.h b/xs_version.h index f25a017..4f4dc13 100644 --- a/xs_version.h +++ b/xs_version.h @@ -1 +1 @@ -/* 3582ff265e19407df1d532eb1d90c372fe22ca62 2023-12-08T06:10:40+01:00 */ +/* fd50c72456b717bb235eec8fe5f712da5f695f2b 2023-12-27T12:51:14+01:00 */