From bc5d0d4ed09833640856ee0193a53553dbb1eb20 Mon Sep 17 00:00:00 2001 From: default Date: Mon, 20 Nov 2023 18:33:24 +0100 Subject: [PATCH] Replaced encode_html_strict() with xs_html_encode(). --- Makefile | 7 +- format.c | 16 +--- html.c | 11 +-- snac.c | 1 + snac.h | 3 +- xs.h | 19 +++- xs_html.h | 240 +++++++++++++++++++++++++++++++++++++++++++++++++++ xs_version.h | 2 +- 8 files changed, 273 insertions(+), 26 deletions(-) create mode 100644 xs_html.h diff --git a/Makefile b/Makefile index 0880b3d..9374792 100644 --- a/Makefile +++ b/Makefile @@ -37,9 +37,9 @@ activitypub.o: activitypub.c xs.h xs_json.h xs_curl.h xs_mime.h \ xs_openssl.h xs_regex.h xs_time.h xs_set.h xs_match.h snac.h data.o: data.c xs.h xs_hex.h xs_io.h xs_json.h xs_openssl.h xs_glob.h \ xs_set.h xs_time.h snac.h -format.o: format.c xs.h xs_regex.h xs_mime.h snac.h +format.o: format.c xs.h xs_regex.h xs_mime.h xs_html.h snac.h html.o: html.c xs.h xs_io.h xs_json.h xs_regex.h xs_set.h xs_openssl.h \ - xs_time.h xs_mime.h xs_match.h snac.h + xs_time.h xs_mime.h xs_match.h xs_html.h snac.h http.o: http.c xs.h xs_io.h xs_openssl.h xs_curl.h xs_time.h xs_json.h \ snac.h httpd.o: httpd.c xs.h xs_io.h xs_json.h xs_socket.h xs_httpd.h xs_mime.h \ @@ -50,7 +50,8 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \ snac.h snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode.h xs_json.h xs_curl.h \ xs_openssl.h xs_socket.h xs_url.h xs_httpd.h xs_mime.h xs_regex.h \ - xs_set.h xs_time.h xs_glob.h xs_random.h xs_match.h xs_fcgi.h snac.h + xs_set.h xs_time.h xs_glob.h xs_random.h xs_match.h xs_fcgi.h xs_html.h \ + snac.h upgrade.o: upgrade.c xs.h xs_io.h xs_json.h xs_glob.h snac.h utils.o: utils.c xs.h xs_io.h xs_json.h xs_time.h xs_openssl.h \ xs_random.h xs_glob.h snac.h diff --git a/format.c b/format.c index cfe2294..019260c 100644 --- a/format.c +++ b/format.c @@ -4,6 +4,7 @@ #include "xs.h" #include "xs_regex.h" #include "xs_mime.h" +#include "xs_html.h" #include "snac.h" @@ -260,23 
+261,10 @@ xs_str *sanitize(const char *content) } -xs_str *encode_html_strict(const char *str) -/* escapes html characters */ -{ - xs_str *encoded = xs_replace(str, "&", "&amp;"); - encoded = xs_replace_i(encoded, "<", "&lt;"); - encoded = xs_replace_i(encoded, ">", "&gt;"); - encoded = xs_replace_i(encoded, "\"", "&#34;"); - encoded = xs_replace_i(encoded, "'", "&#39;"); - - return encoded; -} - - xs_str *encode_html(const char *str) /* escapes html characters */ { - xs_str *encoded = encode_html_strict(str); + xs_str *encoded = xs_html_encode((char *)str); /* Restore only <br>. Probably safe. Let's hope nothing goes wrong with this. */ encoded = xs_replace_i(encoded, "&lt;br&gt;", "<br>");
diff --git a/html.c b/html.c index 50dc25b..4673130 100644 --- a/html.c +++ b/html.c @@ -10,6 +10,7 @@ #include "xs_time.h" #include "xs_mime.h" #include "xs_match.h" +#include "xs_html.h" #include "snac.h" @@ -2137,10 +2138,10 @@ int html_get_handler(const xs_dict *req, const char *q_path, xs *bio = not_really_markdown(xs_dict_get(snac.config, "bio"), NULL); char *p, *v; - xs *es1 = encode_html_strict(xs_dict_get(snac.config, "name")); - xs *es2 = encode_html_strict(snac.uid); - xs *es3 = encode_html_strict(xs_dict_get(srv_config, "host")); - xs *es4 = encode_html_strict(bio); + xs *es1 = xs_html_encode(xs_dict_get(snac.config, "name")); + xs *es2 = xs_html_encode(snac.uid); + xs *es3 = xs_html_encode(xs_dict_get(srv_config, "host")); + xs *es4 = xs_html_encode(bio); rss = xs_fmt( "\n" "\n" @@ -2168,7 +2169,7 @@ int html_get_handler(const xs_dict *req, const char *q_path, if (!xs_startswith(id, snac.actor)) continue; - xs *content = encode_html_strict(xs_dict_get(msg, "content")); + xs *content = xs_html_encode(xs_dict_get(msg, "content")); // We SHOULD only use sanitized one for description. // So, only encode for feed title, while the description just keep it sanitized as is. diff --git a/snac.c b/snac.c index 6394608..6ce7741 100644 --- a/snac.c +++ b/snac.c @@ -21,6 +21,7 @@ #include "xs_random.h" #include "xs_match.h" #include "xs_fcgi.h" +#include "xs_html.h" #include "snac.h" diff --git a/snac.h b/snac.h index 3101fbb..cb240d8 100644 --- a/snac.h +++ b/snac.h @@ -1,7 +1,7 @@ /* snac - A simple, minimalistic ActivityPub instance */ /* copyright (c) 2022 - 2023 grunfink et al.
/ MIT license */ -#define VERSION "2.43" +#define VERSION "2.44-dev" #define USER_AGENT "snac/" VERSION @@ -266,7 +266,6 @@ int activitypub_post_handler(const xs_dict *req, const char *q_path, xs_str *not_really_markdown(const char *content, xs_list **attach); xs_str *sanitize(const char *content); -xs_str *encode_html_strict(const char *str); xs_str *encode_html(const char *str); xs_str *html_timeline(snac *user, const xs_list *list, int local, diff --git a/xs.h b/xs.h index 63715ac..d7d9169 100644 --- a/xs.h +++ b/xs.h @@ -62,7 +62,8 @@ xs_str *xs_str_new(const char *str); xs_str *xs_str_new_sz(const char *mem, int sz); xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix); #define xs_str_prepend_i(str, prefix) xs_str_wrap_i(prefix, str, NULL) -#define xs_str_cat(str, suffix) xs_str_wrap_i(NULL, str, suffix) +xs_str *_xs_str_cat(xs_str *str, const char *strs[]); +#define xs_str_cat(str, ...) _xs_str_cat(str, (const char *[]){ __VA_ARGS__, NULL }) xs_str *xs_replace_in(xs_str *str, const char *sfrom, const char *sto, int times); #define xs_replace_i(str, sfrom, sto) xs_replace_in(str, sfrom, sto, XS_ALL) #define xs_replace(str, sfrom, sto) xs_replace_in(xs_dup(str), sfrom, sto, XS_ALL) @@ -451,6 +452,22 @@ xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix) } +xs_str *_xs_str_cat(xs_str *str, const char *strs[]) +/* concatenates all strings after str */ +{ + int o = strlen(str); + + while (*strs) { + int sz = strlen(*strs); + str = xs_insert_m(str, o, *strs, sz); + o += sz; + strs++; + } + + return str; +} + + xs_str *xs_replace_in(xs_str *str, const char *sfrom, const char *sto, int times) /* replaces inline all sfrom with sto */ { diff --git a/xs_html.h b/xs_html.h new file mode 100644 index 0000000..744df5b --- /dev/null +++ b/xs_html.h @@ -0,0 +1,240 @@ +/* copyright (c) 2022 - 2023 grunfink et al. 
/ MIT license */ + +#ifndef _XS_HTML_H + +#define _XS_HTML_H + +typedef struct xs_html xs_html; + +xs_str *xs_html_encode(char *str); + +xs_html *xs_html_attr(char *key, char *value); +xs_html *xs_html_text(char *content); +xs_html *xs_html_raw(char *content); + +xs_html *xs_html_add(xs_html *tag, xs_html *data); + +xs_html *_xs_html_tag(char *tag, xs_html *var[]); +#define xs_html_tag(tag, ...) _xs_html_tag(tag, (xs_html *[]) { __VA_ARGS__, NULL }) +xs_html *_xs_html_sctag(char *tag, xs_html *var[]); +#define xs_html_sctag(tag, ...) _xs_html_sctag(tag, (xs_html *[]) { __VA_ARGS__, NULL }) +xs_str *_xs_html_render(xs_html *h, xs_str *s); +#define xs_html_render(h) _xs_html_render(h, xs_str_new(NULL)) + +#ifdef XS_IMPLEMENTATION + +typedef enum { + XS_HTML_TAG, + XS_HTML_SCTAG, + XS_HTML_ATTR, + XS_HTML_TEXT +} xs_html_type; + +struct xs_html { + xs_html_type type; + xs_str *content; + xs_html *f_attr; + xs_html *l_attr; + xs_html *f_tag; + xs_html *l_tag; + xs_html *next; +}; + +xs_str *xs_html_encode(char *str) +/* encodes str using HTML entities */ +{ + xs_str *s = xs_str_new(NULL); + int o = 0; + char *e = str + strlen(str); + + for (;;) { + char *ec = "<>\"'&"; /* characters to escape */ + char *q = e; + int z; + + /* find the nearest happening of a char */ + while (*ec) { + char *m = memchr(str, *ec++, q - str); + if (m) + q = m; + } + + /* copy string to here */ + z = q - str; + s = xs_insert_m(s, o, str, z); + o += z; + + /* if q points to the end, nothing more to do */ + if (q == e) + break; + + /* insert the escaped char */ + char tmp[8]; + snprintf(tmp, sizeof(tmp), "&#%d;", *q); + + z = strlen(tmp); + s = xs_insert_m(s, o, tmp, z); + o += z; + + str = q + 1; + } + + return s; +} + + +#define XS_HTML_NEW() memset(xs_realloc(NULL, sizeof(xs_html)), '\0', sizeof(xs_html)) + +xs_html *xs_html_attr(char *key, char *value) +/* creates an HTML block with an attribute */ +{ + xs_html *a = XS_HTML_NEW(); + + a->type = XS_HTML_ATTR; + + if (value) { + xs *ev = 
xs_html_encode(value); + a->content = xs_fmt("%s=\"%s\"", key, ev); + } + else + a->content = xs_dup(key); + + return a; +} + + +xs_html *xs_html_text(char *content) +/* creates an HTML block of text, escaping it previously */ +{ + xs_html *a = XS_HTML_NEW(); + + a->type = XS_HTML_TEXT; + a->content = xs_html_encode(content); + + return a; +} + + +xs_html *xs_html_raw(char *content) +/* creates an HTML block without escaping (for pre-formatted HTML, comments, etc) */ +{ + xs_html *a = XS_HTML_NEW(); + + a->type = XS_HTML_TEXT; + a->content = xs_dup(content); + + return a; +} + + +xs_html *xs_html_add(xs_html *tag, xs_html *data) +/* add data (attrs, tags or text) to a tag */ +{ + xs_html **first; + xs_html **last; + + if (data->type == XS_HTML_ATTR) { + first = &tag->f_attr; + last = &tag->l_attr; + } + else { + first = &tag->f_tag; + last = &tag->l_tag; + } + + if (*first == NULL) + *first = data; + + if (*last != NULL) + (*last)->next = data; + + *last = data; + + return tag; +} + + +static xs_html *_xs_html_tag_t(xs_html_type type, char *tag, xs_html *var[]) +/* creates a tag with a variable list of attributes and subtags */ +{ + xs_html *a = XS_HTML_NEW(); + + a->type = type; + a->content = xs_dup(tag); + + while (*var) + xs_html_add(a, *var++); + + return a; +} + + +xs_html *_xs_html_tag(char *tag, xs_html *var[]) +{ + return _xs_html_tag_t(XS_HTML_TAG, tag, var); +} + + +xs_html *_xs_html_sctag(char *tag, xs_html *var[]) +{ + return _xs_html_tag_t(XS_HTML_SCTAG, tag, var); +} + + +xs_str *_xs_html_render(xs_html *h, xs_str *s) +/* renders the tag and its subtags */ +{ + xs_html *st; + + switch (h->type) { + case XS_HTML_TAG: + case XS_HTML_SCTAG: + s = xs_str_cat(s, "<", h->content); + + /* render the attributes */ + st = h->f_attr; + while (st) { + xs_html *nst = st->next; + s = _xs_html_render(st, s); + st = nst; + } + + if (h->type == XS_HTML_SCTAG) { + /* self-closing tags should not have subtags */ + s = xs_str_cat(s, "/>"); + } + else { + s = 
xs_str_cat(s, ">"); + + /* render the subtags */ + st = h->f_tag; + while (st) { + xs_html *nst = st->next; + s = _xs_html_render(st, s); + st = nst; + } + + s = xs_str_cat(s, "</", h->content, ">"); + } + + break; + + case XS_HTML_ATTR: + s = xs_str_cat(s, " ", h->content); + break; + + case XS_HTML_TEXT: + s = xs_str_cat(s, h->content); + break; + } + + xs_free(h->content); + xs_free(h); + + return s; +} + + +#endif /* XS_IMPLEMENTATION */ + +#endif /* _XS_HTML_H */ diff --git a/xs_version.h b/xs_version.h index e5a5e49..b9b734b 100644 --- a/xs_version.h +++ b/xs_version.h @@ -1 +1 @@ -/* 416f5ffa99ecd4a3ec25d273b986d3d99dc92d22 */ +/* 63beb583926bb5dfec89e1d694172cc887614460 2023-11-19T19:51:05+01:00 */