From 5ee9504b1801f106f152ef9d60620ad88454d390 Mon Sep 17 00:00:00 2001
From: default
Date: Fri, 28 Oct 2022 18:06:42 +0200
Subject: [PATCH] Be more aggressive in HTML sanitization.

---
 format.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 html.c   |  3 ++-
 snac.h   |  2 ++
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/format.c b/format.c
index 69efbdb..1c2a97a 100644
--- a/format.c
+++ b/format.c
@@ -150,3 +150,54 @@ d_char *not_really_markdown(char *content, d_char **f_content)
 
     return *f_content;
 }
+
+
+const char *valid_tags[] = {
+    "a", "p", "br", "img", "blockquote", "ul", "li", "span", NULL
+};
+
+d_char *sanitize(d_char *content)
+/* cleans dangerous HTML output: tags not listed in valid_tags get their '<' escaped as &lt; */
+{
+    d_char *s = xs_str_new(NULL);
+    xs *sl;
+    int n = 0;
+    char *p, *v;
+
+    sl = xs_regex_split(content, "</?[^>]+>");
+
+    p = sl;
+
+    while (xs_list_iter(&p, &v)) {
+        if (n & 0x1) {
+            xs *s1 = xs_strip(xs_crop(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
+            xs *l1 = xs_split_n(s1, " ", 1);
+            xs *tag = xs_tolower(xs_dup(xs_list_get(l1, 0)));
+            int i;
+
+            /* look the lowercased tag name up in valid_tags; i stops on the match or on the NULL end mark */
+            for (i = 0; valid_tags[i]; i++) {
+                if (strcmp(tag, valid_tags[i]) == 0)
+                    break;
+            }
+
+            if (valid_tags[i]) {
+                /* accepted tag: copied verbatim (NOTE(review): its attributes are not filtered) */
+                s = xs_str_cat(s, v);
+            }
+            else {
+                /* bad tag: neutralize it by escaping its '<' */
+                xs *s2 = xs_replace(v, "<", "&lt;");
+                s = xs_str_cat(s, s2);
+            }
+        }
+        else {
+            /* non-tag text (even-numbered piece of the split): copied as is */
+            s = xs_str_cat(s, v);
+        }
+
+        n++;
+    }
+
+    return s;
+}
diff --git a/html.c b/html.c
index 7fd1294..43638d5 100644
--- a/html.c
+++ b/html.c
@@ -600,8 +600,9 @@ d_char *html_entry(snac *snac, d_char *os, char *msg, xs_set *seen, int local, i
 
             }
         }
+        xs *sc = sanitize(c);
 
-        s = xs_str_cat(s, c);
+        s = xs_str_cat(s, sc);
     }
 
     s = xs_str_cat(s, "\n");
diff --git a/snac.h b/snac.h
index f11dcc4..7a3a5f3 100644
--- a/snac.h
+++ b/snac.h
@@ -134,6 +134,8 @@ int activitypub_post_handler(d_char *req, char *q_path,
                              char **body, int *b_size, char **ctype);
 
 d_char *not_really_markdown(char *content, d_char **f_content);
+d_char *sanitize(d_char *str);
+
 int html_get_handler(d_char *req, char *q_path, char **body, int *b_size, char **ctype);
 int html_post_handler(d_char *req, char *q_path, d_char *payload, int p_size,
                       char **body, int *b_size, char **ctype);