/* snac - A simple, minimalistic ActivityPub instance */
/* copyright (c) 2022 - 2023 grunfink / MIT license */

#include "xs.h"
#include "xs_regex.h"
#include "xs_mime.h"

#include "snac.h"

/* emoticons, people laughing and such */

/* Table of textual emoticons and the replacement inserted in their place.
   Terminated by an entry whose key is NULL. */
struct {
    const char *key;
    const char *value;
} smileys[] = {
    { ":-)",        "🙂" },
    { ":-D",        "😀" },
    { "X-D",        "😆" },
    { ";-)",        "😉" },
    { "B-)",        "😎" },
    { ">:-(",       "😡" },
    { ":-(",        "😞" },
    { ":-*",        "😘" },
    { ":-/",        "😕" },
    { "8-o",        "😲" },
    { "%-)",        "🤪" },
    { ":_(",        "😢" },
    { ":-|",        "😐" },
    { "<3",         "💓" },
    { ":facepalm:", "🤦" },
    { ":shrug:",    "🤷" },
    { ":shrug2:",   "¯\\_(ツ)_/¯" },
    { ":eyeroll:",  "🙄" },
    { ":beer:",     "🍺" },
    { ":beers:",    "🍻" },
    { ":munch:",    "😱" },
    { ":thumb:",    "👍" },
    { NULL,         NULL }
};


static xs_str *format_line(const char *line, xs_list **attach)
/* formats a line: converts inline markup to HTML.

   `code`   becomes <code>code</code>
   **bold** becomes <b>bold</b>
   *ital*   becomes <i>ital</i>
   http(s) URLs become links, except that, when attach is not NULL and
   the URL's MIME type (as guessed by xs_mime_by_ext()) starts with
   "image/", the URL is appended to *attach as an "Image" dict and is
   not emitted in the text.

   Returns the string built here.
   NOTE(review): the xs * locals are presumably released automatically
   by the xs library -- confirm against xs.h. */
{
    xs_str *s = xs_str_new(NULL);
    char *p, *v;

    /* split by markup */
    xs *sm = xs_regex_split(line,
        "(`[^`]+`|\\*\\*?[^\\*]+\\*?\\*|https?:/" "/[^[:space:]]+)");
    int n = 0;

    p = sm;
    while (xs_list_iter(&p, &v)) {
        if ((n & 0x1)) {
            /* markup: odd-indexed elements are the pieces that matched */
            if (xs_startswith(v, "`")) {
                /* drop the backticks */
                xs *s1 = xs_crop_i(xs_dup(v), 1, -1);
                xs *s2 = xs_fmt("<code>%s</code>", s1);
                s = xs_str_cat(s, s2);
            }
            else
            if (xs_startswith(v, "**")) {
                /* must be tested before the single asterisk case */
                xs *s1 = xs_crop_i(xs_dup(v), 2, -2);
                xs *s2 = xs_fmt("<b>%s</b>", s1);
                s = xs_str_cat(s, s2);
            }
            else
            if (xs_startswith(v, "*")) {
                xs *s1 = xs_crop_i(xs_dup(v), 1, -1);
                xs *s2 = xs_fmt("<i>%s</i>", s1);
                s = xs_str_cat(s, s2);
            }
            else
            if (xs_startswith(v, "http")) {
                /* u is the visible text with '#' written as an entity;
                   v2 is u with '.' stripped via xs_strip_chars_i() */
                xs *u  = xs_replace(v, "#", "&#35;");
                xs *v2 = xs_strip_chars_i(xs_dup(u), ".");

                const char *mime = xs_mime_by_ext(v2);

                if (attach != NULL && xs_startswith(mime, "image/")) {
                    /* if it's a link to an image, insert it as an attachment */
                    xs *d = xs_dict_new();

                    d = xs_dict_append(d, "mediaType",  mime);
                    d = xs_dict_append(d, "url",        v2);
                    d = xs_dict_append(d, "name",       "");
                    d = xs_dict_append(d, "type",       "Image");

                    *attach = xs_list_append(*attach, d);
                }
                else {
                    xs *s1 = xs_fmt("<a href=\"%s\" target=\"_blank\">%s</a>", v2, u);
                    s = xs_str_cat(s, s1);
                }
            }
            else {
                s = xs_str_cat(s, v);
            }
        }
        else {
            /* surrounded text, copy directly */
            s = xs_str_cat(s, v);
        }

        n++;
    }

    return s;
}


xs_str *not_really_markdown(const char *content, xs_list **attach)
/* formats a content using some Markdown rules.

   The content is processed line by line:
    - a line that is exactly ``` toggles a <pre> section; lines inside
      it are copied verbatim;
    - other lines go through format_line() and are stripped;
    - consecutive lines starting with > are wrapped in <blockquote>;
    - every emitted line is followed by <br>.

   Finally, some redundant <br> are removed and the keys of the
   smileys table are replaced by their values.

   attach is passed through to format_line(). Returns the HTML string. */
{
    xs_str *s  = xs_str_new(NULL);
    int in_pre = 0;
    int in_blq = 0;
    xs *list;
    char *p, *v;

    /* work by lines */
    list = xs_split(content, "\n");

    p = list;
    while (xs_list_iter(&p, &v)) {
        xs *ss = NULL;

        if (strcmp(v, "```") == 0) {
            /* code fence: open or close the preformatted section */
            if (!in_pre)
                s = xs_str_cat(s, "<pre>");
            else
                s = xs_str_cat(s, "</pre>");

            in_pre = !in_pre;
            continue;
        }

        if (in_pre)
            ss = xs_dup(v);
        else
            ss = xs_strip_i(format_line(v, attach));

        if (xs_startswith(ss, ">")) {
            /* delete the > and subsequent spaces */
            ss = xs_strip_i(xs_crop_i(ss, 1, 0));

            if (!in_blq) {
                s = xs_str_cat(s, "<blockquote>");
                in_blq = 1;
            }

            s = xs_str_cat(s, ss);
            s = xs_str_cat(s, "<br>");

            continue;
        }

        if (in_blq) {
            /* first non-quoted line after a quote: close it */
            s = xs_str_cat(s, "</blockquote>");
            in_blq = 0;
        }

        s = xs_str_cat(s, ss);
        s = xs_str_cat(s, "<br>");
    }

    /* close whatever was left open at the end of the content */
    if (in_blq)
        s = xs_str_cat(s, "</blockquote>");
    if (in_pre)
        s = xs_str_cat(s, "</pre>");

    /* some beauty fixes */
    s = xs_replace_i(s, "<br><br><blockquote>", "<br><blockquote>");
    s = xs_replace_i(s, "</blockquote><br>", "</blockquote>");
    s = xs_replace_i(s, "</pre><br>", "</pre>");

    {
        /* traditional emoticons */
        int n;

        for (n = 0; smileys[n].key; n++)
            s = xs_replace_i(s, smileys[n].key, smileys[n].value);
    }

    return s;
}


/* HTML tag names that sanitize() lets through; NULL-terminated */
const char *valid_tags[] = {
    "a", "p", "br", "br/", "blockquote", "ul", "ol", "li", "cite",
    "span", "i", "b", "u", "pre", "code", "em", "strong", NULL
};

xs_str *sanitize(const char *content)
/* cleans dangerous HTML output.

   The content is split on anything that looks like a tag. Text
   between tags is copied as is. A tag is kept only if its lowercased
   name is in valid_tags, and it is rebuilt from scratch keeping only
   double-quoted href, rel, class and target attributes; any other tag
   is dropped (its surrounding text is kept).

   Returns the sanitized string. */
{
    xs_str *s = xs_str_new(NULL);
    xs *sl;
    int n = 0;
    char *p, *v;

    sl = xs_regex_split(content, "</?[^>]+>");

    p = sl;
    n = 0;

    while (xs_list_iter(&p, &v)) {
        if (n & 0x1) {
            /* tag: strip the < (or </) and the >, then isolate the name */
            xs *s1  = xs_strip_i(xs_crop_i(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
            xs *l1  = xs_split_n(s1, " ", 1);
            xs *tag = xs_tolower_i(xs_dup(xs_list_get(l1, 0)));
            xs *s2  = NULL;
            int i;

            /* check if it's one of the valid tags */
            for (i = 0; valid_tags[i]; i++) {
                if (strcmp(tag, valid_tags[i]) == 0)
                    break;
            }

            if (valid_tags[i]) {
                /* accepted tag: rebuild it with only the accepted elements */
                xs *el = xs_regex_match(v, "(href|rel|class|target)=\"[^\"]*\"");
                xs *s3 = xs_join(el, " ");

                s2 = xs_fmt("<%s%s%s%s>",
                    v[1] == '/' ? "/" : "", tag, xs_list_len(el) ? " " : "", s3);

                s = xs_str_cat(s, s2);
            }
        }
        else {
            /* non-tag */
            s = xs_str_cat(s, v);
        }

        n++;
    }

    return s;
}


xs_str *encode_html(const char *str)
/* escapes html characters.

   The ampersand is replaced first so that the entities generated by
   the following replacements are not escaped again. Returns the
   escaped string. */
{
    xs_str *encoded = xs_replace(str, "&", "&amp;");

    encoded = xs_replace_i(encoded, "<", "&lt;");
    encoded = xs_replace_i(encoded, ">", "&gt;");
    encoded = xs_replace_i(encoded, "\"", "&quot;");
    encoded = xs_replace_i(encoded, "'", "&#39;");

    /* Restore only <br>. Probably safe. Let's hope nothing goes wrong with this. */
    encoded = xs_replace_i(encoded, "&lt;br&gt;", "<br>");

    return encoded;
}