2022-10-07 19:30:54 +03:00
|
|
|
/* snac - A simple, minimalistic ActivityPub instance */
|
2023-01-17 11:50:16 +03:00
|
|
|
/* copyright (c) 2022 - 2023 grunfink / MIT license */
|
2022-10-07 19:30:54 +03:00
|
|
|
|
|
|
|
#include "xs.h"
|
|
|
|
#include "xs_regex.h"
|
|
|
|
|
|
|
|
#include "snac.h"
|
|
|
|
|
2022-10-07 20:02:29 +03:00
|
|
|
/* emoticons, people laughing and such */

/* Table of textual emoticons / :shortcodes: and the text that takes
   their place. It is closed by a { NULL, NULL } sentinel entry.
   The order is significant for code that applies the entries from
   first to last: ">:-(" must be listed before ":-(", which is
   contained in it. */
struct {
    const char *key;    /* text to look for */
    const char *value;  /* text to put in its place */
} smileys[] = {
    { ":-)",        "🙂" },
    { ":-D",        "😀" },
    { "X-D",        "😆" },
    { ";-)",        "😉" },
    { "B-)",        "😎" },
    { ">:-(",       "😡" },
    { ":-(",        "😞" },
    { ":-*",        "😘" },
    { ":-/",        "😕" },
    { "8-o",        "😲" },
    { "%-)",        "🤪" },
    { ":_(",        "😢" },
    { ":-|",        "😐" },
    { "<3",         "💓" },
    { ":facepalm:", "🤦" },
    { ":shrug:",    "🤷" },
    { ":shrug2:",   "¯\\_(ツ)_/¯" },
    { ":eyeroll:",  "🙄" },
    { ":beer:",     "🍺" },
    { ":beers:",    "🍻" },
    { ":munch:",    "😱" },
    { ":thumb:",    "👍" },
    { NULL,         NULL }
};
|
|
|
|
|
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
static d_char *format_line(const char *line)
|
|
|
|
/* formats a line */
|
2022-10-07 19:30:54 +03:00
|
|
|
{
|
2022-11-13 11:12:20 +03:00
|
|
|
d_char *s = xs_str_new(NULL);
|
2022-10-07 19:30:54 +03:00
|
|
|
char *p, *v;
|
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
/* split by markup */
|
|
|
|
xs *sm = xs_regex_split(line,
|
|
|
|
"(`[^`]+`|\\*\\*?[^\\*]+\\*?\\*|https?:/" "/[^[:space:]]+)");
|
|
|
|
int n = 0;
|
2022-11-06 08:55:06 +03:00
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
p = sm;
|
|
|
|
while (xs_list_iter(&p, &v)) {
|
|
|
|
if ((n & 0x1)) {
|
|
|
|
/* markup */
|
|
|
|
if (xs_startswith(v, "`")) {
|
2023-01-12 11:28:02 +03:00
|
|
|
xs *s1 = xs_crop_i(xs_dup(v), 1, -1);
|
2022-11-13 11:12:20 +03:00
|
|
|
xs *s2 = xs_fmt("<code>%s</code>", s1);
|
|
|
|
s = xs_str_cat(s, s2);
|
2022-10-07 19:30:54 +03:00
|
|
|
}
|
|
|
|
else
|
2022-11-13 11:12:20 +03:00
|
|
|
if (xs_startswith(v, "**")) {
|
2023-01-12 11:28:02 +03:00
|
|
|
xs *s1 = xs_crop_i(xs_dup(v), 2, -2);
|
2022-11-13 11:12:20 +03:00
|
|
|
xs *s2 = xs_fmt("<b>%s</b>", s1);
|
|
|
|
s = xs_str_cat(s, s2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
if (xs_startswith(v, "*")) {
|
2023-01-12 11:28:02 +03:00
|
|
|
xs *s1 = xs_crop_i(xs_dup(v), 1, -1);
|
2022-11-13 11:12:20 +03:00
|
|
|
xs *s2 = xs_fmt("<i>%s</i>", s1);
|
|
|
|
s = xs_str_cat(s, s2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
if (xs_startswith(v, "http")) {
|
2023-01-13 09:02:04 +03:00
|
|
|
xs *v2 = xs_strip_chars_i(xs_dup(v), ".");
|
|
|
|
xs *s1 = xs_fmt("<a href=\"%s\" target=\"_blank\">%s</a>", v2, v);
|
2022-11-13 11:12:20 +03:00
|
|
|
s = xs_str_cat(s, s1);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
s = xs_str_cat(s, v);
|
2022-10-07 19:30:54 +03:00
|
|
|
}
|
2022-11-13 11:12:20 +03:00
|
|
|
else
|
|
|
|
/* surrounded text, copy directly */
|
|
|
|
s = xs_str_cat(s, v);
|
|
|
|
|
|
|
|
n++;
|
2022-10-07 19:30:54 +03:00
|
|
|
}
|
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
return s;
|
|
|
|
}
|
2022-10-07 19:30:54 +03:00
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
|
2022-11-16 15:13:31 +03:00
|
|
|
d_char *not_really_markdown(const char *content)
|
2022-11-13 11:12:20 +03:00
|
|
|
/* formats a content using some Markdown rules */
|
|
|
|
{
|
|
|
|
d_char *s = xs_str_new(NULL);
|
|
|
|
int in_pre = 0;
|
|
|
|
int in_blq = 0;
|
|
|
|
xs *list;
|
|
|
|
char *p, *v;
|
|
|
|
|
|
|
|
/* work by lines */
|
2022-12-14 06:55:47 +03:00
|
|
|
list = xs_split(content, "\n");
|
2022-10-07 19:30:54 +03:00
|
|
|
|
2022-12-14 06:55:47 +03:00
|
|
|
p = list;
|
2022-10-07 19:30:54 +03:00
|
|
|
while (xs_list_iter(&p, &v)) {
|
2022-11-13 11:12:20 +03:00
|
|
|
xs *ss = NULL;
|
2022-10-07 19:30:54 +03:00
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
if (strcmp(v, "```") == 0) {
|
2022-10-07 19:30:54 +03:00
|
|
|
if (!in_pre)
|
|
|
|
s = xs_str_cat(s, "<pre>");
|
|
|
|
else
|
|
|
|
s = xs_str_cat(s, "</pre>");
|
|
|
|
|
|
|
|
in_pre = !in_pre;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-11-13 11:12:20 +03:00
|
|
|
if (in_pre)
|
|
|
|
ss = xs_dup(v);
|
|
|
|
else
|
2023-01-12 11:28:02 +03:00
|
|
|
ss = xs_strip_i(format_line(v));
|
2022-11-13 11:12:20 +03:00
|
|
|
|
2022-10-07 19:30:54 +03:00
|
|
|
if (xs_startswith(ss, ">")) {
|
|
|
|
/* delete the > and subsequent spaces */
|
2023-01-12 11:28:02 +03:00
|
|
|
ss = xs_strip_i(xs_crop_i(ss, 1, 0));
|
2022-10-07 19:30:54 +03:00
|
|
|
|
|
|
|
if (!in_blq) {
|
|
|
|
s = xs_str_cat(s, "<blockquote>");
|
|
|
|
in_blq = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
s = xs_str_cat(s, ss);
|
|
|
|
s = xs_str_cat(s, "<br>");
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (in_blq) {
|
|
|
|
s = xs_str_cat(s, "</blockquote>");
|
|
|
|
in_blq = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
s = xs_str_cat(s, ss);
|
|
|
|
s = xs_str_cat(s, "<br>");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (in_blq)
|
|
|
|
s = xs_str_cat(s, "</blockquote>");
|
|
|
|
if (in_pre)
|
|
|
|
s = xs_str_cat(s, "</pre>");
|
|
|
|
|
|
|
|
/* some beauty fixes */
|
2022-11-13 11:12:20 +03:00
|
|
|
s = xs_replace_i(s, "<br><br><blockquote>", "<br><blockquote>");
|
2022-10-07 19:30:54 +03:00
|
|
|
s = xs_replace_i(s, "</blockquote><br>", "</blockquote>");
|
2022-11-01 21:49:35 +03:00
|
|
|
s = xs_replace_i(s, "</pre><br>", "</pre>");
|
2022-10-07 19:30:54 +03:00
|
|
|
|
2022-10-07 20:02:29 +03:00
|
|
|
{
|
|
|
|
/* traditional emoticons */
|
|
|
|
int n;
|
|
|
|
|
|
|
|
for (n = 0; smileys[n].key; n++)
|
|
|
|
s = xs_replace_i(s, smileys[n].key, smileys[n].value);
|
|
|
|
}
|
|
|
|
|
2022-11-13 10:41:50 +03:00
|
|
|
return s;
|
2022-10-07 19:30:54 +03:00
|
|
|
}
|
2022-10-28 19:06:42 +03:00
|
|
|
|
|
|
|
|
|
|
|
/* NULL-terminated list of tag names (lowercase, without the angle
   brackets) that are compared against when deciding if a tag is
   acceptable */
const char *valid_tags[] = {
    "a",
    "p",
    "br", "br/",
    "blockquote",
    "ul", "li",
    "span",
    "i", "b",
    "pre", "code",
    "em", "strong",
    NULL
};
|
|
|
|
|
2022-11-16 15:13:31 +03:00
|
|
|
d_char *sanitize(const char *content)
|
2022-10-28 19:06:42 +03:00
|
|
|
/* cleans dangerous HTML output */
|
|
|
|
{
|
|
|
|
d_char *s = xs_str_new(NULL);
|
|
|
|
xs *sl;
|
|
|
|
int n = 0;
|
|
|
|
char *p, *v;
|
|
|
|
|
2023-03-07 11:56:16 +03:00
|
|
|
sl = xs_regex_split(content, "</?[^>]+>");
|
2022-10-28 19:06:42 +03:00
|
|
|
|
|
|
|
p = sl;
|
|
|
|
|
2023-03-06 13:06:35 +03:00
|
|
|
n = 0;
|
2022-10-28 19:06:42 +03:00
|
|
|
while (xs_list_iter(&p, &v)) {
|
|
|
|
if (n & 0x1) {
|
2023-01-12 11:28:02 +03:00
|
|
|
xs *s1 = xs_strip_i(xs_crop_i(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
|
2022-10-28 19:06:42 +03:00
|
|
|
xs *l1 = xs_split_n(s1, " ", 1);
|
2023-01-12 11:28:02 +03:00
|
|
|
xs *tag = xs_tolower_i(xs_dup(xs_list_get(l1, 0)));
|
2022-11-16 18:53:59 +03:00
|
|
|
xs *s2 = NULL;
|
2022-10-28 19:06:42 +03:00
|
|
|
int i;
|
|
|
|
|
|
|
|
/* check if it's one of the valid tags */
|
|
|
|
for (i = 0; valid_tags[i]; i++) {
|
|
|
|
if (strcmp(tag, valid_tags[i]) == 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (valid_tags[i]) {
|
2022-11-16 18:53:59 +03:00
|
|
|
/* accepted tag: rebuild it with only the accepted elements */
|
|
|
|
xs *el = xs_regex_match(v, "(href|rel|class|target)=\"[^\"]*\"");
|
|
|
|
xs *s3 = xs_join(el, " ");
|
|
|
|
|
2022-11-16 19:46:55 +03:00
|
|
|
s2 = xs_fmt("<%s%s%s%s>",
|
2022-11-16 19:49:33 +03:00
|
|
|
v[1] == '/' ? "/" : "", tag, xs_list_len(el) ? " " : "", s3);
|
2022-10-28 19:06:42 +03:00
|
|
|
}
|
|
|
|
else {
|
2022-11-16 18:53:59 +03:00
|
|
|
/* bad tag: escape it */
|
|
|
|
s2 = xs_replace(v, "<", "<");
|
2022-10-28 19:06:42 +03:00
|
|
|
}
|
2022-11-16 18:53:59 +03:00
|
|
|
|
|
|
|
s = xs_str_cat(s, s2);
|
2022-10-28 19:06:42 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* non-tag */
|
|
|
|
s = xs_str_cat(s, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|