snac2/xs_regex.h

/* copyright (c) 2022 - 2024 grunfink et al. / MIT license */

#ifndef _XS_REGEX_H

#define _XS_REGEX_H

/* returns non-zero if the regex rx matches somewhere in str */
int xs_regex_match(const char *str, const char *rx);

/* splits str using the regex rx as separator, at most count times.
   The returned list alternates non-matching parts and matched
   separators; it is empty if rx cannot be compiled */
xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
/* same as xs_regex_split_n(), with count set to XS_ALL */
#define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)

/* returns a list with only the texts matched by rx in str (the
   'separators' of xs_regex_split_n()), at most count of them */
xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
/* same as xs_regex_select_n(), with count set to XS_ALL */
#define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)

/* replaces at most count matches of rx in str with rep. The str
   argument is released with xs_free() by the function, so the caller
   must only use the returned value afterwards.
   NOTE(review): the return type is declared as xs_list *, but the
   implementation returns the xs_str it builds -- confirm both names
   are aliases of the same type before changing it */
xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
/* in-place variant: str itself is handed over, count set to XS_ALL */
#define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
/* these two hand over the result of xs_dup(str) (presumably a copy)
   instead of str, so the caller's str is not the one released */
#define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
#define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)

#ifdef XS_IMPLEMENTATION

#ifdef __TINYC__
/* fix a compilation error in tcc */
#define _REGEX_NELTS(n)
#endif

#include <regex.h>

xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
/* splits str using regex as a separator, at most count times.
    Always returns a list:
    len == 0: regcomp error
    len == 1: full string (no matches, or count <= 0)
    len == odd: first part [ separator / next part ]...
    The last element is always the text that follows the last
    separator taken (it can be an empty string).
*/
{
    regex_t re;
    regmatch_t rm;
    xs_list *list = xs_list_new();
    const char *rest = str;     /* first byte not yet stored in the list */
    const char *scan = str;     /* where the next search starts */

    if (regcomp(&re, rx, REG_EXTENDED))
        return list;

    /* from the second search on, scan no longer points to the real
       beginning of the line, so '^' must not match there */
    while (count > 0 && !regexec(&re, scan, 1, &rm, scan > str ? REG_NOTBOL : 0)) {
        /* rm offsets are relative to scan, not to str */
        const char *m_start = scan + rm.rm_so;
        const char *m_end   = scan + rm.rm_eo;

        /* add first the leading part of the string */
        xs *s1 = xs_str_new_sz(rest, m_start - rest);
        list = xs_list_append(list, s1);

        /* add now the matched text as the separator */
        xs *s2 = xs_str_new_sz(m_start, m_end - m_start);
        list = xs_list_append(list, s2);

        /* move forward */
        rest = m_end;
        count--;

        if (m_end > m_start)
            scan = m_end;
        else {
            /* empty match: searching again from the same place would
               find it over and over, so step one byte ahead (the
               skipped byte stays in the next leading part) or stop
               if this is already the end of the string */
            if (*m_end == '\0')
                break;

            scan = m_end + 1;
        }
    }

    /* add the rest of the string: everything after the last separator
       taken, no matter why the loop ended */
    list = xs_list_append(list, rest);

    regfree(&re);

    return list;
}


xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
/* returns a list with only the texts matched by rx in str
   (as found by xs_regex_split_n() with the same count) */
{
    /* the split list alternates: part, match, part, match, ..., part */
    xs *parts = xs_regex_split_n(str, rx, count);
    xs_list *matches = xs_list_new();
    const xs_val *item;
    int is_match = 0;

    xs_list_foreach(parts, item) {
        /* keep every second element, starting with the second one */
        if (is_match)
            matches = xs_list_append(matches, item);

        is_match = !is_match;
    }

    return matches;
}


xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
/* replaces at most count matches of rx in str with the rep string.
   If rep contains unescaped &, they are replaced with the match;
   a backslash makes the next character literal (backslashes are only
   processed when rep contains at least one &).
   Takes ownership of str: it is released and a newly built string is
   returned, except when rx cannot be compiled, in which case str
   itself is returned unchanged. In both cases the caller must only
   use the returned value afterwards */
{
    xs *split = xs_regex_split_n(str, rx, count);

    /* an empty list means the regex did not compile: give the string
       back as it is instead of replacing it with an empty one */
    if (xs_list_len(split) == 0)
        return str;

    xs_str *s = xs_str_new(NULL);
    const xs_val *v;
    int n = 0;
    int pholder = !!strchr(rep, '&');

    /* even elements are plain text, odd ones are the matches */
    xs_list_foreach(split, v) {
        if (n & 0x1) {
            if (pholder) {
                /* rep has a placeholder; process char by char */
                const char *p = rep;

                while (*p) {
                    if (*p == '&')
                        s = xs_str_cat(s, v);
                    else {
                        /* skip the escape and copy what follows */
                        if (*p == '\\')
                            p++;

                        /* a trailing backslash escapes nothing */
                        if (!*p)
                            break;

                        s = xs_append_m(s, p, 1);
                    }

                    p++;
                }
            }
            else
                s = xs_str_cat(s, rep);
        }
        else
            s = xs_str_cat(s, v);

        n++;
    }

    xs_free(str);

    return s;
}


int xs_regex_match(const char *str, const char *rx)
/* tells whether rx matches somewhere in str (1) or not (0) */
{
    /* asking for a single match is enough to know the answer */
    xs *found = xs_regex_select_n(str, rx, 1);

    if (xs_list_len(found) == 1)
        return 1;

    return 0;
}


#endif /* XS_IMPLEMENTATION */

#endif /* _XS_REGEX_H */
Updated copyright year. 2024-01-04 11:22:03 +03:00			`/* copyright (c) 2022 - 2024 grunfink et al. / MIT license */`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00
			`#ifndef _XS_REGEX_H`

			`#define _XS_REGEX_H`

Use xs_regex_match() where applicable. 2024-05-15 14:27:23 +03:00			`int xs_regex_match(const char str, const char rx);`
Backport from xs. 2023-01-28 19:49:02 +03:00			`xs_list xs_regex_split_n(const char str, const char *rx, int count);`
Backport from xs. 2022-11-24 10:47:02 +03:00			`#define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)`
Backport from xs. 2023-09-17 03:52:44 +03:00			`xs_list xs_regex_select_n(const char str, const char *rx, int count);`
			`#define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)`
Convert 'Link' attachments that have a media extension to something more useful. 2023-08-17 18:38:39 +03:00			`xs_list xs_regex_replace_in(xs_str str, const char rx, const char rep, int count);`
			`#define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)`
			`#define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)`
			`#define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00
			`#ifdef XS_IMPLEMENTATION`

Backport from xs (fix regex.h compilation with tcc). 2024-05-25 09:24:08 +03:00			`#ifdef __TINYC__`
			`/* fix a compilation error in tcc */`
			`#define _REGEX_NELTS(n)`
			`#endif`

Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00			`#include <regex.h>`

Backport from xs. 2023-01-28 19:49:02 +03:00			`xs_list xs_regex_split_n(const char str, const char *rx, int count)`
Use xs_regex_match() where applicable. 2024-05-15 14:27:23 +03:00			`/* splits str using regex as a separator, at most count times.`
			`Always returns a list:`
			`len == 0: regcomp error`
			`len == 1: full string (no matches)`
			`len == odd: first part [ separator / next part ]...`
			`*/`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00			`{`
			`regex_t re;`
			`regmatch_t rm;`
			`int offset = 0;`
Use xs_regex_match() where applicable. 2024-05-15 14:27:23 +03:00			`xs_list *list = xs_list_new();`
Backport from xs. 2022-09-27 18:19:59 +03:00			`const char *p;`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00
			`if (regcomp(&re, rx, REG_EXTENDED))`
Use xs_regex_match() where applicable. 2024-05-15 14:27:23 +03:00			`return list;`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00
			`while (count > 0 && !regexec(&re, (p = str + offset), 1, &rm, offset > 0 ? REG_NOTBOL : 0)) {`
Backport from xs. 2022-09-27 18:19:59 +03:00			`/* add first the leading part of the string */`
Backport from xs. 2023-09-13 19:19:19 +03:00			`xs *s1 = xs_str_new_sz(p, rm.rm_so);`
			`list = xs_list_append(list, s1);`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00
Backport from xs. 2022-09-27 18:19:59 +03:00			`/* add now the matched text as the separator */`
Backport from xs. 2023-09-13 19:19:19 +03:00			`xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);`
			`list = xs_list_append(list, s2);`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00
Backport from xs. 2022-09-27 18:19:59 +03:00			`/* move forward */`
Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00			`offset += rm.rm_eo;`

			`count--;`
			`}`

Backport from xs. 2022-09-27 18:19:59 +03:00			`/* add the rest of the string */`
			`list = xs_list_append(list, p);`

Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00			`regfree(&re);`

			`return list;`
			`}`

Backport from xs. 2022-09-27 18:19:59 +03:00
Backport from xs. 2023-09-17 03:52:44 +03:00			`xs_list xs_regex_select_n(const char str, const char *rx, int count)`
			`/* selects all matches and return them as a list */`
Backport from xs. 2022-09-27 18:19:59 +03:00			`{`
Backport from xs. 2023-01-28 19:49:02 +03:00			`xs_list *list = xs_list_new();`
Backport from xs. 2022-09-27 18:19:59 +03:00			`xs *split = NULL;`
Added more const. 2024-05-23 11:01:37 +03:00			`const xs_val *v;`
Backport from xs. 2022-09-27 18:19:59 +03:00			`int n = 0;`

			`/* split */`
Backport from xs. 2022-10-07 14:48:53 +03:00			`split = xs_regex_split_n(str, rx, count);`
Backport from xs. 2022-09-27 18:19:59 +03:00
			`/* now iterate to get only the 'separators' (odd ones) */`
Backport from xs. 2024-08-30 20:10:26 +03:00			`xs_list_foreach(split, v) {`
Backport from xs. 2022-09-27 18:19:59 +03:00			`if (n & 0x1)`
			`list = xs_list_append(list, v);`

			`n++;`
			`}`

			`return list;`
			`}`

mastoapi: minor fix in verify_credentials. 2023-08-16 19:18:46 +03:00
Convert 'Link' attachments that have a media extension to something more useful. 2023-08-17 18:38:39 +03:00			`xs_list xs_regex_replace_in(xs_str str, const char rx, const char rep, int count)`
mastoapi: minor fix in verify_credentials. 2023-08-16 19:18:46 +03:00			`/* replaces all matches with the rep string. If it contains unescaped &,`
			`they are replaced with the match */`
			`{`
			`xs_str *s = xs_str_new(NULL);`
			`xs *split = xs_regex_split_n(str, rx, count);`
Added more const. 2024-05-23 11:01:37 +03:00			`const xs_val *v;`
mastoapi: minor fix in verify_credentials. 2023-08-16 19:18:46 +03:00			`int n = 0;`
			`int pholder = !!strchr(rep, '&');`

Backport from xs. 2024-08-30 20:10:26 +03:00			`xs_list_foreach(split, v) {`
mastoapi: minor fix in verify_credentials. 2023-08-16 19:18:46 +03:00			`if (n & 0x1) {`
			`if (pholder) {`
			`/* rep has a placeholder; process char by char */`
			`const char *p = rep;`

			`while (*p) {`
			`if (*p == '&')`
			`s = xs_str_cat(s, v);`
			`else {`
			`if (*p == '\\')`
			`p++;`

			`if (!*p)`
			`break;`

			`s = xs_append_m(s, p, 1);`
			`}`

			`p++;`
			`}`
			`}`
			`else`
			`s = xs_str_cat(s, rep);`
			`}`
			`else`
			`s = xs_str_cat(s, v);`

			`n++;`
			`}`

Convert 'Link' attachments that have a media extension to something more useful. 2023-08-17 18:38:39 +03:00			`xs_free(str);`

mastoapi: minor fix in verify_credentials. 2023-08-16 19:18:46 +03:00			`return s;`
			`}`

Use xs_regex_match() where applicable. 2024-05-15 14:27:23 +03:00
			`int xs_regex_match(const char str, const char rx)`
			`/* returns if str matches the regex at least once */`
			`{`
			`xs *l = xs_regex_select_n(str, rx, 1);`

			`return xs_list_len(l) == 1;`
			`}`


Backported xs_regex.h from xs. 2022-09-27 11:03:41 +03:00			`#endif /* XS_IMPLEMENTATION */`

			`#endif /* XS_REGEX_H */`