2024-01-04 11:22:03 +03:00
|
|
|
/* copyright (c) 2022 - 2024 grunfink et al. / MIT license */
|
2022-09-27 11:03:41 +03:00
|
|
|
|
|
|
|
#ifndef _XS_REGEX_H
|
|
|
|
|
|
|
|
#define _XS_REGEX_H
|
|
|
|
|
2024-05-15 14:27:23 +03:00
|
|
|
/* returns non-zero if the regex rx matches str at least once */
int xs_regex_match(const char *str, const char *rx);
|
2023-01-28 19:49:02 +03:00
|
|
|
/* splits str using the regex rx as a separator, at most count times.
   Always returns a list: empty on regcomp error, otherwise
   first part [ separator / next part ]... */
xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
|
2022-11-24 10:47:02 +03:00
|
|
|
/* same as xs_regex_split_n(), passing XS_ALL as the count */
#define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
|
2023-09-17 03:52:44 +03:00
|
|
|
/* returns a list with the text of the matches of rx in str
   (str is split with xs_regex_split_n() using the same count) */
xs_list *xs_regex_select_n(const char *str, const char *rx, int count);
|
|
|
|
/* same as xs_regex_select_n(), passing XS_ALL as the count */
#define xs_regex_select(str, rx) xs_regex_select_n(str, rx, XS_ALL)
|
2023-08-17 18:38:39 +03:00
|
|
|
/* replaces the matches of rx in str with rep (an unescaped & in rep
   stands for the matched text); count is passed to xs_regex_split_n().
   str is freed and a newly built string is returned */
xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
|
|
|
|
/* xs_regex_replace_in() with XS_ALL as the count; str itself is freed by the call */
#define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
|
|
|
|
/* xs_regex_replace_in() applied to xs_dup(str): the value freed by the
   call is the one returned by xs_dup(), not str */
#define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
|
|
|
|
/* xs_regex_replace_in() applied to xs_dup(str), with XS_ALL as the count */
#define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)
|
2022-09-27 11:03:41 +03:00
|
|
|
|
|
|
|
#ifdef XS_IMPLEMENTATION
|
|
|
|
|
2024-05-25 09:24:08 +03:00
|
|
|
#ifdef __TINYC__
|
|
|
|
/* fix a compilation error in tcc: _REGEX_NELTS() is pre-defined as empty before <regex.h> is included */
|
|
|
|
#define _REGEX_NELTS(n)
|
|
|
|
#endif
|
|
|
|
|
2022-09-27 11:03:41 +03:00
|
|
|
#include <regex.h>
|
|
|
|
|
2023-01-28 19:49:02 +03:00
|
|
|
xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
/* splits str using the extended regex rx as a separator, at most count times.
   Always returns a list:
     len == 0: regcomp error
     len == 1: full string (no matches, or count <= 0)
     len == odd: first part [ separator / next part ]...
   The last element is always the not yet split remainder of str */
{
    regex_t re;
    regmatch_t rm;
    int offset = 0;
    xs_list *list = xs_list_new();

    if (regcomp(&re, rx, REG_EXTENDED))
        return list;

    while (count > 0) {
        const char *p = str + offset;

        /* once past the first match, p is not the beginning of the line */
        if (regexec(&re, p, 1, &rm, offset > 0 ? REG_NOTBOL : 0))
            break;

        /* add first the leading part of the string */
        xs *s1 = xs_str_new_sz(p, rm.rm_so);
        list = xs_list_append(list, s1);

        /* add now the matched text as the separator */
        xs *s2 = xs_str_new_sz(p + rm.rm_so, rm.rm_eo - rm.rm_so);
        list = xs_list_append(list, s2);

        count--;

        /* a match that ends where the search started consumes nothing;
           stop here instead of finding the same empty match again
           and again until count runs out */
        if (rm.rm_eo == 0)
            break;

        /* move forward */
        offset += rm.rm_eo;
    }

    /* add the rest of the string. It's taken from offset (and not from
       the last search position) so that it's also correct when the loop
       stops because count ran out, or never runs at all */
    list = xs_list_append(list, str + offset);

    regfree(&re);

    return list;
}
|
|
|
|
|
2022-09-27 18:19:59 +03:00
|
|
|
|
2023-09-17 03:52:44 +03:00
|
|
|
xs_list *xs_regex_select_n(const char *str, const char *rx, int count)
|
|
|
|
/* selects all matches and return them as a list */
|
2022-09-27 18:19:59 +03:00
|
|
|
{
|
2023-01-28 19:49:02 +03:00
|
|
|
xs_list *list = xs_list_new();
|
2022-09-27 18:19:59 +03:00
|
|
|
xs *split = NULL;
|
2024-05-23 11:01:37 +03:00
|
|
|
const xs_val *v;
|
2022-09-27 18:19:59 +03:00
|
|
|
int n = 0;
|
|
|
|
|
|
|
|
/* split */
|
2022-10-07 14:48:53 +03:00
|
|
|
split = xs_regex_split_n(str, rx, count);
|
2022-09-27 18:19:59 +03:00
|
|
|
|
|
|
|
/* now iterate to get only the 'separators' (odd ones) */
|
2024-08-30 20:10:26 +03:00
|
|
|
xs_list_foreach(split, v) {
|
2022-09-27 18:19:59 +03:00
|
|
|
if (n & 0x1)
|
|
|
|
list = xs_list_append(list, v);
|
|
|
|
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2023-08-16 19:18:46 +03:00
|
|
|
|
2023-08-17 18:38:39 +03:00
|
|
|
xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
/* builds a new string from str where the matches of rx are replaced
   with rep (str is split at most count times). If rep contains any &,
   it's processed char by char: an unescaped & expands to the matched
   text and a backslash makes the next char literal; otherwise rep is
   copied verbatim. str is freed.
   NOTE(review): declared as returning xs_list *, but the value
   returned is the xs_str * being built */
{
    xs_str *out = xs_str_new(NULL);
    xs *parts = xs_regex_split_n(str, rx, count);
    const xs_val *item;
    int is_match = 0;
    int has_amp = strchr(rep, '&') != NULL;

    /* parts alternates text / match / text... */
    xs_list_foreach(parts, item) {
        if (!is_match) {
            /* text between matches is copied as is */
            out = xs_str_cat(out, item);
        }
        else
        if (!has_amp) {
            /* no placeholder: plain replacement */
            out = xs_str_cat(out, rep);
        }
        else {
            /* rep has a placeholder; expand it char by char */
            for (const char *c = rep; *c != '\0'; c++) {
                if (*c == '&') {
                    out = xs_str_cat(out, item);
                    continue;
                }

                /* skip the escape char; a trailing backslash ends rep */
                if (*c == '\\' && *++c == '\0')
                    break;

                out = xs_append_m(out, c, 1);
            }
        }

        is_match = !is_match;
    }

    xs_free(str);

    return out;
}
|
|
|
|
|
2024-05-15 14:27:23 +03:00
|
|
|
|
|
|
|
int xs_regex_match(const char *str, const char *rx)
/* returns 1 if rx matches somewhere in str, 0 otherwise */
{
    /* one match is enough, so only the first one is asked for */
    xs *found = xs_regex_select_n(str, rx, 1);

    if (xs_list_len(found) == 1)
        return 1;

    return 0;
}
|
|
|
|
|
|
|
|
|
2022-09-27 11:03:41 +03:00
|
|
|
#endif /* XS_IMPLEMENTATION */
|
|
|
|
|
|
|
|
#endif /* _XS_REGEX_H */
|