#include <stdio.h>
#include <ctype.h>
#include "string.h"
#include "stdbool.h"
/* Pico RegExp w/ data extraction.
VERY incomplete. Just matches simple patterns:
. (a dot) represents all characters.
\a all letters.
\c all control characters.
\d all digits.
\l all lowercase letters.
\p all punctuation characters.
\s all space characters.
\u all uppercase letters.
\w all alphanumeric characters.
\x all hexadecimal digits.
\z the character with hex representation 0x00 (null).
\\ a single '\' character.
* matchrepmin=0;matchrepmax=-1;
? matchrepmin=0;matchrepmax=1;
TODO:
+ matchrepmin=1;matchrepmax=-1;
- matchrepmin=0;matchrepmax=-1;matchflag.greed=false
^ index==1?MATCH_START:MATCH_SET_INVERSE
$ index==regexp.length()?MATCH_END
( matches[matchindex++]=index;matchcount++
) matches[matchindex++]=index;matchcount--
(add parameter for a set of indexes into the matched string, with lengths)
(return the captured value in binary as well, e.g. "0F", "\x*" returns 15)
[ matchflag.class=true;
matchclass[matchclassindex++]=c;loop;
] matchflag.class=false;for(int i=matchclassindex;i>0;i--){c==matchclass[i]?break:continue;};i>0?continue:break;
\# captured pattern #.
*/
/* Reports whether input character c satisfies one pattern atom.
   is_class is true when the atom was written with a leading backslash, in
   which case atom is the character that followed the backslash. Otherwise
   atom is a literal character, with '.' accepting any character.
   The caller never passes the terminating NUL as c, which is why the "\z"
   (NUL) class has no case here. */
static bool regexp_atom_matches(bool is_class, char atom, char c) {
    /* <ctype.h> functions are only defined for values representable as
       unsigned char (or EOF); plain char may be negative. */
    const unsigned char uc = (unsigned char)c;

    if (!is_class) {
        return '.' == atom || atom == c;
    }
    switch (atom) {
    case '\\': return '\\' == c;
    case 'a':  return isalpha(uc) != 0;
    case 'c':  return iscntrl(uc) != 0;
    case 'd':  return isdigit(uc) != 0;
    case 'l':  return islower(uc) != 0;
    case 'p':  return ispunct(uc) != 0;
    case 's':  return isspace(uc) != 0;
    case 'u':  return isupper(uc) != 0;
    case 'w':  return isalnum(uc) != 0;
    case 'x':  return isxdigit(uc) != 0;
    default:   return false; /* unknown class, or a dangling backslash */
    }
}

/* Matches the whole of str against the whole of pat.

   str: NUL-terminated subject string.
   pat: NUL-terminated pattern made of atoms, each optionally followed by a
        quantifier.
          atoms:       a literal character, '.' (any character), or a
                       backslash class (\a \c \d \l \p \s \u \w \x, and \\
                       for a literal backslash).
          quantifiers: '*' and '+' allow any number of further repetitions,
                       '?' allows at most one further repetition.

   Returns 0 when both str and pat are consumed completely (a match) and
   1 otherwise.

   Matching is greedy and never backtracks, so ".*c" cannot match "abc".

   NOTE(review): the first occurrence of every atom is mandatory; the
   quantifier only governs the repetitions that follow it. That is what the
   checks in main() rely on ("\x?c" must accept "abc"), but it differs from
   conventional regex where '*' and '?' also accept zero occurrences. As a
   consequence '*' and '+' currently accept the same inputs.
   A quantifier character with no atom in front of it is matched literally. */
int regexp(char *str, char *pat) {
    while (*pat) {
        /* Read the atom. */
        bool is_class = false;
        char atom = *pat++;
        if ('\\' == atom) {
            is_class = true;
            atom = *pat; /* '\0' for a dangling backslash, which matches nothing */
            if (*pat) {
                pat++;
            }
        }

        /* Read the optional quantifier: how many further repetitions may
           follow the first one (0 = none, -1 = unlimited). */
        int extra = 0;
        switch (*pat) {
        case '*':
        case '+':
            extra = -1;
            pat++;
            break;
        case '?':
            extra = 1;
            pat++;
            break;
        default:
            break;
        }

        /* The first occurrence must be present. */
        if (!*str || !regexp_atom_matches(is_class, atom, *str)) {
            return 1;
        }
        str++;

        /* Greedily take the permitted further repetitions. */
        while (extra != 0 && *str && regexp_atom_matches(is_class, atom, *str)) {
            str++;
            if (extra > 0) {
                extra--;
            }
        }
    }
    /* Pattern exhausted: it is a match only if the subject is exhausted too. */
    return *str != '\0';
}
/* Smoke check for regexp(): prints one result per case, grouped by the
   outcome the author expects. regexp() returns 0 when subject and pattern
   are both fully consumed, so the first group is expected to print zeros
   and the second group non-zero values. */
int main(void) {
    /* Each row is {subject, pattern}. */
    static char *wanted_match[][2] = {
        {"abc",  "abc"},
        {"abc",  "a.."},
        {"abc",  "\\a\\x\\x"},
        {"abc",  "\\x*"},   /* author's TODO: "Why is this failing now?" */
        {"abc-", "\\x*-"},
        {"abc",  "\\x?c"},
        {"\\",   "\\\\"},
    };
    static char *wanted_mismatch[][2] = {
        {"abc",  "abcd"},
        {"abcd", "abc"},
        {"abc",  "\\x?"},
        {"123",  "\\a\\x\\x"},
    };
    size_t row;

    printf(" should pass: ");
    for (row = 0; row < sizeof wanted_match / sizeof wanted_match[0]; row++) {
        printf("%i, ", regexp(wanted_match[row][0], wanted_match[row][1]));
    }

    printf(" should fail: ");
    for (row = 0; row < sizeof wanted_mismatch / sizeof wanted_mismatch[0]; row++) {
        printf("%i, ", regexp(wanted_mismatch[row][0], wanted_mismatch[row][1]));
    }

    return 0;
}