This repository has been archived on 2023-07-11. You can view files and clone it, but cannot push or open issues or pull requests.
fte/e_regex.cpp

1117 lines
34 KiB
C++
Raw Permalink Normal View History

/* e_regex.cpp
*
* Copyright (c) 1994-1996, Marko Macek
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "e_regex.h"
//#define DEBUG
static int RegCount = 0;
#ifdef DEBUG
static void RxDump(int N, RxNode *n);
#endif
static RxNode *NewNode(int aWhat) {
RxNode *N = (RxNode *) malloc(sizeof(RxNode));
if (N) {
memset(N, 0, sizeof(RxNode));
N->fWhat = (short)aWhat;
}
return N;
}
static RxNode *NewChar(char Ch) {
RxNode *A = NewNode(RE_CHAR);
if (A) {
A->fChar = (char *)malloc(1);
A->fLen = 1;
A->fChar[0] = Ch;
}
return A;
}
static RxNode *NewEscape(const char **const Regexp) {
char Ch = **Regexp;
++*Regexp;
switch (Ch) {
case 0: return 0;
case 'a': Ch = '\a'; break;
case 'b': Ch = '\b'; break;
case 'f': Ch = '\f'; break;
case 'n': Ch = '\n'; break;
case 'r': Ch = '\r'; break;
case 't': Ch = '\t'; break;
case 'v': Ch = '\v'; break;
case 'e': Ch = 27; break;
case 's': return NewNode(RE_WSPACE);
case 'S': return NewNode(RE_NWSPACE);
case 'U': return NewNode(RE_UPPER);
case 'L': return NewNode(RE_LOWER);
case 'w': return NewNode(RE_WORD);
case 'W': return NewNode(RE_NWORD);
case 'd': return NewNode(RE_DIGIT);
case 'D': return NewNode(RE_NDIGIT);
case 'C': return NewNode(RE_CASE);
case 'c': return NewNode(RE_NCASE);
case 'N':
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = N * 100;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N * 10;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N;
Ch = (char)A;
}
break;
case 'o':
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = N * 64;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N * 8;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N;
Ch = (char)A;
}
break;
case 'x':
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = N << 4;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = A + N;
Ch = (char)A;
}
break;
}
return NewChar(Ch);
}
#define NNN 32 // 8 * 32 = 256 (match set)
#define SETOP(set,n) \
do { \
set[(unsigned char)(n) >> 3] |= (unsigned char)(1 << ((unsigned char)(n) & 7)); \
} while (0)
static RxNode *NewSet(const char ** const Regexp) {
unsigned char set[NNN];
int s = 0;
int c = 0;
unsigned int i, xx;
unsigned char Ch, C1 = 0, C2 = 0;
int doset = 0;
memset(set, 0, sizeof(set));
s = 1;
if (**Regexp == '^') {
s = 0;
++*Regexp;
}
c = 0;
while (**Regexp) {
switch (Ch = *((*Regexp)++)) {
case ']':
if (doset == 1) return 0;
{
RxNode *N = NewNode(s?RE_INSET:RE_NOTINSET);
N->fChar = (char *) malloc(sizeof(set));
N->fLen = sizeof(set);
if (N->fChar == 0) return 0;
memcpy(N->fChar, (char *) set, sizeof(set));
return N;
}
case '\\':
switch (Ch = *((*Regexp)++)) {
case 0: return 0;
case 'a': Ch = '\a'; break;
case 'b': Ch = '\b'; break;
case 'f': Ch = '\f'; break;
case 'n': Ch = '\n'; break;
case 'r': Ch = '\r'; break;
case 't': Ch = '\t'; break;
case 'v': Ch = '\v'; break;
case 'e': Ch = 27; break;
case 'N':
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = N * 100;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N * 10;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) return 0;
(*Regexp)++;
A = A + N;
Ch = (unsigned char)A;
}
break;
case 'o':
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = N * 64;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N * 8;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 7) return 0;
(*Regexp)++;
A = A + N;
Ch = (unsigned char)A;
}
break;
case 'x':
{
unsigned int N = 0;
unsigned int A = 0;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = N << 4;
if (**Regexp == 0) return 0;
N = toupper(**Regexp) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
(*Regexp)++;
A = A + N;
Ch = (unsigned char)A;
}
break;
case 's':
c += 4;
SETOP(set, '\n');
SETOP(set, '\t');
SETOP(set, ' ');
SETOP(set, '\r');
continue;
case 'S':
for (xx = 0; xx <= 255; xx++) {
if (xx != ' ' && xx != '\t' && xx != '\n' && xx != '\r') {
c++;
SETOP(set, xx);
}
}
continue;
case 'w':
for (xx = 0; xx <= 255; xx++) {
if (isalnum(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case 'W':
for (xx = 0; xx <= 255; xx++) {
if (!isalnum(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case 'd':
for (xx = 0; xx <= 255; xx++) {
if (isdigit(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case 'D':
for (xx = 0; xx <= 255; xx++) {
if (!isdigit(xx)) {
c++;
SETOP(set, xx);
}
}
break;
case 'U':
for (xx = 'A'; xx <= 'Z'; xx++) {
c++;
SETOP(set, xx);
}
continue;
case 'L':
for (xx = 'a'; xx <= 'z'; xx++) {
c++;
SETOP(set, xx);
}
continue;
}
break;
}
if (doset == 0 && ((**Regexp) == '-')) {
doset = 1;
C1 = Ch;
++*Regexp;
continue;
} else if (doset == 1) {
C2 = Ch;
if (C2 < C1) return 0;
for(i = C1; i <= C2; i++) SETOP(set, i);
doset = 0;
continue;
}
c++;
SETOP(set, Ch);
}
return 0;
}
static int AddNode(RxNode **F, RxNode **N, RxNode *A) {
if (A) {
if (*F) {
(*N)->fNext = A;
A->fPrev = (*N);
*N = A;
} else {
(*N) = (*F) = A;
A->fPrev = A->fNext = 0;
}
return 1;
}
return 0;
}
static int CountWidth(RxNode *N) {
int w = 0;
while (N) {
if (N->fWhat < 32) w += 0;
else if (N->fWhat >= 32 && N->fWhat < 64)
w += 1;
N = N->fNext;
}
return w;
}
static int MakeSub(RxNode **F, RxNode **N, char What) {
//printf("MakeSub: %c\n", What);
if (*N) {
RxNode *No;
RxNode *New;
RxNode *Jump, *Skip;
RxNode *Last = (*N);
if (Last->fWhat & RE_GROUP) {
RxNode *P = Last->fPrev;
int C = 1;
while ((C > 0) && P) {
//puts("backtracking...-----");
//RxDump(0, P);
if (P->fWhat & RE_GROUP) {
if (P->fWhat & RE_CLOSE) C++;
else C--;
}
Last = P;
if (C == 0) break;
P = P->fPrev;
}
//printf("P = %s, c = %d", P ? "ok":"null", C);
if (C != 0) return 0;
}
assert(Last);
if (What != '?' && What != '|')
if (CountWidth(Last) == 0) {
// puts("FAILED count");
return 0;
}
switch (What) {
case '?': /* BRANCH x NOTHING */
New = NewNode(RE_BRANCH | RE_GREEDY | What);
No = NewNode(RE_NOTHING);
if (!New || !No) return 0;
No->fPrev = *N;
if (*N)
(*N)->fNext = No;
New->fNext = Last;
New->fPrev = Last->fPrev;
Last->fPrev = New;
if (New->fPrev) {
New->fPrev->fNext = New;
} else {
*F = New;
}
New->fPtr = No;
No->fPtr = New;
*N = No;
//puts("BRANCH ?");
break;
case '*':
case '@':
New = NewNode(RE_BRANCH | What | ((What == '*') ? RE_GREEDY : 0));
Jump = NewNode(RE_JUMP);
No = NewNode(RE_NOTHING);
if (!New || !No || !Jump) return 0;
No->fPrev = Jump;
Jump->fNext = No;
Jump->fPrev = *N;
if (*N)
(*N)->fNext = Jump;
New->fNext = Last;
New->fPrev = Last->fPrev;
Last->fPrev = New;
if (New->fPrev) {
New->fPrev->fNext = New;
} else {
*F = New;
}
New->fPtr = No;
No->fPtr = New;
Jump->fPtr = New;
*N = No;
//puts("BRANCH *");
break;
case '#':
case '+':
New = NewNode(RE_BRANCH | What | ((What == '+') ? RE_GREEDY : 0));
Skip = NewNode(RE_JUMP);
Jump = NewNode(RE_JUMP);
No = NewNode(RE_NOTHING);
if (!New || !No || !Jump) return 0;
No->fPrev = Jump;
Jump->fPrev = *N;
Jump->fNext = No;
Skip->fNext = New;
New->fPrev = Skip;
if (*N)
(*N)->fNext = Jump;
New->fNext = Last;
Skip->fPrev = Last->fPrev;
Last->fPrev = New;
if (Skip->fPrev) {
Skip->fPrev->fNext = Skip;
} else {
*F = Skip;
}
New->fPtr = No;
No->fPtr = New;
Jump->fPtr = New;
Skip->fPtr = Last;
*N = No;
//puts("BRANCH +");
break;
case '|':
New = NewNode(RE_BRANCH | RE_GREEDY | What);
Jump = NewNode(RE_BREAK);
No = NewNode(RE_NOTHING);
if (!New || !No || !Jump) return 0;
No->fPrev = Jump;
Jump->fNext = No;
Jump->fPrev = *N;
if (*N)
(*N)->fNext = Jump;
New->fNext = Last;
New->fPrev = Last->fPrev;
Last->fPrev = New;
if (New->fPrev) {
New->fPrev->fNext = New;
} else {
*F = New;
}
New->fPtr = No;
No->fPtr = New;
Jump->fPtr = New;
*N = No;
//puts("BRANCH |");
break;
}
return 1;
}
return 0;
}
#define CHECK(n) do { if ((n) == 0) { return 0;} } while (0)
static RxNode *RxComp(const char **const Regexp) {
RxNode *F = 0;
RxNode *N = 0;
int C;
char Ch;
while (**Regexp) {
// puts(*Regexp);
switch (Ch = (*(*Regexp)++)) {
case '?':
case '*':
case '+':
case '@':
case '#':
case '|':
CHECK(MakeSub(&F, &N, Ch));
break;
case '}':
case ')':
return F;
case '{':
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_OPEN)));
CHECK(AddNode(&F, &N, RxComp(Regexp)));
while (N->fNext) N = N->fNext;
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_CLOSE)));
break;
case '(':
C = ++RegCount;
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_OPEN | RE_MEM | C)));
CHECK(AddNode(&F, &N, RxComp(Regexp)));
while (N->fNext) N = N->fNext;
CHECK(AddNode(&F, &N, NewNode(RE_GROUP | RE_CLOSE | RE_MEM | C)));
break;
case '\\':CHECK(AddNode(&F, &N, NewEscape(Regexp))); break;
case '[': CHECK(AddNode(&F, &N, NewSet(Regexp))); break;
case '^': CHECK(AddNode(&F, &N, NewNode(RE_ATBOL))); break;
case '$': CHECK(AddNode(&F, &N, NewNode(RE_ATEOL))); break;
case '.': CHECK(AddNode(&F, &N, NewNode(RE_ANY))); break;
case '<': CHECK(AddNode(&F, &N, NewNode(RE_ATBOW))); break;
case '>': CHECK(AddNode(&F, &N, NewNode(RE_ATEOW))); break;
default:
--*Regexp;
CHECK(AddNode(&F, &N, NewChar(**Regexp)));
++*Regexp;
break;
}
}
return F;
}
RxNode *RxOptimize(RxNode *rx) {
return rx;
}
RxNode *RxCompile(const char *Regexp) {
RxNode *n = 0, *x;
if (Regexp == 0) return 0;
RegCount = 0;
n = RxComp(&Regexp);
if (n == 0) return 0;
n = RxOptimize(n);
x = n;
while (x->fNext) x = x->fNext;
x->fNext = NewNode(RE_END);
return n;
}
void RxFree(RxNode *n) {
RxNode *p;
while (n) {
p = n;
n = n->fNext;
switch (p->fWhat) {
case RE_INSET:
case RE_NOTINSET:
case RE_CHAR:
free(p->fChar);
break;
default:
break;
}
free(p);
}
}
#define ChClass(x) (((((x) >= 'A') && ((x) <= 'Z')) || (((x) >= 'a') && ((x) <= 'z')) || (((x) >= '0') && ((x) <= '9')))?1:0)
static RxMatchRes *match;
static const char *bop;
static const char *eop;
static int flags = RX_CASE;
static const char *rex;
int RxMatch(RxNode *rx) {
RxNode *n = rx;
//printf(">>");
while (n) {
//printf("%-50.50s\n", rex);
//RxDump(1, n);
switch (n->fWhat) {
case RE_NOTHING:
break;
case RE_CASE:
flags |= RX_CASE;
break;
case RE_NCASE:
flags &= ~RX_CASE;
break;
case RE_ATBOL:
if (rex != bop) return 0;
break;
case RE_ATEOL:
if (rex != eop) return 0;
break;
case RE_ANY:
if (rex == eop) return 0;
rex++;
break;
case RE_WSPACE:
if (rex == eop) return 0;
if (*rex != ' ' && *rex != '\n' && *rex != '\r' && *rex != '\t') return 0;
rex++;
break;
case RE_NWSPACE:
if (rex == eop) return 0;
if (*rex == ' ' || *rex == '\n' || *rex == '\r' || *rex == '\t') return 0;
rex++;
break;
case RE_WORD:
if (rex == eop) return 0;
if (!isalnum(*rex)) return 0;
rex++;
break;
case RE_NWORD:
if (rex == eop) return 0;
if (isalnum(*rex)) return 0;
rex++;
break;
case RE_DIGIT:
if (rex == eop) return 0;
if (!isdigit(*rex)) return 0;
rex++;
break;
case RE_NDIGIT:
if (rex == eop) return 0;
if (isdigit(*rex)) return 0;
rex++;
break;
case RE_UPPER:
if (rex == eop) return 0;
if (!isupper(*rex)) return 0;
rex++;
break;
case RE_LOWER:
if (rex == eop) return 0;
if (!islower(*rex)) return 0;
rex++;
break;
case RE_ATBOW:
if (rex >= eop) return 0;
if (rex > bop) {
if ((ChClass(*rex) != 1) || (ChClass(*(rex-1)) != 0)) return 0;
}
break;
case RE_ATEOW:
if (rex <= bop) return 0;
if (rex < eop) {
if ((ChClass(*rex) != 0) || (ChClass(*(rex-1)) != 1)) return 0;
}
break;
case RE_CHAR:
if (rex == eop) return 0;
if (flags & RX_CASE) {
if (*n->fChar != *rex) return 0;
if (memcmp(rex, n->fChar, n->fLen) != 0) return 0;
} else {
for (int i = 0; i < n->fLen; i++)
if (toupper(rex[i]) != toupper(n->fChar[i]))
return 0;
}
rex += n->fLen;
break;
case RE_INSET:
if (rex == eop) return 0;
if ((n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) == 0) return 0;
rex++;
break;
case RE_NOTINSET:
if (rex == eop) return 0;
if (n->fChar[(unsigned char)(*rex) >> 3] & (1 << ((unsigned char)(*rex) & 7))) return 0;
rex++;
break;
case RE_JUMP:
n = n->fPtr;
continue;
case RE_END:
return 1;
case RE_BREAK:
n = n->fNext;
if (n->fNext == 0) break;
n = n->fNext;
if (n->fWhat & RE_BRANCH) {
while ((n->fWhat & RE_BRANCH) && n->fPtr && ((n->fWhat & 0xFF) == '|'))
n = n->fPtr->fNext;
}
if (n->fWhat & RE_GROUP) {
int C = 1;
n = n->fNext;
while ((C > 0) && n) {
if (n->fWhat & RE_GROUP) {
if (n->fWhat & RE_OPEN) C++;
else C--;
}
if (C == 0) break;
n = n->fNext;
}
}
break;
default:
if (n->fWhat & RE_GROUP) {
if (n->fWhat & RE_MEM) {
const char *save = rex;
int b = n->fWhat & 0xFF;
int fl = flags;
if (RxMatch(n->fNext) == 0) {
flags = fl;
if (n->fWhat & RE_OPEN)
match->Open[b] = -1;
else
match->Close[b] = -1;
return 0;
}
if (n->fWhat & RE_OPEN) {
// if (match->Open[b] == -1)
match->Open[b] = (int) (save - bop);
} else {
// if (match->Close[b] == -1)
match->Close[b] = (int) (save - bop);
}
return 1;
}
} else if (n->fWhat & RE_BRANCH) {
const char *save = rex;
int fl = flags;
if ((n->fWhat & RE_GREEDY) == 0) {
if (RxMatch(n->fPtr) == 1) return 1;
flags = fl;
rex = save;
} else {
if (RxMatch(n->fNext) == 1) return 1;
flags = fl;
rex = save;
n = n->fPtr;
continue;
}
}
break;
}
n = n->fNext;
}
/* NOTREACHED */
assert(1 == 0 /* internal regexp error */);
return 0;
}
int RxTry(RxNode *rx, const char *s) {
int fl = flags;
rex = s;
for (int i = 0; i < NSEXPS; i++)
match->Open[i] = match->Close[i] = -1;
if (RxMatch(rx)) {
match->Open[0] = (int) (s - bop);
match->Close[0] = (int) (rex - bop);
return 1;
}
flags = fl;
return 0;
}
int RxExec(RxNode *Regexp, const char *Data, int Len, const char *Start, RxMatchRes *Match, unsigned int RxOpt) {
char Ch;
if (Regexp == 0) return 0;
match = Match;
bop = Data;
eop = Data + Len;
flags = RxOpt;
for (int i = 0; i < NSEXPS; i++) Match->Open[i] = Match->Close[i] = -1;
switch (Regexp->fWhat) { // this should be more clever
case RE_ATBOL: // match is anchored
return RxTry(Regexp, Start);
case RE_CHAR: // search for a character to match
Ch = Regexp->fChar[0];
if (Start == eop)
break;
if (flags & RX_CASE) {
while (1) {
while (Start < eop && *Start != Ch)
Start++;
if (Start == eop)
break;
if (RxTry(Regexp, Start))
return 1;
if (++Start == eop)
break;
}
} else {
Ch = toupper(Ch);
while (1) {
while (Start < eop && toupper(*Start) != Ch)
Start++;
if (Start == eop)
break;
if (RxTry(Regexp, Start))
return 1;
if (++Start == eop)
break;
}
}
break;
default: // (slow)
do {
if (RxTry(Regexp, Start)) return 1;
} while (Start++ < eop);
break;
}
return 0;
}
#define FLAG_UP_CASE 1
#define FLAG_DOWN_CASE 2
#define FLAG_UP_NEXT 4
#define FLAG_DOWN_NEXT 8
static int add(int *len, char **s, const char *a, int alen, int &flag) {
int NewLen = *len + alen;
int i;
NewLen = NewLen * 2;
if (alen == 0)
return 0;
if (*s) {
*s = (char *) realloc(*s, NewLen);
assert(*s);
memcpy(*s + *len, a, alen);
} else {
*s = (char *) malloc(NewLen);
assert(*s);
memcpy(*s, a, alen);
*len = 0;
}
if (flag & FLAG_UP_CASE) {
char *p = *s + *len;
for (i = 0; i < alen; i++) {
*p = (char)toupper(*p);
p++;
}
} else if (flag & FLAG_DOWN_CASE) {
char *p = *s + *len;
for (i = 0; i < alen; i++) {
*p = (char)tolower(*p);
p++;
}
}
if (flag & FLAG_UP_NEXT) {
char *p = *s + *len;
*p = (char)toupper(*p);
flag &= ~FLAG_UP_NEXT;
} else if (flag & FLAG_DOWN_NEXT) {
char *p = *s + *len;
*p = (char)tolower(*p);
flag &= ~FLAG_DOWN_NEXT;
}
*len += alen;
return 0;
}
int RxReplace(const char *rep, const char *Src, int /*len*/, RxMatchRes match, char **Dest, int *Dlen) {
int dlen = 0;
char *dest = 0;
char Ch;
int n;
int flag = 0;
*Dest = 0;
*Dlen = 0;
// add(&dlen, &dest, Src, match.Open[0]);
while (*rep) {
switch (Ch = *rep++) {
// case '&':
// add(&dlen, &dest, Src + match.Open[0], match.Close[0] - match.Open[0], flag);
// break;
case '\\':
switch (Ch = *rep++) {
case '0':
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
n = Ch - 48;
if (match.Open[n] != -1 && match.Close[n] != -1) {
add(&dlen, &dest, Src + match.Open[n], match.Close[n] - match.Open[n], flag);
} else return -1;
break;
case 0:
if (dest) free(dest);
return -1; // error
case 'r': Ch = '\r'; add(&dlen, &dest, &Ch, 1, flag); break;
case 'n': Ch = '\n'; add(&dlen, &dest, &Ch, 1, flag); break;
case 'b': Ch = '\b'; add(&dlen, &dest, &Ch, 1, flag); break;
case 'a': Ch = '\a'; add(&dlen, &dest, &Ch, 1, flag); break;
case 't': Ch = '\t'; add(&dlen, &dest, &Ch, 1, flag); break;
case 'U': flag |= FLAG_UP_CASE; break;
case 'u': flag |= FLAG_UP_NEXT; break;
case 'L': flag |= FLAG_DOWN_CASE; break;
case 'l': flag |= FLAG_DOWN_NEXT; break;
case 'E':
case 'e': flag &= ~(FLAG_UP_CASE | FLAG_DOWN_CASE); break;
case 'x':
{
int N = 0;
int A = 0;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
rep++;
A = N << 4;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
rep++;
A = A + N;
Ch = (char)A;
}
add(&dlen, &dest, &Ch, 1, flag);
break;
case 'd':
{
int N = 0;
int A = 0;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 9) { free(dest); return 0; }
rep++;
A = N * 100;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 9) { free(dest); return 0; }
rep++;
A = N * 10;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 9) { free(dest); return 0; }
rep++;
A = A + N;
Ch = (char)A;
}
add(&dlen, &dest, &Ch, 1, flag);
break;
case 'o':
{
int N = 0;
int A = 0;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 7) { free(dest); return 0; }
rep++;
A = N * 64;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 7) { free(dest); return 0; }
rep++;
A = N * 8;
if (*rep == 0) { free(dest); return 0; }
N = toupper(*rep) - 48; if (N > 7) { free(dest); return 0; }
rep++;
A = A + N;
Ch = (char)A;
}
add(&dlen, &dest, &Ch, 1, flag);
break;
default:
add(&dlen, &dest, &Ch, 1, flag);
break;
}
break;
default:
add(&dlen, &dest, &Ch, 1, flag);
break;
}
}
// add(&dlen, &dest, Src + match.Close[0], len - match.Close[0]);
*Dlen = dlen;
*Dest = dest;
return 0;
}
#ifdef DEBUG
static void RxDump(int N, RxNode *n) {
while (n) {
for (int i = 0; i < N; i++) printf(" ");
switch (n->fWhat) {
case RE_NOTHING: printf("NOTHING\n"); break;
case RE_CHAR: printf("CHAR '%.1s'\n", n->fChar); break;
case RE_ATBOL: printf("^\n"); break;
case RE_ATEOL: printf("$\n"); break;
case RE_ANY: printf(".\n"); break;
case RE_INSET: printf("[\n"/*, n->fChar*/); break;
case RE_NOTINSET: printf("[^\n"/*, n->fChar*/); break;
case RE_ATBOW: printf("<\n"); break;
case RE_ATEOW: printf(">\n"); break;
case RE_WSPACE: printf("WSPACE\n"); break;
case RE_NWSPACE: printf("NWSPACE\n"); break;
case RE_UPPER: printf("UPPER\n"); break;
case RE_LOWER: printf("LOWER\n"); break;
case RE_JUMP: printf("JUMP\n"); break;
case RE_BREAK: printf("BREAK\n"); break;
case RE_END: printf("END\n"); break;
default:
if (n->fWhat & RE_GROUP) {
if (n->fWhat & RE_MEM) {
if (n->fWhat & RE_OPEN) printf("( %d\n", n->fWhat & 0xFF);
if (n->fWhat & RE_CLOSE) printf(") %d\n", n->fWhat & 0xFF);
} else {
if (n->fWhat & RE_OPEN) printf("{\n");
if (n->fWhat & RE_CLOSE) printf("}\n");
}
} else if (n->fWhat & RE_BRANCH) {
if (n->fWhat & RE_GREEDY) {
printf("%c\n", n->fWhat & 0xFF);
} else {
printf("%c\n", n->fWhat & 0xFF);
}
} else {
printf("???????????????\n");
}
break;
}
n = n->fNext;
}
}
#define TEST(rc,rx,st) \
strcpy(line,st); \
assert((a = RxCompile(rx)) != 0); \
puts("\n--- " rx " -- " st " -- "); \
RxDump(0,a);\
assert(rc == RxExec(a, line, strlen(line), line, &b)); \
RxFree(a);
int main() {
RxNode *a;
RxMatchRes b;
char line[1024];
TEST(1, "a", "a");
TEST(0, "b", "a");
TEST(1, "aaaa", "aaaa");
TEST(0, "bbbb", "aaaa");
TEST(1, ".", "a");
TEST(0, ".", "");
TEST(1, "a..", "axx");
TEST(0, "a..", "b..");
TEST(1, "a?b", "ab");
TEST(1, "a?b", "xb");
TEST(0, "a?C", "xb");
TEST(1, "{aa}?b", "aab");
TEST(1, "{aa}?b", "xab");
TEST(0, "{aa}?C", "xxb");
TEST(1, "^aa", "aa");
TEST(0, "^aa", "baa");
TEST(1, "^aa$" ,"aa");
TEST(0, "^aa$", "baab");
TEST(1, "a*b", "aaab");
TEST(0, "a*b", "aaaa");
TEST(1, "{aa}*b", "aaab");
TEST(0, "{aa}*b", "aaaa");
TEST(1, "b+", "bb");
TEST(1, "b+", "b");
TEST(0, "b+", "a");
TEST(1, "^b+$", "b");
TEST(0, "^b+$", "aba");
TEST(1, "a|b", " a");
TEST(1, "a|b", " b");
TEST(0, "a|b", " c");
TEST(1, "a|b|c|d|e", " a ");
TEST(1, "a|b|c|d|e", " c ");
TEST(1, "a|b|c|d|e", " e ");
TEST(0, "a|b|c|d|e", " x ");
TEST(1, "{a}|{b}|{c}|{d}|{e}", " a ");
TEST(1, "{a}|{b}|{c}|{d}|{e}", " c ");
TEST(1, "{a}|{b}|{c}|{d}|{e}", " e ");
TEST(0, "{a}|{b}|{c}|{d}|{e}", " x ");
TEST(1, "^xx{alpha}|{beta}xx$", "xxalphaxx");
TEST(1, "^xx{alpha}|{beta}xx$", "xxbetaxx");
TEST(1, "[a-z]", "aaa");
TEST(1, "^{Error}|{Warning}", "Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()");
TEST(1, "^{Error}|{Warning} (.+)", "Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()");
TEST(1, "^{Error}|{Warning} ([a-z.]#) ([0-9]#)", "Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()");
TEST(1, "^{Error}|{Warning} (.+) ([0-9]+): (.*)$", "Warning search.cpp 35: Conversion may lose significant digits in function AskReplace()");
TEST(1, "^{Error}|{Warning} (.+) ([0-9]+): (.*)$", "Error search.cpp 35: Conversion may lose significant digits in function AskReplace()");
TEST(1, "^([a-z]+ +)*\\(", "blabla bla bla bla (");
TEST(1, "^([a-z]+\\s+)+\\(", "blabla bla bla bla (");
TEST(1, "^([a-z]+\\s*)+\\(", "blabla bla bla bla(");
TEST(1, "^([a-z]+\\s+)+\\(", "blabla bla bla bla (");
TEST(1, "^([a-z]+\\s*)+\\(", "blabla bla bla bla(");
TEST(1, "^([a-z]# #)*\\(", "blabla bla bla bla (");
TEST(1, "^([a-z]+ @)@\\(", "blabla bla bla bla (");
TEST(1, "^[\\x20-\\xFF]+$", "blabla");
TEST(1, "{a{a{a{a|a}|{a|a}a}a}a|a}", "aaaaaaaaaaaaaaaaa");
while (1) {
printf("Regexp: "); fflush(stdout); gets(line);
if (!*line) break;
a = RxCompile(line); RxDump(0, a);
printf("String: "); fflush(stdout); gets(line);
printf("rc = %d\n", RxExec(a, line, strlen(line), line, &b));
for (int i = 0; i < NSEXPS; i++) {
if (b.Open[i] != -1) {
printf("%d: %d %d\n", i, b.Open[i], b.Close[i]);
}
}
RxFree(a);
}
return 0;
}
#endif