This is regex.c in view mode; [Download] [Up]
/* Regular expression subroutines
   Copyright (C) 1992 Joseph H. Allen

This file is part of JOE (Joe's Own Editor)

JOE is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 1, or (at your option) any later version.

JOE is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
JOE; see the file COPYING.  If not, write to the Free Software Foundation,
675 Mass Ave, Cambridge, MA 02139, USA.  */

#include "config.h"
#include "zstr.h"
#include "vs.h"
#include "b.h"
#include "regex.h"

/* Decode one character, possibly written as a backslash escape.
 *
 *   a  - in/out: pointer to the current position in the pattern text
 *   b  - in/out: number of bytes remaining at *a
 *
 * Returns the decoded character code and advances *a / decrements *b past
 * every byte that was consumed.
 *
 * Recognised escapes (a backslash followed by):
 *   n=10  t=9  a=7  b=8  f=12  e=27  r=13  8=8  9=9
 *   one to three octal digits
 *   x or X followed by up to two hex digits (no digits yields 0)
 *   any other character: that character itself
 * A backslash that is the last remaining byte is returned literally.
 *
 * The caller must guarantee that at least one byte is available.
 */
int escape(a,b)
unsigned char **a;
int *b;
{
	int c;
	unsigned char *s= *a;
	int l= *b;

	if(*s=='\\' && l>=2)
	{
		++s; --l;
		switch(*s)
		{
		case 'n': c= 10; break;
		case 't': c= 9; break;
		case 'a': c= 7; break;
		case 'b': c= 8; break;
		case 'f': c= 12; break;
		case 'e': c= 27; break;
		case 'r': c= 13; break;
		case '8': c= 8; break;
		case '9': c= 9; break;

		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			/* First octal digit, then up to two more */
			c= *s-'0';
			if(l>1 && s[1]>='0' && s[1]<='7') c=c*8+s[1]-'0', ++s, --l;
			if(l>1 && s[1]>='0' && s[1]<='7') c=c*8+s[1]-'0', ++s, --l;
			break;

		case 'x': case 'X':
			/* Up to two hexadecimal digits */
			c=0;
			if(l>1 && s[1]>='0' && s[1]<='9') c=c*16+s[1]-'0', ++s, --l;
			else if(l>1 && s[1]>='A' && s[1]<='F') c=c*16+s[1]-'A'+10, ++s, --l;
			else if(l>1 && s[1]>='a' && s[1]<='f') c=c*16+s[1]-'a'+10, ++s, --l;
			if(l>1 && s[1]>='0' && s[1]<='9') c=c*16+s[1]-'0', ++s, --l;
			else if(l>1 && s[1]>='A' && s[1]<='F') c=c*16+s[1]-'A'+10, ++s, --l;
			else if(l>1 && s[1]>='a' && s[1]<='f') c=c*16+s[1]-'a'+10, ++s, --l;
			break;

		default: c= *s; break;
		}
		/* Step over the last byte of the escape */
		++s; --l;
	}
	else (c= *s++), --l;

	*a= s; *b= l;
	return c;
}

/* Test character c against a bracketed set.
 *
 *   a   - in/out: pattern position just after the opening bracket
 *   la  - in/out: number of pattern bytes remaining at *a
 *   c   - character to test
 *
 * A leading '^' or '*' inverts the set.  A ']' that comes first (after the
 * optional inversion mark) is an ordinary member.  Members are read with
 * escape(), and "lo-hi" denotes an inclusive range unless the '-' is
 * directly followed by ']'.
 *
 * On return *a / *la have been advanced past the closing ']' (or to the end
 * of the pattern if there is none).  Returns non-zero if c is accepted.
 * When no pattern bytes remain on entry, returns 0 and leaves *a / *la
 * untouched.
 */
static int brack(a,la,c)
unsigned char **a;
int *la;
unsigned char c;
{
	int inverse=0;
	int flag=0;
	unsigned char *s= *a;
	int l= *la;

	if(!l) return 0;

	if(*s=='^' || *s=='*') inverse=1, ++s, --l;

	if(l && *s==']')
	{
		++s; --l;
		if(c==']') flag=1;
	}

	while(l)
		if(*s==']')
		{
			++s; --l;
			break;
		}
		else
		{
			int cl, cr;
			cl=escape(&s,&l);
			if(l>=2 && s[0]=='-' && s[1]!=']')
			{
				--l; ++s;
				cr=escape(&s,&l);
				if(c>=cl && c<=cr) flag=1;
			}
			else if(c==cl) flag=1;
		}

	*a=s; *la=l;
	if(inverse) return !flag;
	else return flag;
}

/* Record the single character c as captured piece number n.
 * Any previous value of pieces[n] is released with vsrm() first.
 */
static void savec(pieces,n,c)
char *pieces[];
int n;
char c;
{
	char *s=0;
	if(pieces[n]) vsrm(pieces[n]);
	s=vsncpy(s,0,&c,1);
	pieces[n]=s;
}

/* Record szz bytes of buffer text starting at p as captured piece number n.
 * If szz is MAXINT-31 or more the piece is set to length 0 instead of being
 * copied.
 */
static void saves(pieces,n,p,szz)
char *pieces[];
int n;
P *p;
long szz;
{
	if(szz>=MAXINT-31) pieces[n]=vstrunc(pieces[n],0);
	else
	{
		pieces[n]=vstrunc(pieces[n],(int)szz);
		brmem(p,pieces[n],(int)szz);
	}
}

/* Advance p over one item of C-like source text.
 *
 * Reads a character with pgetc().  If it opens a double-quoted string, a
 * single-quoted character constant, a (), [] or {} group, or a C-style
 * slash-star comment, the whole construct is consumed and MAXINT-1 is
 * returned.  Otherwise the character that was read is returned.  MAXINT
 * coming back from pgetc() is passed through and ends any construct being
 * skipped (presumably it marks the end of the buffer).
 */
static int skip_special(p)
P *p;
{
	int to, s;

	switch(s=pgetc(p))
	{
	case '"':
		/* A backslash inside the string hides the following character */
		do
			if((s=pgetc(p))=='\\') pgetc(p), s=pgetc(p);
		while(s!=MAXINT && s!='\"');
		if(s=='\"') return MAXINT-1;
		break;

	case '\'':
		if((s=pgetc(p))=='\\') s=pgetc(p), s=pgetc(p);
		if(s=='\'') return MAXINT-1;
		if((s=pgetc(p))=='\'') return MAXINT-1;
		if((s=pgetc(p))=='\'') return MAXINT-1;
		break;

	case '[': to=']'; goto skip;
	case '(': to=')'; goto skip;
	case '{': to='}';
	skip:
		/* Nested constructs are skipped by the recursive call */
		do
			s=skip_special(p);
		while(s!=to && s!=MAXINT);
		if(s==to) return MAXINT-1;
		break;

	case '/':
		s=pgetc(p);
		if(s=='*')
			do
			{
				s=pgetc(p);
				while(s=='*')
					if((s=pgetc(p))=='/') return MAXINT-1;
			}
			while(s!=MAXINT);
		else if(s!=MAXINT) s=prgetc(p);	/* not a comment: undo the look-ahead read */
		else s='/';
		break;
	}
	return s;
}

/* Try to match a JOE search pattern against buffer text at p.
 *
 *   pieces - array receiving the text matched by each capturing construct
 *   regex  - pattern text
 *   len    - number of bytes in regex
 *   p      - buffer position where the match must start
 *   n      - index of the next pieces[] slot to fill
 *   icase  - non-zero to compare literal characters through toup()
 *
 * Returns 1 on success with p left just past the matched text, or 0 on
 * failure with p restored to where it was on entry.
 *
 * Pattern constructs (each introduced by a backslash):
 *   ?      any single character (captured)
 *   *      shortest run of characters, not crossing a newline (captured)
 *   c      shortest run of skip_special() items (captured)
 *   [...]  one character from a set, see brack() (captured)
 *   +      zero or more of the following character or [...] set, longest
 *          run for which the rest of the pattern still matches (captured)
 *   ^ $    beginning / end of line     (pisbol / piseol)
 *   < >    beginning / end of word     (pisbow / piseow)
 *   n r a f b t e x X 0-9   character escapes, decoded by escape()
 * Anything else matches itself.
 */
int pmatch(pieces,regex,len,p,n,icase)
char *pieces[];
char *regex;
int len;
P *p;
int n;
int icase;
{
	int c,d;
	P *q=pdup(p);	/* entry position, restored on failure */
	P *o=0;		/* start of the text captured by a repeat construct */

	while(len--)
		switch(c= *regex++)
		{
		case '\\':
			if(!len--) goto fail;
			switch(c= *regex++)
			{
			case '?':
				d=pgetc(p);
				if(d== MAXINT) goto fail;
				savec(pieces,n++,(char)d);
				break;

			case 'n': case 'r': case 'a': case 'f': case 'b': case 't': case 'e':
			case 'x': case 'X':
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				/* Back up over the backslash and let escape() decode it */
				regex-=2; len+=2;
				if(pgetc(p)!= escape(&regex,&len)) goto fail;
				break;

			case '*':
				/* Find shortest matching sequence */
				o=pdup(p);
				do
				{
					long pb=p->byte;
					if(pmatch(pieces,regex,len,p,n+1,icase))
					{
						saves(pieces,n,o,pb-o->byte);
						goto succeed;
					}
					c=pgetc(p);
				}
				while(c!=MAXINT && c!='\n');
				goto fail;

			case 'c':
				o=pdup(p);
				do
				{
					long pb=p->byte;
					if(pmatch(pieces,regex,len,p,n+1,icase))
					{
						saves(pieces,n,o,pb-o->byte);
						goto succeed;
					}
				}
				while(skip_special(p)!= MAXINT);
				goto fail;

			case '[':
				d=pgetc(p);
				if(d== MAXINT) goto fail;
				if(!brack(&regex,&len,d)) goto fail;
				savec(pieces,n++,(char)d);
				break;

			case '+':
			{
				char *oregex=regex;	/* Point to character to skip */
				int olen=len;
				char *tregex;
				int tlen;
				P *r=0;		/* end of the best match found so far */

				o=pdup(p);

				/* Advance over character to skip */
				if(len>=2 && regex[0]=='\\' && regex[1]=='[')
				{
					regex+=2;
					len-=2;
					brack(&regex,&len,0);
				}
				else if(len>=1) --len, ++regex;
				else goto done;

				/* Now oregex/olen point to character to skip over and
				   regex/len point to sequence which follows */
				/* NOTE(review): a repeated item that begins with a
				   backslash is always treated as a bracket set by the
				   loop test below; other escapes after this construct
				   do not appear to be supported - confirm. */
				do
				{
					P *z=pdup(p);
					if(pmatch(pieces,regex,len,p,n+1,icase))
					{
						saves(pieces,n,o,z->byte-o->byte);
						if(r) prm(r);
						r=pdup(p);
					}
					pset(p,z);
					prm(z);
					c=pgetc(p);
				}
				while(c!=MAXINT &&
				      (*oregex=='\\' ?
				        (tregex=oregex+2, tlen=olen-2, brack(&tregex,&tlen,c))
				      :
				        (icase?toup(c)==toup(*oregex):c==*oregex)
				      )
				     );

			done:
				if(r)
				{
					pset(p,r);
					prm(r);
					goto succeed;
				}
				goto fail;
			}

			case '^':
				if(!pisbol(p)) goto fail;
				break;

			case '$':
				if(!piseol(p)) goto fail;
				break;

			case '<':
				if(!pisbow(p)) goto fail;
				break;

			case '>':
				if(!piseow(p)) goto fail;
				break;

			default:
				/* Escaped ordinary character: match it literally */
				d=pgetc(p);
				if(icase)
				{
					if(toup(d)!=toup(c)) goto fail;
				}
				else
				{
					if(d!=c) goto fail;
				}
			}
			break;

		default:
			d=pgetc(p);
			if(icase)
			{
				if(toup(d)!=toup(c)) goto fail;
			}
			else
			{
				if(d!=c) goto fail;
			}
		}

succeed:
	if(o) prm(o);
	prm(q);
	return 1;

fail:
	if(o) prm(o);
	pset(p,q);
	prm(q);
	return 0;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.