// This is MiscStringRegex.m in view mode; [Download] [Up]
//
// MiscStringRegex.m
// Regular expression matching and replacement routines using regexpr.c
//
// Written by Carl Lindberg Copyright (c) 1994 by Carl Lindberg.
// Version 1.95 All rights reserved.
// This notice may not be removed from this source code.
//
// This object is included in the MiscKit by permission from the author
// and its use is governed by the MiscKit license, found in the file
// "LICENSE.rtf" in the MiscKit distribution. Please refer to that file
// for a list of all applicable permissions and restrictions.
//
/* I decided to keep with MiscStringPatterns' -grep method, except to add
* an occurrenceNum: parameter. The old calls to this method should work
* exactly as they used to. I did not include the -grepString methods, as
* I'm not sure how useful they would be, and there are a fair number of
* methods here already. If anyone thinks they were indeed useful, I can add
* them in very easily.
* On the other hand, MiscStringPatterns' -replace methods have been
* rearranged. To be more parallel with the rest of the MiscString class,
* I made the global replace its own method, and added an occurrenceNum:
* parameter to the basic replace method.
* There are other possible methods here: -indexOfRegex and -rindexOfRegex
* methods to go along with -spotOfRegex, and maybe a method to simply return
* a matched portion in a new MiscString. I'm going to let these go for the
* time being; if people think they are useful I'll add them in then.
*/
#import <misckit/MiscString.h>
@implementation MiscString(Regex)
// Looks for occurrence number n of regex in the receiver (or, when n is
// MISC_STRING_LAST, the first occurrence counted from the end) and hands
// the surrounding pieces to the optional targets:
//   b  receives the text in front of the match,
//   m  receives the matched text itself,
//   a  receives the text following the match.
// A target is only touched when it is non-nil and responds to
// -setStringValue:.  When nothing usable was found every such target is
// set to the empty string instead.
//
// Returns -1 if regex is NULL.  Otherwise the result is derived from the
// spot reported by the search: 1 for a spot >= 0, and (spot + 1) for a
// negative spot, i.e. 0 for -1 and -1 for -2.
- (int)grep:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense before:(id)b middle:(id)m after:(id)a
{
  int matchLen = 0;
  int matchSpot;
  id piece;

  if (regex == NULL) return -1;

  if (n != MISC_STRING_LAST)
    matchSpot = [self spotOfRegex:regex occurrenceNum:n caseSensitive:sense length:&matchLen];
  else
    matchSpot = [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:&matchLen];

  if ((matchSpot < 0) || (matchLen <= 0)) {
    // No usable match: blank out whichever targets can take a string.
    if (a && [a respondsTo:@selector(setStringValue:)])
      [a setStringValue:""];
    if (b && [b respondsTo:@selector(setStringValue:)])
      [b setStringValue:""];
    if (m && [m respondsTo:@selector(setStringValue:)])
      [m setStringValue:""];
  }
  else {
    // Each piece is cut out as a temporary string object, copied into
    // the target via -setStringValue:, and then freed again.
    if (b && [b respondsTo:@selector(setStringValue:)]) {
      piece = [self midFrom:0 to:matchSpot-1];
      if (piece == nil) [b setStringValue:""];
      else [b setStringValue:[piece stringValue]];
      [piece free];
    }
    if (a && [a respondsTo:@selector(setStringValue:)]) {
      piece = [self midFrom:matchSpot+matchLen to:length-1];
      if (piece == nil) [a setStringValue:""];
      else [a setStringValue:[piece stringValue]];
      [piece free];
    }
    if (m && [m respondsTo:@selector(setStringValue:)]) {
      piece = [self midFrom:matchSpot length:matchLen];
      if (piece == nil) [m setStringValue:""];
      else [m setStringValue:[piece stringValue]];
      [piece free];
    }
  }

  // Shift the spot result up by one and clamp at one, so that any found
  // position collapses to 1 while the negative codes become 0 and -1.
  if (matchSpot >= 0) return 1;
  return matchSpot + 1;
}
// Counts the successive, non-overlapping matches of regex in the
// receiver's buffer.  Each search resumes right behind the previous
// match; counting stops at the first search that fails or at the first
// match whose reported length is not positive.
//
// When sense is NO the pattern is compiled with a translate table that
// maps 'A'..'Z' onto 'a'..'z'; otherwise the table is the identity.
//
// Returns the number of matches, or -1 if regex is NULL or
// re_compile_pattern reports an error.
- (int)numOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
  struct re_pattern_buffer compiled;
  char fastmap[256], translate[256];
  char *compileError;
  int count = 0, searchFrom = 0;
  int c, hit, hitLen = 0;

  if (regex == NULL) return -1;

  memset(&compiled, 0, sizeof(compiled));
  for (c = 0; c < 256; c++) {
    if (!sense && (c >= 'A') && (c <= 'Z'))
      translate[c] = c - 'A' + 'a';
    else
      translate[c] = c;
  }
  compiled.translate = translate;
  compiled.fastmap = fastmap;

  compileError = re_compile_pattern((char *)regex, strlen(regex), &compiled);
  if (compileError) {
    if (compiled.buffer) free(compiled.buffer);
    return -1;
  }

  for (;;) {
    hit = re_search_pattern(&compiled, buffer, length, searchFrom, length - searchFrom, 0);
    if (hit < 0) break;
    hitLen = re_match_pattern(&compiled, buffer, length, hit, 0);
    if (hitLen <= 0) break;   // cannot advance past a non-positive match
    searchFrom = hit + hitLen;
    count++;
  }

  if (compiled.buffer) free(compiled.buffer);
  return count;
}
// Replaces every successive, non-overlapping match of regex in the
// receiver with aString.  The result is assembled in a temporary string
// of the receiver's class (unmatched stretches copied over, aString
// appended in place of each match), then taken over by the receiver via
// -takeStringValueFrom: and the temporary is freed.  Scanning stops at
// the first failed search or the first match with a non-positive length;
// whatever remains of the buffer from there is copied unchanged.
//
// When sense is NO the pattern is compiled with a translate table that
// maps 'A'..'Z' onto 'a'..'z'.
//
// Returns the number of replacements made, or -1 if regex is NULL or
// re_compile_pattern reports an error.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex with:(const char *)aString caseSensitive:(BOOL)sense
{
  struct re_pattern_buffer compiled;
  char fastmap[256], translate[256];
  char *compileError;
  int replaced = 0, copyFrom = 0;
  int c, hit, hitLen = 0;
  id result;

  if (regex == NULL) return -1;

  memset(&compiled, 0, sizeof(compiled));
  for (c = 0; c < 256; c++) {
    if (!sense && (c >= 'A') && (c <= 'Z'))
      translate[c] = c - 'A' + 'a';
    else
      translate[c] = c;
  }
  compiled.translate = translate;
  compiled.fastmap = fastmap;

  compileError = re_compile_pattern((char *)regex, strlen(regex), &compiled);
  if (compileError) {
    if (compiled.buffer) free(compiled.buffer);
    return -1;
  }

  result = [[[self class] alloc] allocateBuffer:length];
  for (;;) {
    hit = re_search_pattern(&compiled, buffer, length, copyFrom, length - copyFrom, 0);
    if (hit < 0) break;
    hitLen = re_match_pattern(&compiled, buffer, length, hit, 0);

    // Carry over the untouched text between the previous match and this one.
    [result cat:buffer+copyFrom n:hit-copyFrom];
    copyFrom = hit;
    if (hitLen <= 0) break;   // the tail copy below resumes at the hit itself

    [result cat:aString];
    replaced++;
    copyFrom = hit + hitLen;
  }
  // Everything behind the last replacement goes over unchanged.
  [result cat:buffer+copyFrom n:length - copyFrom];

  [self takeStringValueFrom:result];
  [result free];
  if (compiled.buffer) free(compiled.buffer);
  return replaced;
}
// Finds occurrence number n (counting from 0) of regex in the receiver's
// buffer, scanning forward and skipping over each earlier match.
//
// When sense is NO the pattern is compiled with a translate table that
// maps 'A'..'Z' onto 'a'..'z'.
//
// Returns the buffer offset of the match.  Failure results:
//   -2  regex is NULL or re_compile_pattern reports an error;
//   -1  n is negative, or a match with a non-positive length was met;
//   otherwise the negative value handed back by re_search_pattern when
//   a search fails before occurrence n is reached.
// If matchlen is not NULL it receives the length of the match, or 0
// whenever the returned value is negative.
- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense length:(int *)matchlen
{
  struct re_pattern_buffer compiled;
  char fastmap[256], translate[256];
  char *compileError;
  int searchFrom = 0, hitLen = 0, hit = -1;
  int c, seen;

  if (regex == NULL) {
    if (matchlen) *matchlen = 0;
    return -2;
  }
  if (n < 0) {
    if (matchlen) *matchlen = 0;
    return -1;
  }

  memset(&compiled, 0, sizeof(compiled));
  for (c = 0; c < 256; c++) {
    if (!sense && (c >= 'A') && (c <= 'Z'))
      translate[c] = c - 'A' + 'a';
    else
      translate[c] = c;
  }
  compiled.translate = translate;
  compiled.fastmap = fastmap;

  compileError = re_compile_pattern((char *)regex, strlen(regex), &compiled);
  if (compileError) {
    if (matchlen) *matchlen = 0;
    if (compiled.buffer) free(compiled.buffer);
    return -2;
  }

  // One pass per occurrence 0..n; the values left in hit/hitLen after
  // the final pass describe occurrence n.
  for (seen = 0; seen <= n; seen++) {
    hit = re_search_pattern(&compiled, buffer, length, searchFrom, length - searchFrom, 0);
    if (hit < 0) break;
    hitLen = re_match_pattern(&compiled, buffer, length, hit, 0);
    if (hitLen <= 0) {
      hit = -1;
      break;
    }
    searchFrom = hit + hitLen;
  }

  if (hit < 0) hitLen = 0;
  if (matchlen) *matchlen = hitLen;
  if (compiled.buffer) free(compiled.buffer);
  return hit;
}
// Finds occurrence number n of regex counted backwards from the end of
// the receiver (n == 0 is the last occurrence).  The total number of
// matches is obtained from -numOfRegex:caseSensitive: and the request is
// then forwarded to -spotOfRegex:... as occurrence (total - 1 - n).
//
// Returns the buffer offset of the match, -1 if there is no such
// occurrence, and -2 if the regex is NULL or cannot be compiled.
// If matchlen is not NULL it receives the match length, or 0 on failure.
- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense length:(int *)matchlen
{
  int total = [self numOfRegex:regex caseSensitive:sense];

  // numOfRegex answers -1 for a NULL or uncompilable regex.  Feeding
  // that into the index arithmetic below yields a negative occurrence
  // number, which spotOfRegex rejects as "not found" (-1) before it ever
  // tries to compile -- hiding the error.  Report it as -2 here so both
  // spot methods signal a bad regex the same way.
  if (total < 0) {
    if (matchlen) *matchlen = 0;
    return -2;
  }
  return [self spotOfRegex:regex occurrenceNum:total-1-n caseSensitive:sense length:matchlen];
}
// Replaces occurrence number n of regex in the receiver with aString.
// As in -grep:occurrenceNum:..., passing MISC_STRING_LAST for n selects
// the last occurrence.  Nothing is changed when the occurrence is not
// found, the regex is unusable, or the match has no length.
//
// Always returns self.
- replaceRegex:(const char *)regex with:(const char *)aString occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
  int spot, len = 0;

  // Honour MISC_STRING_LAST the same way grep does; without this the
  // request falls through to spotOfRegex and never matches anything.
  if (n == MISC_STRING_LAST)
    spot = [self rspotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:&len];
  else
    spot = [self spotOfRegex:regex occurrenceNum:n caseSensitive:sense length:&len];

  if ((spot >= 0) && (len > 0))
    [self replaceFrom:spot length:len with:aString];
  return self;
}
// Reports whether the first occurrence of regex starts at offset 0 of
// the receiver.  Returns the length of that match if so, and 0 in every
// other case (NULL regex, any failure of the search, or a first match
// that begins somewhere other than offset 0).  Because failure and
// "no match" both yield 0, the result can be used like a BOOL in an
// if statement.
- (int)matchesRegex:(const char *)regex caseSensitive:(BOOL)sense
{
  int matchLen = 0;

  if (regex == NULL) return 0;
  if ([self spotOfRegex:regex occurrenceNum:0 caseSensitive:sense length:&matchLen] != 0)
    return 0;
  return matchLen;
}
// ---- Convenience methods ----
//
// Shorthand forms of -grep:occurrenceNum:caseSensitive:before:middle:after:.
// Every argument left out of the selector is supplied as follows:
// occurrenceNum 0, caseSensitive YES, and nil for the before/middle/after
// targets.
- (int)grep:(const char *)regex
{
  return [self grep:regex
      occurrenceNum:0
      caseSensitive:YES
             before:nil
             middle:nil
              after:nil];
}

- (int)grep:(const char *)regex caseSensitive:(BOOL)sense
{
  return [self grep:regex
      occurrenceNum:0
      caseSensitive:sense
             before:nil
             middle:nil
              after:nil];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n
{
  return [self grep:regex
      occurrenceNum:n
      caseSensitive:YES
             before:nil
             middle:nil
              after:nil];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
  return [self grep:regex
      occurrenceNum:n
      caseSensitive:sense
             before:nil
             middle:nil
              after:nil];
}

- (int)grep:(const char *)regex before:(id)b middle:(id)m after:(id)a
{
  return [self grep:regex
      occurrenceNum:0
      caseSensitive:YES
             before:b
             middle:m
              after:a];
}

- (int)grep:(const char *)regex caseSensitive:(BOOL)sense before:(id)b middle:(id)m after:(id)a
{
  return [self grep:regex
      occurrenceNum:0
      caseSensitive:sense
             before:b
             middle:m
              after:a];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n before:(id)b middle:(id)m after:(id)a
{
  return [self grep:regex
      occurrenceNum:n
      caseSensitive:YES
             before:b
             middle:m
              after:a];
}
// Case-sensitive shorthand for -numOfRegex:caseSensitive:.
- (int)numOfRegex:(const char *)regex
{
  return [self numOfRegex:regex
            caseSensitive:YES];
}
// Case-sensitive shorthand for
// -replaceEveryOccurrenceOfRegex:with:caseSensitive:.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex with:(const char *)aString
{
  return [self replaceEveryOccurrenceOfRegex:regex
                                        with:aString
                               caseSensitive:YES];
}
// Replaces every match of regex with the single character aChar by
// wrapping the character in a one-character C string and forwarding to
// -replaceEveryOccurrenceOfRegex:with:caseSensitive:.
// Returns -1 without doing anything when aChar is the NUL character;
// otherwise returns whatever the forwarded call returns.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withChar:(char)aChar caseSensitive:(BOOL)sense
{
  char oneChar[2];

  if (aChar == 0) return -1;

  oneChar[0] = aChar;
  oneChar[1] = 0;
  return [self replaceEveryOccurrenceOfRegex:regex
                                        with:oneChar
                               caseSensitive:sense];
}
// Case-sensitive shorthand for
// -replaceEveryOccurrenceOfRegex:withChar:caseSensitive:.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withChar:(char)aChar
{
  return [self replaceEveryOccurrenceOfRegex:regex
                                    withChar:aChar
                               caseSensitive:YES];
}

// Case-sensitive shorthand for
// -replaceEveryOccurrenceOfRegex:withString:caseSensitive:.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withString:(id)sender
{
  return [self replaceEveryOccurrenceOfRegex:regex
                                  withString:sender
                               caseSensitive:YES];
}
// Replaces every match of regex with the text obtained from
// [sender stringValue], forwarding to
// -replaceEveryOccurrenceOfRegex:with:caseSensitive:.
// Returns -1 when sender does not respond to -stringValue; otherwise
// returns whatever the forwarded call returns.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withString:(id)sender caseSensitive:(BOOL)sense
{
  if ([sender respondsTo:@selector(stringValue)]) {
    return [self replaceEveryOccurrenceOfRegex:regex
                                          with:[sender stringValue]
                                 caseSensitive:sense];
  }
  return -1;
}
// Shorthand forms of -spotOfRegex:occurrenceNum:caseSensitive:length:.
// Every argument left out of the selector is supplied as follows:
// occurrenceNum 0, caseSensitive YES, length NULL.
- (int)spotOfRegex:(const char *)regex
{
  return [self spotOfRegex:regex
             occurrenceNum:0
             caseSensitive:YES
                    length:NULL];
}

- (int)spotOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
  return [self spotOfRegex:regex
             occurrenceNum:0
             caseSensitive:sense
                    length:NULL];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n
{
  return [self spotOfRegex:regex
             occurrenceNum:n
             caseSensitive:YES
                    length:NULL];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
  return [self spotOfRegex:regex
             occurrenceNum:n
             caseSensitive:sense
                    length:NULL];
}

- (int)spotOfRegex:(const char *)regex length:(int *)matchlen
{
  return [self spotOfRegex:regex
             occurrenceNum:0
             caseSensitive:YES
                    length:matchlen];
}

- (int)spotOfRegex:(const char *)regex caseSensitive:(BOOL)sense length:(int *)matchlen
{
  return [self spotOfRegex:regex
             occurrenceNum:0
             caseSensitive:sense
                    length:matchlen];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n length:(int *)matchlen
{
  return [self spotOfRegex:regex
             occurrenceNum:n
             caseSensitive:YES
                    length:matchlen];
}
// Shorthand forms of -rspotOfRegex:occurrenceNum:caseSensitive:length:.
// Every argument left out of the selector is supplied as follows:
// occurrenceNum 0, caseSensitive YES, length NULL.
- (int)rspotOfRegex:(const char *)regex
{
  return [self rspotOfRegex:regex
              occurrenceNum:0
              caseSensitive:YES
                     length:NULL];
}

- (int)rspotOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
  return [self rspotOfRegex:regex
              occurrenceNum:0
              caseSensitive:sense
                     length:NULL];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n
{
  return [self rspotOfRegex:regex
              occurrenceNum:n
              caseSensitive:YES
                     length:NULL];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
  return [self rspotOfRegex:regex
              occurrenceNum:n
              caseSensitive:sense
                     length:NULL];
}

- (int)rspotOfRegex:(const char *)regex length:(int *)matchlen
{
  return [self rspotOfRegex:regex
              occurrenceNum:0
              caseSensitive:YES
                     length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex caseSensitive:(BOOL)sense length:(int *)matchlen
{
  return [self rspotOfRegex:regex
              occurrenceNum:0
              caseSensitive:sense
                     length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n length:(int *)matchlen
{
  return [self rspotOfRegex:regex
              occurrenceNum:n
              caseSensitive:YES
                     length:matchlen];
}
// Shorthand forms of -replaceRegex:with:occurrenceNum:caseSensitive:.
// An omitted occurrenceNum is passed as 0 and an omitted caseSensitive
// as YES.
- replaceRegex:(const char *)regex with:(const char *)aString
{
  return [self replaceRegex:regex
                       with:aString
              occurrenceNum:0
              caseSensitive:YES];
}

- replaceRegex:(const char *)regex with:(const char *)aString caseSensitive:(BOOL)sense
{
  return [self replaceRegex:regex
                       with:aString
              occurrenceNum:0
              caseSensitive:sense];
}

- replaceRegex:(const char *)regex with:(const char *)aString occurrenceNum:(int)n
{
  return [self replaceRegex:regex
                       with:aString
              occurrenceNum:n
              caseSensitive:YES];
}

// Shorthand forms of -replaceRegex:withChar:occurrenceNum:caseSensitive:,
// with the same defaults as above.
- replaceRegex:(const char *)regex withChar:(char)aChar
{
  return [self replaceRegex:regex
                   withChar:aChar
              occurrenceNum:0
              caseSensitive:YES];
}

- replaceRegex:(const char *)regex withChar:(char)aChar caseSensitive:(BOOL)sense
{
  return [self replaceRegex:regex
                   withChar:aChar
              occurrenceNum:0
              caseSensitive:sense];
}

- replaceRegex:(const char *)regex withChar:(char)aChar occurrenceNum:(int)n
{
  return [self replaceRegex:regex
                   withChar:aChar
              occurrenceNum:n
              caseSensitive:YES];
}
// Replaces occurrence number n of regex with the single character aChar
// by wrapping the character in a one-character C string and forwarding
// to -replaceRegex:with:occurrenceNum:caseSensitive:.
// Returns nil without doing anything when aChar is the NUL character;
// otherwise returns whatever the forwarded call returns.
- replaceRegex:(const char *)regex withChar:(char)aChar occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
  char oneChar[2];

  if (aChar == 0) return nil;

  oneChar[0] = aChar;
  oneChar[1] = 0;
  return [self replaceRegex:regex
                       with:oneChar
              occurrenceNum:n
              caseSensitive:sense];
}
// Shorthand forms of -replaceRegex:withString:occurrenceNum:caseSensitive:.
// An omitted occurrenceNum is passed as 0 and an omitted caseSensitive
// as YES.
- replaceRegex:(const char *)regex withString:(id)sender
{
  return [self replaceRegex:regex
                 withString:sender
              occurrenceNum:0
              caseSensitive:YES];
}

- replaceRegex:(const char *)regex withString:(id)sender caseSensitive:(BOOL)sense
{
  return [self replaceRegex:regex
                 withString:sender
              occurrenceNum:0
              caseSensitive:sense];
}

- replaceRegex:(const char *)regex withString:(id)sender occurrenceNum:(int)n
{
  return [self replaceRegex:regex
                 withString:sender
              occurrenceNum:n
              caseSensitive:YES];
}
// Replaces occurrence number n of regex with the text obtained from
// [sender stringValue], forwarding to
// -replaceRegex:with:occurrenceNum:caseSensitive:.
// When sender does not respond to -stringValue the receiver is left
// alone and self is returned; otherwise the forwarded call's result is
// returned.
- replaceRegex:(const char *)regex withString:(id)sender occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
  if ([sender respondsTo:@selector(stringValue)]) {
    return [self replaceRegex:regex
                         with:[sender stringValue]
                occurrenceNum:n
                caseSensitive:sense];
  }
  return self;
}
// Case-sensitive shorthand for -matchesRegex:caseSensitive:.
- (int)matchesRegex:(const char *)regex
{
  return [self matchesRegex:regex
              caseSensitive:YES];
}
@end
// These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.