This is MiscStringRegex.m in view mode; [Download] [Up]
//
//  MiscStringRegex.m
//      Regular expression matching and replacement routines using regexpr.c
//
//      Written by Carl Lindberg Copyright (c) 1994 by Carl Lindberg.
//              Version 1.95  All rights reserved.
//      This notice may not be removed from this source code.
//
//  This object is included in the MiscKit by permission from the author
//  and its use is governed by the MiscKit license, found in the file
//  "LICENSE.rtf" in the MiscKit distribution.  Please refer to that file
//  for a list of all applicable permissions and restrictions.
//

/* I decided to keep with MiscStringPatterns' -grep method, except to add
 * an occurrenceNum: parameter.  The old calls to this method should work
 * exactly as they used to.  I did not include the -grepString methods, as
 * I'm not sure how useful they would be, and there are a fair number of
 * methods here already.  If anyone thinks they were indeed useful, I can add
 * them in very easily.
 * On the other hand, MiscStringPatterns' -replace methods have been
 * rearranged.  To be more parallel with the rest of the MiscString class,
 * I made the global replace its own method, and added an occurrenceNum:
 * parameter to the basic replace method.
 * There are other possible methods here: -indexOfRegex and -rindexOfRegex
 * methods to go along with -spotOfRegex, and maybe a method to simply return
 * a matched portion in a new MiscString.  I'm going to let these go for the
 * time being; if people think they are useful I'll add them in then.
 */

#import <misckit/MiscString.h>

/* Prepares pat for searching: zeroes it, fills the 256-entry translate table
 * (identity, with 'A'-'Z' folded onto 'a'-'z' when sense is NO), attaches the
 * caller-owned fastmap/translate storage and compiles regex into it.
 * Returns YES on success.  On a compile error any buffer the compiler left
 * behind is freed, pat->buffer is cleared and NO is returned.
 * fastmap and translate must each hold 256 chars and must stay alive for as
 * long as pat is used, because pat only keeps pointers to them.
 */
static BOOL MiscRegexCompile(struct re_pattern_buffer *pat, char *fastmap,
                             char *translate, const char *regex, BOOL sense)
{
    int i;

    memset(pat, 0, sizeof(*pat));
    for (i = 0; i < 256; i++) translate[i] = i;
    if (!sense) {
        for (i = 'A'; i <= 'Z'; i++) translate[i] = i - 'A' + 'a';
    }
    pat->translate = translate;
    pat->fastmap = fastmap;

    if (re_compile_pattern((char *)regex, strlen(regex), pat)) {
        if (pat->buffer) free(pat->buffer);
        pat->buffer = NULL;
        return NO;
    }
    return YES;
}

@implementation MiscString(Regex)

/* Looks for occurrence n of regex (or the last occurrence when n is
 * MISC_STRING_LAST) and hands the text before, inside and after the match to
 * b, m and a respectively via -setStringValue:.  Receivers that are nil or do
 * not respond to -setStringValue: are skipped.  When nothing matched, every
 * eligible receiver is set to the empty string.
 * Returns 1 if a match was found, 0 if not, and -1 on a problem (including a
 * NULL regex).
 */
- (int)grep:(const char *)regex
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
    before:(id)b
    middle:(id)m
    after:(id)a
{
    int spot, len = 0;
    id tmpStr;

    if (!regex) return -1;

    if (n == MISC_STRING_LAST)
        spot = [self rspotOfRegex:regex occurrenceNum:0
                    caseSensitive:sense length:&len];
    else
        spot = [self spotOfRegex:regex occurrenceNum:n
                   caseSensitive:sense length:&len];

    // I decided on -setStringValue over -takeStringValueFrom....
    if ((spot >= 0) && (len > 0)) {
        if (b && [b respondsTo:@selector(setStringValue:)]) {
            // With a match at the very start this asks for 0..-1; a nil
            // result is turned into the empty string below.
            tmpStr = [self midFrom:0 to:spot-1];
            if (tmpStr) [b setStringValue:[tmpStr stringValue]];
            else [b setStringValue:""];
            [tmpStr free];
        }
        if (a && [a respondsTo:@selector(setStringValue:)]) {
            tmpStr = [self midFrom:spot+len to:length-1];
            if (tmpStr) [a setStringValue:[tmpStr stringValue]];
            else [a setStringValue:""];
            [tmpStr free];
        }
        if (m && [m respondsTo:@selector(setStringValue:)]) {
            tmpStr = [self midFrom:spot length:len];
            if (tmpStr) [m setStringValue:[tmpStr stringValue]];
            else [m setStringValue:""];
            [tmpStr free];
        }
    }
    else {
        // I'm not sure what I should do here.  Leave them alone?
        if (a && [a respondsTo:@selector(setStringValue:)])
            [a setStringValue:""];
        if (b && [b respondsTo:@selector(setStringValue:)])
            [b setStringValue:""];
        if (m && [m respondsTo:@selector(setStringValue:)])
            [m setStringValue:""];
    }

    // spotOfRegex returns -2 if a problem, -1 if not found, and >=0 if found.
    // Therefore, add one, then make everything above one equal one.
    if (++spot > 1) spot = 1;
    return spot;
}

/* Counts the non-overlapping, non-empty matches of regex in the receiver,
 * scanning left to right.  Counting stops at the first position where the
 * match length is not positive.
 * Returns the count, or -1 if regex is NULL or does not compile.
 */
- (int)numOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    struct re_pattern_buffer pat;    // or regexp_t (which is a pointer)
    char fm[256], tr[256];
    int currnum = 0, currspot = 0, len, pos;

    if (!regex) return -1;
    if (!MiscRegexCompile(&pat, fm, tr, regex, sense)) return -1;

    while ((pos = re_search_pattern(&pat, buffer, length, currspot,
                                    length-currspot, 0)) >= 0) {
        len = re_match_pattern(&pat, buffer, length, pos, 0);
        if (len <= 0) break;    // nothing consumed: going on would never advance
        currspot = pos + len;
        currnum++;
    }

    if (pat.buffer) free(pat.buffer);
    return currnum;
}

/* Replaces every non-overlapping, non-empty match of regex with aString.
 * The result is assembled in a temporary string of the receiver's class and
 * then copied back with -takeStringValueFrom:.
 * Returns the number of replacements made, or -1 if regex is NULL or does
 * not compile (the receiver is left untouched in that case).
 */
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex
    with:(const char *)aString
    caseSensitive:(BOOL)sense
{
    struct re_pattern_buffer pat;
    char fm[256], tr[256];
    int currnum = 0, currspot = 0, len, pos;
    id tmpStr;

    if (!regex) return -1;
    if (!MiscRegexCompile(&pat, fm, tr, regex, sense)) return -1;

    tmpStr = [[[self class] alloc] allocateBuffer:length];

    while ((pos = re_search_pattern(&pat, buffer, length, currspot,
                                    length-currspot, 0)) >= 0) {
        len = re_match_pattern(&pat, buffer, length, pos, 0);
        // Copy the untouched text between the previous match and this one.
        [tmpStr cat:buffer+currspot n:pos-currspot];
        if (len > 0) {
            [tmpStr cat:aString];
            currnum++;
            currspot = pos + len;
        }
        else {
            // Nothing consumed; stop here and let the tail copy below
            // pick up everything from pos onwards.
            currspot = pos;
            break;
        }
    }

    // Whatever follows the last replacement is carried over unchanged.
    [tmpStr cat:buffer+currspot n:length-currspot];
    [self takeStringValueFrom:tmpStr];
    [tmpStr free];

    if (pat.buffer) free(pat.buffer);
    return currnum;
}

/* Finds occurrence n (0 = first) of regex, counting non-overlapping,
 * non-empty matches from the left.
 * Returns the match position (>= 0) when found, -1 if n is negative or the
 * occurrence does not exist, and -2 if regex is NULL or does not compile.  A
 * negative result from the search routine itself is passed back unchanged.
 * If matchlen is non-NULL it receives the match length, or 0 whenever the
 * return value is negative.
 */
- (int)spotOfRegex:(const char *)regex
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
    length:(int *)matchlen
{
    struct re_pattern_buffer pat;    // or regexp_t (which is a pointer)
    char fm[256], tr[256];
    int currnum = 0, currspot = 0, len = 0;
    int pos = -1;

    if (!regex) {
        if (matchlen) *matchlen = 0;
        return -2;
    }
    if (n < 0) {
        if (matchlen) *matchlen = 0;
        return -1;
    }
    if (!MiscRegexCompile(&pat, fm, tr, regex, sense)) {
        if (matchlen) *matchlen = 0;
        return -2;
    }

    // Step over matches 0..n; when the loop ends because currnum passed n,
    // pos and len still describe match number n.
    while ((currnum <= n) &&
           ((pos = re_search_pattern(&pat, buffer, length, currspot,
                                     length-currspot, 0)) >= 0)) {
        len = re_match_pattern(&pat, buffer, length, pos, 0);
        if (len <= 0) {
            pos = -1;
            break;
        }
        currspot = pos + len;
        currnum++;
    }

    if (pos < 0) len = 0;
    if (matchlen) *matchlen = len;

    if (pat.buffer) free(pat.buffer);
    return pos;
}

/* Like -spotOfRegex:occurrenceNum:caseSensitive:length:, but n counts
 * occurrences from the end (0 = last).  The matches are counted first and
 * the request is translated into a forward occurrence number.
 * Returns -2, with *matchlen set to 0, when the count itself fails (NULL or
 * uncompilable regex), so that an error is not mistaken for "not found".
 */
- (int)rspotOfRegex:(const char *)regex
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
    length:(int *)matchlen
{
    int num = [self numOfRegex:regex caseSensitive:sense];

    if (num < 0) {
        if (matchlen) *matchlen = 0;
        return -2;
    }
    return [self spotOfRegex:regex occurrenceNum:num-1-n
               caseSensitive:sense length:matchlen];
}

/* Replaces occurrence n of regex with aString.  The receiver is left alone
 * when there is no such occurrence or the pattern is unusable.
 * Always returns self.
 */
- replaceRegex:(const char *)regex
    with:(const char *)aString
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
{
    int spot, len = 0;

    spot = [self spotOfRegex:regex occurrenceNum:n
               caseSensitive:sense length:&len];
    if ((spot >= 0) && (len > 0))
        [self replaceFrom:spot length:len with:aString];
    return self;
}

// I thought about having this return a negative number on an error, but I
// decided to return 0 on both error or not found, and the length of the
// matched portion otherwise.  That enables this method to be used kind of
// like a BOOL in an if statement.
// Note that only a first match that begins at position 0 counts.
- (int)matchesRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    int spot, len = 0;

    if (!regex) return 0;
    spot = [self spotOfRegex:regex occurrenceNum:0
               caseSensitive:sense length:&len];
    if (spot == 0) return len;
    return 0;
}

// Now all the convenience methods....  Each one fills in the defaults
// (first occurrence, case sensitive, no receivers / no length) and forwards
// to one of the primary methods above.

- (int)grep:(const char *)regex
{
    return [self grep:regex occurrenceNum:0 caseSensitive:YES
               before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex caseSensitive:(BOOL)sense
{
    return [self grep:regex occurrenceNum:0 caseSensitive:sense
               before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n
{
    return [self grep:regex occurrenceNum:n caseSensitive:YES
               before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex occurrenceNum:(int)n caseSensitive:(BOOL)sense
{
    return [self grep:regex occurrenceNum:n caseSensitive:sense
               before:nil middle:nil after:nil];
}

- (int)grep:(const char *)regex before:(id)b middle:(id)m after:(id)a
{
    return [self grep:regex occurrenceNum:0 caseSensitive:YES
               before:b middle:m after:a];
}

- (int)grep:(const char *)regex
    caseSensitive:(BOOL)sense
    before:(id)b middle:(id)m after:(id)a
{
    return [self grep:regex occurrenceNum:0 caseSensitive:sense
               before:b middle:m after:a];
}

- (int)grep:(const char *)regex
    occurrenceNum:(int)n
    before:(id)b middle:(id)m after:(id)a
{
    return [self grep:regex occurrenceNum:n caseSensitive:YES
               before:b middle:m after:a];
}

- (int)numOfRegex:(const char *)regex
{
    return [self numOfRegex:regex caseSensitive:YES];
}

- (int)replaceEveryOccurrenceOfRegex:(const char *)regex
    with:(const char *)aString
{
    return [self replaceEveryOccurrenceOfRegex:regex with:aString
                                 caseSensitive:YES];
}

// Wraps aChar in a one-character C string.  A NUL character is rejected
// with -1.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex
    withChar:(char)aChar
    caseSensitive:(BOOL)sense
{
    char str[2];

    if (!aChar) return -1;    //or should we let this go?
    str[0] = aChar;
    str[1] = 0;
    return [self replaceEveryOccurrenceOfRegex:regex with:str
                                 caseSensitive:sense];
}

- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withChar:(char)aChar
{
    return [self replaceEveryOccurrenceOfRegex:regex withChar:aChar
                                 caseSensitive:YES];
}

- (int)replaceEveryOccurrenceOfRegex:(const char *)regex withString:(id)sender
{
    return [self replaceEveryOccurrenceOfRegex:regex withString:sender
                                 caseSensitive:YES];
}

// Uses [sender stringValue] as the replacement text.  Returns -1 when sender
// does not respond to -stringValue.
- (int)replaceEveryOccurrenceOfRegex:(const char *)regex
    withString:(id)sender
    caseSensitive:(BOOL)sense
{
    if (![sender respondsTo:@selector(stringValue)]) return -1;
    return [self replaceEveryOccurrenceOfRegex:regex
                                          with:[sender stringValue]
                                 caseSensitive:sense];
}

- (int)spotOfRegex:(const char *)regex
{
    return [self spotOfRegex:regex occurrenceNum:0
               caseSensitive:YES length:NULL];
}

- (int)spotOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    return [self spotOfRegex:regex occurrenceNum:0
               caseSensitive:sense length:NULL];
}

- (int)spotOfRegex:(const char *)regex occurrenceNum:(int)n
{
    return [self spotOfRegex:regex occurrenceNum:n
               caseSensitive:YES length:NULL];
}

- (int)spotOfRegex:(const char *)regex
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
{
    return [self spotOfRegex:regex occurrenceNum:n
               caseSensitive:sense length:NULL];
}

- (int)spotOfRegex:(const char *)regex length:(int *)matchlen
{
    return [self spotOfRegex:regex occurrenceNum:0
               caseSensitive:YES length:matchlen];
}

- (int)spotOfRegex:(const char *)regex
    caseSensitive:(BOOL)sense
    length:(int *)matchlen
{
    return [self spotOfRegex:regex occurrenceNum:0
               caseSensitive:sense length:matchlen];
}

- (int)spotOfRegex:(const char *)regex
    occurrenceNum:(int)n
    length:(int *)matchlen
{
    return [self spotOfRegex:regex occurrenceNum:n
               caseSensitive:YES length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex
{
    return [self rspotOfRegex:regex occurrenceNum:0
                caseSensitive:YES length:NULL];
}

- (int)rspotOfRegex:(const char *)regex caseSensitive:(BOOL)sense
{
    return [self rspotOfRegex:regex occurrenceNum:0
                caseSensitive:sense length:NULL];
}

- (int)rspotOfRegex:(const char *)regex occurrenceNum:(int)n
{
    return [self rspotOfRegex:regex occurrenceNum:n
                caseSensitive:YES length:NULL];
}

- (int)rspotOfRegex:(const char *)regex
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
{
    return [self rspotOfRegex:regex occurrenceNum:n
                caseSensitive:sense length:NULL];
}

- (int)rspotOfRegex:(const char *)regex length:(int *)matchlen
{
    return [self rspotOfRegex:regex occurrenceNum:0
                caseSensitive:YES length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex
    caseSensitive:(BOOL)sense
    length:(int *)matchlen
{
    return [self rspotOfRegex:regex occurrenceNum:0
                caseSensitive:sense length:matchlen];
}

- (int)rspotOfRegex:(const char *)regex
    occurrenceNum:(int)n
    length:(int *)matchlen
{
    return [self rspotOfRegex:regex occurrenceNum:n
                caseSensitive:YES length:matchlen];
}

- replaceRegex:(const char *)regex with:(const char *)aString
{
    return [self replaceRegex:regex with:aString
                occurrenceNum:0 caseSensitive:YES];
}

- replaceRegex:(const char *)regex
    with:(const char *)aString
    caseSensitive:(BOOL)sense
{
    return [self replaceRegex:regex with:aString
                occurrenceNum:0 caseSensitive:sense];
}

- replaceRegex:(const char *)regex
    with:(const char *)aString
    occurrenceNum:(int)n
{
    return [self replaceRegex:regex with:aString
                occurrenceNum:n caseSensitive:YES];
}

- replaceRegex:(const char *)regex withChar:(char)aChar
{
    return [self replaceRegex:regex withChar:aChar
                occurrenceNum:0 caseSensitive:YES];
}

- replaceRegex:(const char *)regex
    withChar:(char)aChar
    caseSensitive:(BOOL)sense
{
    return [self replaceRegex:regex withChar:aChar
                occurrenceNum:0 caseSensitive:sense];
}

- replaceRegex:(const char *)regex
    withChar:(char)aChar
    occurrenceNum:(int)n
{
    return [self replaceRegex:regex withChar:aChar
                occurrenceNum:n caseSensitive:YES];
}

// Wraps aChar in a one-character C string.  A NUL character yields nil.
- replaceRegex:(const char *)regex
    withChar:(char)aChar
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
{
    char str[2];

    if (!aChar) return nil;    //or self? or check for this at all?
    str[0] = aChar;
    str[1] = 0;
    return [self replaceRegex:regex with:str
                occurrenceNum:n caseSensitive:sense];
}

- replaceRegex:(const char *)regex withString:(id)sender
{
    return [self replaceRegex:regex withString:sender
                occurrenceNum:0 caseSensitive:YES];
}

- replaceRegex:(const char *)regex
    withString:(id)sender
    caseSensitive:(BOOL)sense
{
    return [self replaceRegex:regex withString:sender
                occurrenceNum:0 caseSensitive:sense];
}

- replaceRegex:(const char *)regex
    withString:(id)sender
    occurrenceNum:(int)n
{
    return [self replaceRegex:regex withString:sender
                occurrenceNum:n caseSensitive:YES];
}

// Uses [sender stringValue] as the replacement text.  Returns self unchanged
// when sender does not respond to -stringValue.
- replaceRegex:(const char *)regex
    withString:(id)sender
    occurrenceNum:(int)n
    caseSensitive:(BOOL)sense
{
    if (![sender respondsTo:@selector(stringValue)]) return self;    //hmmm
    return [self replaceRegex:regex with:[sender stringValue]
                occurrenceNum:n caseSensitive:sense];
}

- (int)matchesRegex:(const char *)regex
{
    return [self matchesRegex:regex caseSensitive:YES];
}

@end
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.