// This is NSString+MiscRegex.m in view mode; [Download] [Up]  (banner left over from the web source viewer)
//
//  NSString+MiscRegex.m
//      Regular expression matching and replacement routines using regexpr.c
//
//  Written by Carl Lindberg Copyright (c) 1997 by Carl Lindberg.
//  Version 1.0 All rights reserved.
//  This notice may not be removed from this source code.
//
//  This object is included in the MiscKit by permission from the author
//  and its use is governed by the MiscKit license, found in the file
//  "LICENSE.rtf" in the MiscKit distribution. Please refer to that file
//  for a list of all applicable permissions and restrictions.
//
//  This file formatted with 4 spaces per tabstop.

#import "NSString+MiscRegex.h"
//#import <misckit/NSString+MiscRegex.h>
#import <Foundation/NSException.h>
#import <Foundation/NSValue.h>
#import <Foundation/NSCharacterSet.h>
#import <Foundation/NSArray.h>
#import <Foundation/NSDictionary.h>
#import <Foundation/NSUtilities.h>
#import <Foundation/NSScanner.h>
#import <stdlib.h>
#import <string.h>

/* Name of the exception raised for bad patterns and search errors. */
NSString *MiscRegexException = @"MiscRegexException";

/* Keys accepted by the -grep...forPiece(s): family of methods. */
NSString *MiscBeforePiece = @"MiscBeforePiece";
NSString *MiscMiddlePiece = @"MiscMiddlePiece";
NSString *MiscAfterPiece = @"MiscAfterPiece";
NSString *MiscBeforeRangePiece = @"MiscBeforeRangePiece";
NSString *MiscMiddleRangePiece = @"MiscMiddleRangePiece";
NSString *MiscAfterRangePiece = @"MiscAfterRangePiece";
NSString *MiscSubstringsPiece = @"MiscSubstringsPiece";
NSString *MiscSubrangesPiece = @"MiscSubrangesPiece";

/* Private functions */
static NSRange cStringRange(NSString *aString);
static NSRange _rangeFromReg(regexp_registers_t regs, int idx);
static NSString *_substringFromIndex(const char *cString, unsigned from, unsigned fullLength);
static NSString *_substringToIndex(const char *cString, unsigned to);
static NSString *_substringFromRange(const char *cString, NSRange range);
static NSArray *_substringsForRegisters(const char *cString, regexp_registers_t registers);
static NSArray *_subrangesForRegisters(regexp_registers_t registers);
static int _numOfRegexStruct(const char *cString, regexp_t pattern, int options, NSRange range);
static BOOL _findRegexStruct(const char *cString, regexp_t pattern, unsigned mask,
                             NSRange range, int n, regexp_registers_t registers);
static NSString *_processedStringFor(const char *cString, NSString *replaceString,
                                     unsigned mask, regexp_registers_t registers);

/*
 * All searching in this category is done on the receiver's C string
 * ([self cString]); every range taken or returned is therefore expressed
 * in C-string offsets (see cStringRange()), not in unichar indexes.
 */
@implementation NSString (MiscRegex)

/*
 * Returns the range of the match located by _findRegexStruct() for the
 * given options, occurrence number and search range, or {NSNotFound, 0}
 * when _findRegexStruct() reports no match.
 */
- (NSRange)rangeOfRegexStruct:(regexp_t)regex
                      options:(unsigned)mask
                occurrenceNum:(int)n
                        range:(NSRange)range
{
    struct re_registers registers;
    NSRange foundRange = { NSNotFound, 0 };

    if (_findRegexStruct([self cString], regex, mask, range, n, &registers))
    {
        foundRange = _rangeFromReg(&registers, 0);
    }

    return foundRange;
}

/*
 * Single-key form of -grepRegexStruct:forPieces:options:occurrenceNum:.
 * Returns the object stored under `key`, or nil if the key is not one of
 * the recognized Misc...Piece constants.
 */
- (id)grepRegexStruct:(regexp_t)regex
             forPiece:(NSString *)key
              options:(unsigned)mask
        occurrenceNum:(int)n
{
    NSArray *keyArray = [NSArray arrayWithObject:key];
    NSDictionary *theDict = [self grepRegexStruct:regex
                                        forPieces:keyArray
                                          options:mask
                                    occurrenceNum:n];

    return [theDict objectForKey:key];
}

/*
 * Searches the whole receiver for `regex` and returns a dictionary holding
 * one entry per recognized key in `keys`:
 *
 *   MiscBeforePiece       text before the match (the receiver itself if no match)
 *   MiscMiddlePiece       the matched text (@"" if no match)
 *   MiscAfterPiece        text after the match (@"" if no match)
 *   Misc...RangePiece     the same three pieces as NSRange-holding NSValues
 *   MiscSubstringsPiece   array from _substringsForRegisters() (empty if no match)
 *   MiscSubrangesPiece    array from _subrangesForRegisters() (empty if no match)
 *
 * Unrecognized keys are silently left out of the result.
 */
- (NSDictionary *)grepRegexStruct:(regexp_t)regex
                        forPieces:(NSArray *)keys
                          options:(unsigned)mask
                    occurrenceNum:(int)n
{
    struct re_registers registers;
    const char *cString = [self cString];
    NSRange searchRange = cStringRange(self);
    unsigned cStringLen = searchRange.length;
    NSRange foundRange = { NSNotFound, 0 };
    NSString *currKey;
    NSEnumerator *keyEnumerator = [keys objectEnumerator];
    NSMutableDictionary *resultDict =
        [NSMutableDictionary dictionaryWithCapacity:[keys count]];

    if (_findRegexStruct(cString, regex, mask, searchRange, n, &registers))
    {
        foundRange = _rangeFromReg(&registers, 0);
    }

    while ((currKey = [keyEnumerator nextObject]))
    {
        id theObject = nil;

        if ([currKey isEqualToString:MiscBeforePiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = self;
            else
                theObject = _substringToIndex(cString, foundRange.location);
        }
        else if ([currKey isEqualToString:MiscMiddlePiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = @"";
            else
                theObject = _substringFromRange(cString, foundRange);
        }
        else if ([currKey isEqualToString:MiscAfterPiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = @"";
            else
                theObject = _substringFromIndex(cString, NSMaxRange(foundRange), cStringLen);
        }
        else if ([currKey isEqualToString:MiscBeforeRangePiece])
        {
            /* With no match, the "before" range is the whole string. */
            NSRange aRange = { 0, [self cStringLength] };

            if (foundRange.location != NSNotFound)
                aRange.length = foundRange.location;
            theObject = [NSValue value:&aRange withObjCType:@encode(NSRange)];
        }
        else if ([currKey isEqualToString:MiscMiddleRangePiece])
        {
            theObject = [NSValue value:&foundRange withObjCType:@encode(NSRange)];
        }
        else if ([currKey isEqualToString:MiscAfterRangePiece])
        {
            NSRange aRange = { NSNotFound, 0 };

            if (foundRange.location != NSNotFound)
            {
                aRange.location = NSMaxRange(foundRange);
                aRange.length = [self cStringLength] - aRange.location;
            }
            theObject = [NSValue value:&aRange withObjCType:@encode(NSRange)];
        }
        else if ([currKey isEqualToString:MiscSubstringsPiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = [NSArray array];
            else
                theObject = _substringsForRegisters(cString, &registers);
        }
        else if ([currKey isEqualToString:MiscSubrangesPiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = [NSArray array];
            else
                theObject = _subrangesForRegisters(&registers);
        }

        if (theObject) [resultDict setObject:theObject forKey:currKey];
    }

    return resultDict;
}

/*
 * Returns an array of NSValues, each wrapping the NSRange of one match of
 * `pattern` in the receiver, in the order found.  Raises
 * MiscRegexException if re_search_pattern() returns -2.  `mask` is not
 * consulted by this method.
 */
- (NSArray *)rangesMatchedByRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    struct re_registers registers;
    char *myString = (char *)[self cString];
    int myLength = [self cStringLength];
    int startSpot = 0;
    int currSpot = 0;
    int lastEnd = 0;
    int posFound = -1;
    NSRange foundRange;
    NSMutableArray *rangeArray = [NSMutableArray array];
    NSValue *currValue;

    while ((currSpot <= myLength) &&
           (posFound = re_search_pattern(pattern, myString, myLength, currSpot,
                                         myLength - currSpot, &registers)) >= 0)
    {
        foundRange = _rangeFromReg(&registers, 0);

        /*
         * We don't want to have a zero-length match at the same spot a
         * previous match ended.  So, start looking at the next spot.
         */
        if (lastEnd == foundRange.location && foundRange.length == 0 &&
            currSpot > startSpot)
        {
            currSpot++;
            continue;
        }

        currValue = [NSValue value:&foundRange withObjCType:@encode(NSRange)];
        [rangeArray addObject:currValue];
        lastEnd = currSpot = NSMaxRange(foundRange);

        /* Step past an empty match so the loop always makes progress. */
        if (foundRange.length == 0) currSpot++;
    }

    if (posFound == -2)
        [NSException raise:MiscRegexException format:@"Error during regex search"];

    return rangeArray;
}

/*
 * Returns the matched text for every range reported by
 * -rangesMatchedByRegexStruct:options:.
 */
- (NSArray *)stringsMatchedByRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    const char *cString = [self cString];
    NSRange stringRange;
    NSEnumerator *rangeEnum;
    NSArray *rangesArray;
    NSMutableArray *stringsArray = [NSMutableArray array];
    NSValue *currValue;

    rangesArray = [self rangesMatchedByRegexStruct:pattern options:mask];
    rangeEnum = [rangesArray objectEnumerator];

    while ((currValue = [rangeEnum nextObject]))
    {
        [currValue getValue:&stringRange];
        [stringsArray addObject:_substringFromRange(cString, stringRange)];
    }

    return stringsArray;
}

/*
 * Splits the receiver around every match of `pattern`.  The result always
 * has one more element than there are matches: the text before the first
 * match, the text between consecutive matches, and the text after the last.
 */
- (NSArray *)componentsSeparatedByRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    const char *cString = [self cString];
    NSRange matchRange, stringRange;
    NSEnumerator *rangeEnum;
    NSArray *rangesArray;
    NSMutableArray *stringsArray = [NSMutableArray array];
    NSValue *currValue;
    unsigned lastMatchEnd = 0;

    rangesArray = [self rangesMatchedByRegexStruct:pattern options:mask];
    rangeEnum = [rangesArray objectEnumerator];

    while ((currValue = [rangeEnum nextObject]))
    {
        [currValue getValue:&matchRange];
        stringRange.location = lastMatchEnd;
        stringRange.length = matchRange.location - stringRange.location;
        [stringsArray addObject:_substringFromRange(cString, stringRange)];
        lastMatchEnd = NSMaxRange(matchRange);
    }

    /* Whatever follows the final match (possibly the empty string). */
    stringRange.location = lastMatchEnd;
    stringRange.length = [self cStringLength] - lastMatchEnd;
    [stringsArray addObject:_substringFromRange(cString, stringRange)];

    return stringsArray;
}

/*
 * Returns a new string in which the match located by _findRegexStruct()
 * is replaced by `replaceString` (after _processedStringFor() has had a
 * chance to expand it).  Returns the receiver itself when nothing matches.
 */
- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                                   options:(unsigned)mask
                             occurrenceNum:(int)n
                                     range:(NSRange)range
{
    struct re_registers registers;
    const char *cString = [self cString];
    NSRange theRange;
    NSString *realReplaceString;

    if (_findRegexStruct(cString, pattern, mask, range, n, &registers))
    {
        realReplaceString = _processedStringFor(cString, replaceString, mask, &registers);
        theRange = _rangeFromReg(&registers, 0);

        return [NSString stringWithFormat:@"%@%@%@",
                    _substringToIndex(cString, theRange.location),
                    realReplaceString,
                    _substringFromIndex(cString, NSMaxRange(theRange), [self cStringLength])];
    }

    return self;
}

/*
 * Returns a new string in which every match of `pattern` that lies within
 * `range` is replaced by `aString` (expanded per match by
 * _processedStringFor()).  Text outside `range` is copied through
 * unchanged.  Raises MiscRegexException if re_search_pattern() returns -2.
 */
- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                                 withString:(NSString *)aString
                                                    options:(unsigned)mask
                                                      range:(NSRange)range
{
    struct re_registers registers;
    char *myString = (char *)[self cString];
    unsigned cStringLen = [self cStringLength];
    int myLength = NSMaxRange(range);       /* searching stops at the end of range */
    int startSpot = range.location;
    int currSpot = range.location;
    int lastEnd = 0;                        /* 0 so the text before range is kept */
    int posFound = -1;
    NSMutableString *newString = [NSMutableString stringWithCapacity:[self length]];
    NSRange foundRange;
    NSRange betweenRange;
    NSString *replaceString;

    while ((currSpot <= myLength) &&
           (posFound = re_search_pattern(pattern, myString, myLength, currSpot,
                                         myLength - currSpot, &registers)) >= 0)
    {
        foundRange = _rangeFromReg(&registers, 0);
        betweenRange.location = lastEnd;
        betweenRange.length = foundRange.location - lastEnd;

        /*
         * Skip an empty match sitting right where the previous match
         * ended; retry one position further on.
         */
        if (betweenRange.length == 0 && foundRange.length == 0 &&
            currSpot > startSpot)
        {
            currSpot++;
            continue;
        }

        if (betweenRange.length > 0)
            [newString appendString:_substringFromRange(myString, betweenRange)];

        replaceString = _processedStringFor(myString, aString, mask, &registers);
        [newString appendString:replaceString];

        lastEnd = currSpot = NSMaxRange(foundRange);

        /* Step past an empty match so the loop always makes progress. */
        if (foundRange.length == 0) currSpot++;
    }

    if (posFound == -2)
        [NSException raise:MiscRegexException format:@"Error during regex search"];

    /* Copy the unmatched tail, including anything beyond the end of range. */
    if (lastEnd < cStringLen)
        [newString appendString:_substringFromIndex(myString, lastEnd, cStringLen)];

    return newString;
}

/*
 * Returns the value computed by _numOfRegexStruct() for `pattern` within
 * `range` of the receiver's C string.
 */
- (unsigned)numOfRegexStruct:(regexp_t)pattern
                     options:(unsigned)mask
                       range:(NSRange)range
{
    return _numOfRegexStruct([self cString], pattern, mask, range);
}

/*
 * Unfortunately, re_compile_pattern() doesn't catch all possible problems
 * -- it accepts things like "a**", which will cause a runtime error when
 * searching.  Most errors are caught though.  So, I changed the
 * implementation to be an NS_DURING/NSHANDLER around a call to _findRegex,
 * which ensures the above errors are caught.  I wanted to avoid having to
 * do that, but...
 */
- (BOOL)isValidRegex
{
    /*
     * volatile: pattern is assigned inside NS_DURING and read after the
     * handler.  Where NS_DURING is built on setjmp/longjmp, a non-volatile
     * local could hold a stale value after an exception, leaking the struct.
     */
    regexp_t volatile pattern = NULL;
    BOOL isValid;

    NS_DURING
        pattern = MiscNewRegexStruct(self, 0);
        [@"foobar _20\n30baz" numOfRegexStruct:pattern];
        isValid = YES;
    NS_HANDLER
        isValid = NO;
    NS_ENDHANDLER

    if (pattern) MiscFreeRegexStruct(pattern);

    return isValid;

    /*
     * The original implementation, which didn't have to resort to catching
     * exceptions but missed a few bad patterns.
     */
    /*
    struct re_pattern_buffer pattern;
    char translate[256];
    char *errorString;
    int i;

    memset(&pattern, 0, sizeof(struct re_pattern_buffer));
    for (i=0;i<256;i++) translate[i] = i;
    pattern.translate = translate;
    errorString = re_compile_pattern((char*)[self cString], [self cStringLength], &pattern);
    if (pattern.buffer) free(pattern.buffer);

    if (errorString) return NO;
    else return YES;
    */
}

/*
 * The NSString-pattern methods below each compile `regex` with
 * MiscNewRegexStruct(), forward to the matching ...RegexStruct: method and
 * free the compiled pattern again before returning.  MiscNewRegexStruct()
 * raises MiscRegexException for a nil or malformed pattern.
 */

- (unsigned)numOfRegex:(NSString *)regex
               options:(unsigned)mask
                 range:(NSRange)range
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    unsigned retVal;

    retVal = [self numOfRegexStruct:pattern options:mask range:range];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSRange)rangeOfRegex:(NSString *)regex
                options:(unsigned)mask
          occurrenceNum:(int)n
                  range:(NSRange)range
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    NSRange retVal;

    retVal = [self rangeOfRegexStruct:pattern options:mask occurrenceNum:n range:range];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (id)grep:(NSString *)regex
  forPiece:(NSString *)key
   options:(unsigned)mask
occurrenceNum:(int)n
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    id retVal;

    retVal = [self grepRegexStruct:pattern forPiece:key options:mask occurrenceNum:n];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSDictionary *)grep:(NSString *)regex
             forPieces:(NSArray *)keys
               options:(unsigned)mask
         occurrenceNum:(int)n
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    NSDictionary *retVal;

    retVal = [self grepRegexStruct:pattern forPieces:keys options:mask occurrenceNum:n];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex
                                           withString:(NSString *)aString
                                              options:(unsigned)mask
                                                range:(NSRange)range
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    id retVal;

    retVal = [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern
                                                      withString:aString
                                                         options:mask
                                                           range:range];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                             options:(unsigned)mask
                       occurrenceNum:(int)n
                               range:(NSRange)range
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    id retVal;

    retVal = [self stringByReplacingRegexStruct:pattern
                                     withString:replaceString
                                        options:mask
                                  occurrenceNum:n
                                          range:range];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSArray *)stringsMatchedByRegex:(NSString *)regex options:(unsigned)mask
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    NSArray *retVal;

    retVal = [self stringsMatchedByRegexStruct:pattern options:mask];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSArray *)rangesMatchedByRegex:(NSString *)regex options:(unsigned)mask
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    NSArray *retVal;

    retVal = [self rangesMatchedByRegexStruct:pattern options:mask];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

- (NSArray *)componentsSeparatedByRegex:(NSString *)regex options:(unsigned)mask
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);
    NSArray *retVal;

    retVal = [self componentsSeparatedByRegexStruct:pattern options:mask];
    MiscFreeRegexStruct(pattern);

    return retVal;
}

/*******************************************************************/
/**************  Convenience methods  ******************************/
/*******************************************************************/

/*
 * Each method below forwards to the full-argument method of the same
 * family, supplying 0 for a missing options mask, 0 for a missing
 * occurrence number and cStringRange(self) for a missing range.
 */

- (unsigned)numOfRegex:(NSString *)regex
{
    return [self numOfRegex:regex options:0 range:cStringRange(self)];
}

- (unsigned)numOfRegex:(NSString *)regex options:(unsigned)mask
{
    return [self numOfRegex:regex options:mask range:cStringRange(self)];
}

- (unsigned)numOfRegex:(NSString *)regex range:(NSRange)range
{
    return [self numOfRegex:regex options:0 range:range];
}

- (NSRange)rangeOfRegex:(NSString *)regex
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:0 range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask
{
    return [self rangeOfRegex:regex options:mask occurrenceNum:0 range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex occurrenceNum:(int)n
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:n range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask occurrenceNum:(int)n
{
    return [self rangeOfRegex:regex options:mask occurrenceNum:n range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex range:(NSRange)range
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask range:(NSRange)range
{
    return [self rangeOfRegex:regex options:mask occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegex:(NSString *)regex occurrenceNum:(int)n range:(NSRange)range
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:n range:range];
}

- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys
{
    return [self grep:regex forPieces:keys options:0 occurrenceNum:0];
}

- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys options:(unsigned)mask
{
    return [self grep:regex forPieces:keys options:mask occurrenceNum:0];
}

- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys occurrenceNum:(int)n
{
    return [self grep:regex forPieces:keys options:0 occurrenceNum:n];
}

- (id)grep:(NSString *)regex forPiece:(NSString *)keys
{
    return [self grep:regex forPiece:keys options:0 occurrenceNum:0];
}

- (id)grep:(NSString *)regex forPiece:(NSString *)keys options:(unsigned)mask
{
    return [self grep:regex forPiece:keys options:mask occurrenceNum:0];
}

- (id)grep:(NSString *)regex forPiece:(NSString *)keys occurrenceNum:(int)n
{
    return [self grep:regex forPiece:keys options:0 occurrenceNum:n];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex
                                           withString:(NSString *)aString
{
    return [self stringByReplacingEveryOccurrenceOfRegex:regex
                                              withString:aString
                                                 options:0
                                                   range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex
                                           withString:(NSString *)aString
                                              options:(unsigned)mask
{
    return [self stringByReplacingEveryOccurrenceOfRegex:regex
                                              withString:aString
                                                 options:mask
                                                   range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex
                                           withString:(NSString *)aString
                                                range:(NSRange)range
{
    return [self stringByReplacingEveryOccurrenceOfRegex:regex
                                              withString:aString
                                                 options:0
                                                   range:range];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:0
                          occurrenceNum:0
                                  range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                             options:(unsigned)mask
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:mask
                          occurrenceNum:0
                                  range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                       occurrenceNum:(int)n
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:0
                          occurrenceNum:n
                                  range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                             options:(unsigned)mask
                       occurrenceNum:(int)n
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:mask
                          occurrenceNum:n
                                  range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                               range:(NSRange)range
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:0
                          occurrenceNum:0
                                  range:range];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                             options:(unsigned)mask
                               range:(NSRange)range
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:mask
                          occurrenceNum:0
                                  range:range];
}

- (NSString *)stringByReplacingRegex:(NSString *)regex
                          withString:(NSString *)replaceString
                       occurrenceNum:(int)n
                               range:(NSRange)range
{
    return [self stringByReplacingRegex:regex
                             withString:replaceString
                                options:0
                          occurrenceNum:n
                                  range:range];
}

- (NSArray *)stringsMatchedByRegex:(NSString *)regex
{
    return [self stringsMatchedByRegex:regex options:0];
}

- (NSArray *)rangesMatchedByRegex:(NSString *)regex
{
    return [self rangesMatchedByRegex:regex options:0];
}

- (NSArray *)componentsSeparatedByRegex:(NSString *)regex
{
    return [self componentsSeparatedByRegex:regex options:0];
}

- (unsigned)numOfRegexStruct:(regexp_t)pattern
{
    return [self numOfRegexStruct:pattern options:0 range:cStringRange(self)];
}

- (unsigned)numOfRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    return [self numOfRegexStruct:pattern options:mask range:cStringRange(self)];
}

- (unsigned)numOfRegexStruct:(regexp_t)pattern range:(NSRange)range
{
    return [self numOfRegexStruct:pattern options:0 range:range];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:0 range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    return [self rangeOfRegexStruct:pattern options:mask occurrenceNum:0 range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern occurrenceNum:(int)n
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:n range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern options:(unsigned)mask occurrenceNum:(int)n
{
    return [self rangeOfRegexStruct:pattern options:mask occurrenceNum:n range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern range:(NSRange)range
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern options:(unsigned)mask range:(NSRange)range
{
    return [self rangeOfRegexStruct:pattern options:mask occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern occurrenceNum:(int)n range:(NSRange)range
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:n range:range];
}

- (NSDictionary *)grepRegexStruct:(regexp_t)pattern forPieces:(NSArray *)keys
{
    return [self grepRegexStruct:pattern forPieces:keys options:0 occurrenceNum:0];
}

- (NSDictionary *)grepRegexStruct:(regexp_t)pattern
                        forPieces:(NSArray *)keys
                          options:(unsigned)mask
{
    return [self grepRegexStruct:pattern forPieces:keys options:mask occurrenceNum:0];
}

- (NSDictionary *)grepRegexStruct:(regexp_t)pattern
                        forPieces:(NSArray *)keys
                    occurrenceNum:(int)n
{
    return [self grepRegexStruct:pattern forPieces:keys options:0 occurrenceNum:n];
}

- (id)grepRegexStruct:(regexp_t)pattern forPiece:(NSString *)keys
{
    return [self grepRegexStruct:pattern forPiece:keys options:0 occurrenceNum:0];
}

- (id)grepRegexStruct:(regexp_t)pattern forPiece:(NSString *)keys options:(unsigned)mask
{
    return [self grepRegexStruct:pattern forPiece:keys options:mask occurrenceNum:0];
}

- (id)grepRegexStruct:(regexp_t)pattern forPiece:(NSString *)keys occurrenceNum:(int)n
{
    return [self grepRegexStruct:pattern forPiece:keys options:0 occurrenceNum:n];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                                 withString:(NSString *)aString
{
    return [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern
                                                    withString:aString
                                                       options:0
                                                         range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                                 withString:(NSString *)aString
                                                    options:(unsigned)mask
{
    return [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern
                                                    withString:aString
                                                       options:mask
                                                         range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                                 withString:(NSString *)aString
                                                      range:(NSRange)range
{
    return [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern
                                                    withString:aString
                                                       options:0
                                                         range:range];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:0
                                occurrenceNum:0
                                        range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                                   options:(unsigned)mask
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:mask
                                occurrenceNum:0
                                        range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                             occurrenceNum:(int)n
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:0
                                occurrenceNum:n
                                        range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                                   options:(unsigned)mask
                             occurrenceNum:(int)n
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:mask
                                occurrenceNum:n
                                        range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                                     range:(NSRange)range
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:0
                                occurrenceNum:0
                                        range:range];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                                   options:(unsigned)mask
                                     range:(NSRange)range
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:mask
                                occurrenceNum:0
                                        range:range];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
                                withString:(NSString *)replaceString
                             occurrenceNum:(int)n
                                     range:(NSRange)range
{
    return [self stringByReplacingRegexStruct:pattern
                                   withString:replaceString
                                      options:0
                                occurrenceNum:n
                                        range:range];
}

- (NSArray *)stringsMatchedByRegexStruct:(regexp_t)pattern
{
    return [self stringsMatchedByRegexStruct:pattern options:0];
}

- (NSArray *)rangesMatchedByRegexStruct:(regexp_t)pattern
{
    return [self rangesMatchedByRegexStruct:pattern options:0];
}

- (NSArray *)componentsSeparatedByRegexStruct:(regexp_t)pattern
{
    return [self componentsSeparatedByRegexStruct:pattern options:0];
}

@end


/*
 * In-place counterparts of the NSString replacement methods.
 */
@implementation NSMutableString (MiscRegex)

/* Replaces the receiver's contents with the result of the non-mutating method. */
- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex
                           withString:(NSString *)replaceString
                              options:(unsigned)mask
                                range:(NSRange)range
{
    NSString *newString = [self stringByReplacingEveryOccurrenceOfRegex:regex
                                                             withString:replaceString
                                                                options:mask
                                                                  range:range];

    [self setString:newString];
}

/* Replaces the receiver's contents with the result of the non-mutating method. */
- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                 withString:(NSString *)replaceString
                                    options:(unsigned)mask
                                      range:(NSRange)range
{
    NSString *newString = [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern
                                                                   withString:replaceString
                                                                      options:mask
                                                                        range:range];

    [self setString:newString];
}

/*
 * Replaces the match located by _findRegexStruct() with `replaceString`
 * (expanded by _processedStringFor()).  Does nothing when there is no match.
 * NOTE(review): the range handed to -replaceCharactersInRange:withString:
 * comes from the C-string search; confirm callers only rely on this where
 * C-string offsets and character indexes coincide.
 */
- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
                   options:(unsigned)mask
             occurrenceNum:(int)n
                     range:(NSRange)range
{
    struct re_registers registers;

    if (_findRegexStruct([self cString], pattern, mask, range, n, &registers))
    {
        NSRange foundRange = _rangeFromReg(&registers, 0);
        NSString *processedString;

        processedString = _processedStringFor([self cString], replaceString, mask, &registers);
        [self replaceCharactersInRange:foundRange withString:processedString];
    }
}

/* Compiles `regex`, performs the single replacement and frees the pattern. */
- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
             options:(unsigned)mask
       occurrenceNum:(int)n
               range:(NSRange)range
{
    regexp_t pattern = MiscNewRegexStruct(regex, mask);

    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:mask
               occurrenceNum:n
                       range:range];
    MiscFreeRegexStruct(pattern);
}

/*******************************************************************/
/**************  Convenience methods  ******************************/
/*******************************************************************/

/*
 * As in the NSString category: a missing options mask defaults to 0, a
 * missing occurrence number to 0 and a missing range to cStringRange(self).
 */

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString
{
    [self replaceRegex:regex
            withString:replaceString
               options:0
         occurrenceNum:0
                 range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
             options:(unsigned)mask
{
    [self replaceRegex:regex
            withString:replaceString
               options:mask
         occurrenceNum:0
                 range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
       occurrenceNum:(int)n
{
    [self replaceRegex:regex
            withString:replaceString
               options:0
         occurrenceNum:n
                 range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
             options:(unsigned)mask
       occurrenceNum:(int)n
{
    [self replaceRegex:regex
            withString:replaceString
               options:mask
         occurrenceNum:n
                 range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
               range:(NSRange)range
{
    [self replaceRegex:regex
            withString:replaceString
               options:0
         occurrenceNum:0
                 range:range];
}

- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
             options:(unsigned)mask
               range:(NSRange)range
{
    [self replaceRegex:regex
            withString:replaceString
               options:mask
         occurrenceNum:0
                 range:range];
}

- (void)replaceRegex:(NSString *)regex
          withString:(NSString *)replaceString
       occurrenceNum:(int)n
               range:(NSRange)range
{
    [self replaceRegex:regex
            withString:replaceString
               options:0
         occurrenceNum:n
                 range:range];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:0
               occurrenceNum:0
                       range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
                   options:(unsigned)mask
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:mask
               occurrenceNum:0
                       range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
             occurrenceNum:(int)n
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:0
               occurrenceNum:n
                       range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
                   options:(unsigned)mask
             occurrenceNum:(int)n
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:mask
               occurrenceNum:n
                       range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
                     range:(NSRange)range
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:0
               occurrenceNum:0
                       range:range];
}

- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
                   options:(unsigned)mask
                     range:(NSRange)range
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:mask
               occurrenceNum:0
                       range:range];
}

- (void)replaceRegexStruct:(regexp_t)pattern
                withString:(NSString *)replaceString
             occurrenceNum:(int)n
                     range:(NSRange)range
{
    [self replaceRegexStruct:pattern
                  withString:replaceString
                     options:0
               occurrenceNum:n
                       range:range];
}

- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                 withString:(NSString *)replaceString
{
    [self replaceEveryOccurrenceOfRegexStruct:pattern
                                   withString:replaceString
                                      options:0
                                        range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                 withString:(NSString *)replaceString
                                    options:(unsigned)mask
{
    [self replaceEveryOccurrenceOfRegexStruct:pattern
                                   withString:replaceString
                                      options:mask
                                        range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern
                                 withString:(NSString *)replaceString
                                      range:(NSRange)range
{
    [self replaceEveryOccurrenceOfRegexStruct:pattern
                                   withString:replaceString
                                      options:0
                                        range:range];
}

- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex
                           withString:(NSString *)replaceString
{
    [self replaceEveryOccurrenceOfRegex:regex
                             withString:replaceString
                                options:0
                                  range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex
                           withString:(NSString *)replaceString
                              options:(unsigned)mask
{
    [self replaceEveryOccurrenceOfRegex:regex
                             withString:replaceString
                                options:mask
                                  range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex
                           withString:(NSString *)replaceString
                                range:(NSRange)range
{
    [self replaceEveryOccurrenceOfRegex:regex
                             withString:replaceString
                                options:0
                                  range:range];
}

@end


#define NS_CHARSET(str) [NSCharacterSet characterSetWithCharactersInString:str]

/*
 * Converts register `idx` of a match into an NSRange.  A negative start
 * offset yields {NSNotFound, 0}.
 */
static NSRange _rangeFromReg(regexp_registers_t regs, int idx)
{
    NSRange theRange = {NSNotFound, 0};

    if (regs->start[idx] >= 0)
    {
        theRange.location = regs->start[idx];
        theRange.length = regs->end[idx] - theRange.location;
    }

    return theRange;
}

/*
 * Allocates and compiles a pattern buffer for `regexString`.
 *
 * The buffer gets a 256-entry translate table (identity, or upper-to-lower
 * case folding when NSCaseInsensitiveSearch is set in `options`) and, when
 * MiscFasterSearch is set, a compiled fastmap.  The result must be released
 * with MiscFreeRegexStruct().
 *
 * Raises MiscRegexException if `regexString` is nil, if memory cannot be
 * allocated, or if re_compile_pattern() reports an error.
 */
regexp_t MiscNewRegexStruct(NSString *regexString, unsigned options)
{
    regexp_t pattern;
    char *translate;
    int i;
    char *errorString;

    if (!regexString)
    {
        [NSException raise:MiscRegexException
                    format:@"MiscNewRegexStruct(): nil regex string"];
    }

    pattern = (regexp_t)malloc( sizeof(struct re_pattern_buffer) );
    translate = (char *) malloc( sizeof(char) * 256 );
    if (!pattern || !translate)
    {
        /* free(NULL) is a no-op, so both can be released unconditionally. */
        free(translate);
        free(pattern);
        [NSException raise:MiscRegexException
                    format:@"MiscNewRegexStruct(): out of memory"];
    }

    memset(pattern, 0, sizeof(struct re_pattern_buffer));

    for (i=0;i<256;i++) translate[i] = i;
    if (options & NSCaseInsensitiveSearch)
    {
        for (i='A'; i<='Z'; i++) translate[i] = i - 'A' + 'a';
    }
    pattern->translate = translate;

    errorString = re_compile_pattern((char*)[regexString cString],
                                     [regexString cStringLength], pattern);
    if (errorString)
    {
        free(translate);
        if (pattern->buffer) free(pattern->buffer);
        free(pattern);
        [NSException raise:MiscRegexException
                    format:@"MiscNewRegexStruct(): Malformed regular expression '%@': %s",
                           regexString, errorString];
    }

    if (options & MiscFasterSearch)
    {
        char *fastMap = (char *)malloc( sizeof(char) * 256 );

        // memset(fastMap,0,256); // the regexpr code does this

        /*
         * If the allocation failed, leave fastmap NULL: that is the same
         * state a pattern has when MiscFasterSearch is not requested.
         */
        if (fastMap)
        {
            pattern->fastmap = fastMap;
            re_compile_fastmap(pattern);
        }
    }

    return pattern;
}

/*
 * Releases a pattern created by MiscNewRegexStruct(), including its
 * fastmap, compiled buffer and translate table.  Passing NULL is a no-op.
 */
void MiscFreeRegexStruct(regexp_t pattern)
{
    if (!pattern) return;

    if (pattern->fastmap) free(pattern->fastmap);
    if (pattern->buffer) free(pattern->buffer);
    free (pattern->translate);
    free (pattern);
}

/*
 * Expands match references in `replaceString` against the registers of the
 * current match.  Unless MiscUseMatchSubstitutions is set in `mask`, the
 * replacement string is returned untouched.  Otherwise:
 *
 *   &        inserts the whole match (register 0)
 *   $<digit> inserts the text of that register, if it is below RE_NREGS
 *            and participated in the match; otherwise inserts nothing
 *   $& / $$  insert a literal '&' / '$'
 *   $<other> and a trailing $ insert a literal '$'
 */
static NSString *_processedStringFor(const char *cString, NSString *replaceString,
                                     unsigned mask, regexp_registers_t registers)
{
    int len = [replaceString length];
    NSMutableString *processedString;
    static NSCharacterSet *digitCharacters;
    static NSCharacterSet *specialChars;
    static NSCharacterSet *ampersandSet;
    static NSCharacterSet *needToQuoteChars;
    static NSCharacterSet *noCharacterSet;
    NSScanner *scanner;
    NSString *tempString;
    unichar theChar, nextChar;
    NSRange theRange;
    int matchNum;
    unsigned scanLocation;

    if (!(mask & MiscUseMatchSubstitutions)) return replaceString;

    /*
     * The sets are cached for the life of the process, so they must be
     * owned: the factory methods return autoreleased objects, and a bare
     * assignment would dangle once the pool drains under manual reference
     * counting.  -copy gives an owning reference under MRC and ARC alike.
     */
    if (!digitCharacters)
        digitCharacters = [[NSCharacterSet decimalDigitCharacterSet] copy];
    if (!specialChars) specialChars = [NS_CHARSET(@"$&") copy];
    if (!ampersandSet) ampersandSet = [NS_CHARSET(@"&") copy];
    if (!needToQuoteChars) needToQuoteChars = [NS_CHARSET(@"&$") copy];
    if (!noCharacterSet) noCharacterSet = [NS_CHARSET(@"") copy];

    processedString = [NSMutableString stringWithCapacity:len];
    scanner = [NSScanner scannerWithString:replaceString];
    [scanner setCharactersToBeSkipped:noCharacterSet];

    while (![scanner isAtEnd])
    {
        matchNum = -1;
        tempString = nil;

        /* Copy the literal run up to the next '$' or '&'. */
        [scanner scanUpToCharactersFromSet:specialChars intoString:&tempString];
        if (tempString) [processedString appendString:tempString];
        scanLocation = [scanner scanLocation];

        if (![scanner isAtEnd])
        {
            theChar = [replaceString characterAtIndex:scanLocation];

            if ([ampersandSet characterIsMember:theChar])
            {
                matchNum = 0;
                [scanner setScanLocation:scanLocation+1];
            }
            else if (scanLocation < (len-1))    // It's a dollar sign; see what follows
            {
                nextChar = [replaceString characterAtIndex:scanLocation+1];

                if ([digitCharacters characterIsMember:nextChar])
                {
                    matchNum = nextChar - '0';
                    [scanner setScanLocation:scanLocation+2];
                }
                else if ([needToQuoteChars characterIsMember:nextChar])
                {
                    [processedString appendFormat:@"%c", nextChar];
                    [scanner setScanLocation:scanLocation+2];
                }
                else
                {
                    [processedString appendString:@"$"];
                    [scanner setScanLocation:scanLocation+1];
                }
            }
            else    // it's a dollar sign at the end of the string
            {
                [processedString appendString:@"$"];
                [scanner setScanLocation:scanLocation+1];
            }

            if (matchNum >= 0 && (matchNum < RE_NREGS) &&
                (registers->start[matchNum] >= 0))
            {
                theRange = _rangeFromReg(registers, matchNum);
                [processedString appendString:_substringFromRange(cString, theRange)];
            }
        }
    }

    return processedString;
}

/* Returns the `range` portion of `cString` as an NSString. */
static NSString *_substringFromRange(const char *cString, NSRange range)
{
    return [NSString stringWithCString:cString+range.location length:range.length];
}

/* Returns the first `to` bytes of `cString` as an NSString. */
static NSString *_substringToIndex(const char *cString, unsigned to)
{
    return [NSString stringWithCString:cString length:to];
}

/* Returns bytes [from, fullLength) of `cString` as an NSString. */
static NSString *_substringFromIndex(const char *cString, unsigned from, unsigned fullLength)
{
    return [NSString stringWithCString:cString + from length:fullLength - from];
}

/* Returns the range {0, [aString cStringLength]}. */
static NSRange cStringRange(NSString *aString)
{
    NSRange theRange = { 0, [aString cStringLength] };
    return theRange;
}

static BOOL _findRegexStruct(const char *cString, regexp_t pattern, unsigned mask,
                             NSRange range, int n, regexp_registers_t registers)
{
    char *myString = (char*)cString;
    int myLength = NSMaxRange(range);
    int startSpot = range.location;
    int numFound = 0;
    int currSpot = range.location;
    int lastEnd = 0;
    int posFound = -1;
    int matchLen = -1;

    if (mask & NSBackwardsSearch)
    {
        int
num = _numOfRegexStruct(cString, pattern, mask, range); n = num - 1 - n; } if (n < 0) { return NO; } while ((numFound <= n) && (currSpot <= myLength) && ((posFound = re_search_pattern(pattern, myString, myLength, currSpot, myLength-currSpot, registers)) >= 0)) { matchLen = _rangeFromReg(registers, 0).length; if (matchLen == 0 && currSpot == lastEnd && currSpot > startSpot) { currSpot++; continue; } if (matchLen >= 0 && numFound <= n) { lastEnd=currSpot = posFound + matchLen; if (matchLen == 0) currSpot++; numFound++; } else { posFound = -1; break; } } if (n >= numFound) posFound = -1; if (posFound == -2 || matchLen == -2) [NSException raise:MiscRegexException format:@"Error while searching"]; if (posFound < 0) return NO; if (mask & NSAnchoredSearch) { if (mask & NSBackwardsSearch) { if (registers->end[0] == NSMaxRange(range)) return YES; } else { if (registers->start[0] == range.location) return YES; } return NO; } return YES; } static NSArray *_substringsForRegisters(const char *cString, regexp_registers_t registers) { NSMutableArray *substringArray = [NSMutableArray array]; NSRange currRange; int highestFound, i; for (i=0,highestFound=0; i < RE_NREGS; i++) if (registers->start[i] >= 0) highestFound = i; for (i=0; i <= highestFound; i++) { if (registers->start[i] >= 0) { currRange = _rangeFromReg(registers, i); [substringArray addObject:_substringFromRange(cString, currRange)]; } else { [substringArray addObject:@""]; } } return substringArray; } static NSArray *_subrangesForRegisters(regexp_registers_t registers) { NSMutableArray *rangesArray = [NSMutableArray array]; NSRange currRange; NSValue *currValue; int highestFound, i; for (i=0,highestFound=0; i < RE_NREGS; i++) if (registers->start[i] >= 0) highestFound = i; for (i=0; i <= highestFound; i++) { currRange = _rangeFromReg(registers, i); currValue = [NSValue value:&currRange withObjCType:@encode(NSRange)]; [rangesArray addObject:currValue]; } return rangesArray; } // int num = [self numOfRegexStruct:pattern 
//                                  options:mask];

/*
 * Counts the matches of pattern in cString within range.
 *
 * The search restarts after each match.  An empty match located exactly
 * where the previous match ended is not counted; the search position is
 * advanced by one instead.  Raises MiscRegexException when the search
 * yields -2.
 */
static int _numOfRegexStruct(const char *cString, regexp_t pattern, int options, NSRange range)
{
    struct re_registers registers;
    char *myString = (char*)cString;
    int myLength  = NSMaxRange(range);
    int startSpot = 0;
    int currSpot  = range.location;
    int lastEnd   = 0;
    int posFound  = -1;
    int numFound  = 0;
    NSRange foundRange;

    while ((currSpot <= myLength) &&
           (posFound = re_search_pattern(pattern, myString, myLength,
                            currSpot, myLength-currSpot, &registers)) >= 0)
    {
        foundRange = _rangeFromReg(&registers, 0);

        if (lastEnd == foundRange.location && foundRange.length == 0 &&
            currSpot > startSpot)
        {
            currSpot++;
            continue;
        }

        numFound++;
        lastEnd = currSpot = NSMaxRange(foundRange);
        if(foundRange.length == 0) currSpot++;  // always make progress past an empty match
    }

    if (posFound == -2)
        [NSException raise:MiscRegexException format:@"Error during regex search"];

    return numFound;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.