// This is NSString+MiscRegex.m in view mode; [Download] [Up]
//
// NSString+MiscRegex.m
// Regular expression matching and replacement routines using regexpr.c
//
// Written by Carl Lindberg Copyright (c) 1997 by Carl Lindberg.
// Version 1.0 All rights reserved.
// This notice may not be removed from this source code.
//
// This object is included in the MiscKit by permission from the author
// and its use is governed by the MiscKit license, found in the file
// "LICENSE.rtf" in the MiscKit distribution. Please refer to that file
// for a list of all applicable permissions and restrictions.
//
// This file formatted with 4 spaces per tabstop.
#import "NSString+MiscRegex.h"
//#import <misckit/NSString+MiscRegex.h>
#import <Foundation/NSException.h>
#import <Foundation/NSValue.h>
#import <Foundation/NSCharacterSet.h>
#import <Foundation/NSArray.h>
#import <Foundation/NSDictionary.h>
#import <Foundation/NSUtilities.h>
#import <Foundation/NSScanner.h>
#import <stdlib.h>
#import <string.h>
/* Name of the exception raised by this file for regex compile/search errors. */
NSString *MiscRegexException = @"MiscRegexException";
/*
 * Keys accepted by -grepRegexStruct:forPieces:options:occurrenceNum: (and the
 * methods that forward to it).  Each key selects one piece of the result:
 */
/* Text before the match; the receiver itself when nothing matched. */
NSString *MiscBeforePiece = @"MiscBeforePiece";
/* The matched text; @"" when nothing matched. */
NSString *MiscMiddlePiece = @"MiscMiddlePiece";
/* Text after the match; @"" when nothing matched. */
NSString *MiscAfterPiece = @"MiscAfterPiece";
/* NSValue-wrapped NSRange from 0 up to the match start; the whole C string
 * range when nothing matched. */
NSString *MiscBeforeRangePiece = @"MiscBeforeRangePiece";
/* NSValue-wrapped NSRange of the match; { NSNotFound, 0 } when nothing matched. */
NSString *MiscMiddleRangePiece = @"MiscMiddleRangePiece";
/* NSValue-wrapped NSRange from the match end to the end of the C string;
 * { NSNotFound, 0 } when nothing matched. */
NSString *MiscAfterRangePiece = @"MiscAfterRangePiece";
/* Array built by _substringsForRegisters(); an empty array when nothing matched. */
NSString *MiscSubstringsPiece = @"MiscSubstringsPiece";
/* Array built by _subrangesForRegisters(); an empty array when nothing matched. */
NSString *MiscSubrangesPiece = @"MiscSubrangesPiece";
/*
 * Private functions.  Apart from _rangeFromReg(), their definitions are not
 * visible next to these prototypes; the notes below are drawn from how the
 * functions are called in this file and are marked where they are guesses.
 */
/* Supplies the default search range for the convenience methods.
 * NOTE(review): presumably { 0, [aString cStringLength] } -- confirm. */
static NSRange cStringRange(NSString *aString);
/* Range covered by match register idx, or { NSNotFound, 0 } if it is unset. */
static NSRange _rangeFromReg(regexp_registers_t regs, int idx);
/* NOTE(review): the three helpers below appear to build an NSString from part
 * of cString (tail from an index, head up to an index, arbitrary range) --
 * confirm against their definitions. */
static NSString *_substringFromIndex(const char *cString, unsigned from, unsigned fullLength);
static NSString *_substringToIndex(const char *cString, unsigned to);
static NSString *_substringFromRange(const char *cString, NSRange range);
/* Produce the values stored under MiscSubstringsPiece / MiscSubrangesPiece. */
static NSArray *_substringsForRegisters(const char *cString, regexp_registers_t registers);
static NSArray *_subrangesForRegisters(regexp_registers_t registers);
/* Backs -numOfRegexStruct:options:range:.
 * NOTE(review): presumably counts matches within range -- confirm. */
static int _numOfRegexStruct(const char *cString, regexp_t pattern, int options,
NSRange range);
/* Callers treat a YES result as "registers now describe the requested match". */
static BOOL _findRegexStruct(const char *cString, regexp_t pattern, unsigned mask,
NSRange range, int n, regexp_registers_t registers);
/* Produces the text actually substituted for a match from replaceString;
 * returns replaceString unchanged unless MiscUseMatchSubstitutions is set. */
static NSString *_processedStringFor(const char *cString, NSString *replaceString,
unsigned mask, regexp_registers_t registers);
@implementation NSString (MiscRegex)
/*
 * Searches the receiver's C string for the compiled regex and returns the
 * range of the whole match (register 0).  `range`, `mask` and `n` are passed
 * straight to _findRegexStruct(), which selects the occurrence.
 *
 * Returns { NSNotFound, 0 } when _findRegexStruct() reports no match.
 * The returned range is expressed in C-string offsets.
 */
- (NSRange)rangeOfRegexStruct:(regexp_t)regex options:(unsigned)mask occurrenceNum:(int)n
    range:(NSRange)range
{
    struct re_registers registers;
    NSRange foundRange = { NSNotFound, 0 };

    /* registers is only meaningful when the search succeeds. */
    if (_findRegexStruct([self cString], regex, mask, range, n, &registers))
    {
        foundRange = _rangeFromReg(&registers, 0);
    }
    return foundRange;
}
/*
 * Single-key form of -grepRegexStruct:forPieces:options:occurrenceNum:.
 * Wraps `key` in a one-element array, runs the multi-key method, and returns
 * the object stored under `key` (nil if the key was not recognized).
 */
- grepRegexStruct:(regexp_t)regex forPiece:(NSString *)key options:(unsigned)mask
    occurrenceNum:(int)n
{
    NSDictionary *pieces = [self grepRegexStruct:regex
            forPieces:[NSArray arrayWithObject:key]
            options:mask occurrenceNum:n];

    return [pieces objectForKey:key];
}
/*
 * Searches the whole receiver (cStringRange(self)) for the occurrence of the
 * compiled regex selected by `n`, then builds a dictionary with one entry per
 * recognized key in `keys`:
 *
 *   MiscBeforePiece       text before the match (the receiver if no match)
 *   MiscMiddlePiece       the matched text (@"" if no match)
 *   MiscAfterPiece        text after the match (@"" if no match)
 *   MiscBeforeRangePiece  NSValue of { 0, match start } ({ 0, cStringLength } if no match)
 *   MiscMiddleRangePiece  NSValue of the match range ({ NSNotFound, 0 } if no match)
 *   MiscAfterRangePiece   NSValue of the range after the match ({ NSNotFound, 0 } if no match)
 *   MiscSubstringsPiece   result of _substringsForRegisters() (empty array if no match)
 *   MiscSubrangesPiece    result of _subrangesForRegisters() (empty array if no match)
 *
 * Keys that match none of the above are silently left out of the result.
 * All ranges are C-string offsets.
 */
- (NSDictionary *)grepRegexStruct:(regexp_t)regex forPieces:(NSArray *)keys
    options:(unsigned)mask occurrenceNum:(int)n
{
    struct re_registers registers;
    const char *cString = [self cString];
    NSRange searchRange = cStringRange(self);
    unsigned cStringLen = searchRange.length;
    NSRange foundRange = { NSNotFound, 0 };
    NSString *currKey;
    NSEnumerator *keyEnumerator = [keys objectEnumerator];
    NSMutableDictionary *resultDict = [NSMutableDictionary
            dictionaryWithCapacity:[keys count]];

    if (_findRegexStruct(cString, regex, mask, searchRange, n, &registers))
    {
        foundRange = _rangeFromReg(&registers, 0);
    }

    /*
     * registers is read below only when foundRange.location != NSNotFound,
     * i.e. only after a successful search filled it in.
     */
    while ((currKey = [keyEnumerator nextObject]))
    {
        id theObject = nil;

        if ([currKey isEqualToString:MiscBeforePiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = self;
            else
                theObject = _substringToIndex(cString, foundRange.location);
        }
        else if ([currKey isEqualToString:MiscMiddlePiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = @"";
            else
                theObject = _substringFromRange(cString, foundRange);
        }
        else if ([currKey isEqualToString:MiscAfterPiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = @"";
            else
                theObject = _substringFromIndex(cString, NSMaxRange(foundRange), cStringLen);
        }
        else if ([currKey isEqualToString:MiscBeforeRangePiece])
        {
            NSRange aRange = { 0, [self cStringLength] };

            if (foundRange.location != NSNotFound)
                aRange.length = foundRange.location;
            theObject = [NSValue value:&aRange withObjCType:@encode(NSRange)];
        }
        else if ([currKey isEqualToString:MiscMiddleRangePiece])
        {
            theObject = [NSValue value:&foundRange withObjCType:@encode(NSRange)];
        }
        else if ([currKey isEqualToString:MiscAfterRangePiece])
        {
            NSRange aRange = { NSNotFound, 0 };

            if (foundRange.location != NSNotFound) {
                aRange.location = NSMaxRange(foundRange);
                aRange.length = [self cStringLength] - aRange.location;
            }
            theObject = [NSValue value:&aRange withObjCType:@encode(NSRange)];
        }
        else if ([currKey isEqualToString:MiscSubstringsPiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = [NSArray array];
            else
                theObject = _substringsForRegisters(cString, &registers);
        }
        else if ([currKey isEqualToString:MiscSubrangesPiece])
        {
            if (foundRange.location == NSNotFound)
                theObject = [NSArray array];
            else
                theObject = _subrangesForRegisters(&registers);
        }

        /* Unknown keys leave theObject nil and are skipped. */
        if (theObject)
            [resultDict setObject:theObject forKey:currKey];
    }
    return resultDict;
}
/*
 * Returns an array of NSValue-wrapped NSRanges (C-string offsets), one for
 * every successive match of the compiled regex in the receiver, scanning
 * from the start of the C string to its end.
 *
 * `mask` is accepted for interface symmetry but is not consulted here.
 * Raises MiscRegexException if re_search_pattern() returns -2.
 */
- (NSArray *)rangesMatchedByRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    struct re_registers registers;
    char *myString = (char *)[self cString];
    int myLength = [self cStringLength];
    int startSpot = 0;
    int currSpot = 0;       /* where the next search begins */
    int lastEnd = 0;        /* end of the previously recorded match */
    int posFound = -1;
    NSRange foundRange;
    NSMutableArray *rangeArray = [NSMutableArray array];
    NSValue *currValue;

    /* currSpot == myLength is allowed so a pattern can match at the very end. */
    while ((currSpot <= myLength) &&
           (posFound = re_search_pattern(pattern, myString, myLength,
                    currSpot, myLength - currSpot, &registers)) >= 0)
    {
        foundRange = _rangeFromReg(&registers, 0);
        /*
         * We don't want to have a zero-length match at the same spot a
         * previous match ended.  So, start looking at the next spot.
         */
        if (lastEnd == foundRange.location && foundRange.length == 0 && currSpot > startSpot)
        {
            currSpot++;
            continue;
        }
        currValue = [NSValue value:&foundRange withObjCType:@encode(NSRange)];
        [rangeArray addObject:currValue];
        lastEnd = currSpot = NSMaxRange(foundRange);
        /* Step past an empty match so the search cannot stall on it. */
        if (foundRange.length == 0) currSpot++;
    }
    if (posFound == -2)
        [NSException raise:MiscRegexException format:@"Error during regex search"];
    return rangeArray;
}
/*
 * Returns the text of every match of the compiled regex, in the order
 * -rangesMatchedByRegexStruct:options: reports them.  Each range is turned
 * into a string with _substringFromRange() on the receiver's C string.
 */
- (NSArray*)stringsMatchedByRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    const char *bytes = [self cString];
    NSMutableArray *matches = [NSMutableArray array];
    NSArray *matchRanges = [self rangesMatchedByRegexStruct:pattern options:mask];
    unsigned total = [matchRanges count];
    unsigned i;

    for (i = 0; i < total; i++)
    {
        NSRange oneRange;

        [[matchRanges objectAtIndex:i] getValue:&oneRange];
        [matches addObject:_substringFromRange(bytes, oneRange)];
    }
    return matches;
}
/*
 * Splits the receiver at every match of the compiled regex and returns the
 * pieces between the matches.  The result always has one more element than
 * there are matches: the text before the first match, the text between each
 * pair of matches, and the text after the last match (any of which may be
 * produced from a zero-length range).
 */
- (NSArray *)componentsSeparatedByRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    const char *bytes = [self cString];
    NSMutableArray *components = [NSMutableArray array];
    NSArray *separators = [self rangesMatchedByRegexStruct:pattern options:mask];
    unsigned separatorCount = [separators count];
    unsigned pieceStart = 0;    /* end of the previous separator */
    unsigned i;
    NSRange separator, piece;

    for (i = 0; i < separatorCount; i++)
    {
        [[separators objectAtIndex:i] getValue:&separator];
        piece.location = pieceStart;
        piece.length = separator.location - pieceStart;
        [components addObject:_substringFromRange(bytes, piece)];
        pieceStart = NSMaxRange(separator);
    }

    /* Whatever follows the final separator (the whole string if none matched). */
    piece.location = pieceStart;
    piece.length = [self cStringLength] - pieceStart;
    [components addObject:_substringFromRange(bytes, piece)];
    return components;
}
/*
 * Returns a new string in which the occurrence of the compiled regex selected
 * by `n` within `range` is replaced.  The replacement text is
 * _processedStringFor(replaceString), which returns replaceString unchanged
 * unless MiscUseMatchSubstitutions is set in `mask`.
 *
 * Returns the receiver itself when _findRegexStruct() finds no match.
 */
- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern
    withString:(NSString *)replaceString options:(unsigned)mask
    occurrenceNum:(int)n range:(NSRange)range
{
    struct re_registers registers;
    const char *cString = [self cString];
    NSRange theRange;
    NSString *realReplaceString;

    if (_findRegexStruct(cString, pattern, mask, range, n, &registers))
    {
        realReplaceString = _processedStringFor(cString, replaceString, mask, &registers);
        theRange = _rangeFromReg(&registers, 0);
        /* head + replacement + tail */
        return [NSString stringWithFormat:@"%@%@%@",
                _substringToIndex(cString, theRange.location),
                realReplaceString,
                _substringFromIndex(cString, NSMaxRange(theRange), [self cStringLength])];
    }
    return self;
}
/*
 * Returns a new string in which every match of the compiled regex found
 * between range.location and NSMaxRange(range) is replaced by
 * _processedStringFor(aString).  Text outside `range` is copied through
 * unchanged.
 *
 * The search is given NSMaxRange(range) as the string length, so it never
 * looks past the end of `range`.
 * Raises MiscRegexException if re_search_pattern() returns -2.
 */
- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern
    withString:(NSString *)aString options:(unsigned)mask range:(NSRange)range
{
    struct re_registers registers;
    char *myString = (char *)[self cString];
    unsigned cStringLen = [self cStringLength];
    int myLength = NSMaxRange(range);
    int startSpot = range.location;
    int currSpot = range.location;  /* where the next search begins */
    int lastEnd = 0;                /* end of the last text copied to newString */
    int posFound = -1;
    NSMutableString *newString = [NSMutableString stringWithCapacity:[self length]];
    NSRange foundRange;
    NSRange betweenRange;
    NSString *replaceString;

    while ((currSpot <= myLength) &&
           (posFound = re_search_pattern(pattern, myString, myLength,
                    currSpot, myLength - currSpot, &registers)) >= 0)
    {
        foundRange = _rangeFromReg(&registers, 0);

        /*
         * Unmatched text between the previous match and this one.  lastEnd
         * starts at 0, so for the first match this also covers anything
         * that precedes range.location.
         */
        betweenRange.location = lastEnd;
        betweenRange.length = foundRange.location - lastEnd;

        /* Skip an empty match sitting right where the previous match ended. */
        if (betweenRange.length == 0 && foundRange.length == 0 && currSpot > startSpot)
        {
            currSpot++;
            continue;
        }
        if (betweenRange.length > 0)
            [newString appendString:_substringFromRange(myString, betweenRange)];

        replaceString = _processedStringFor(myString, aString, mask, &registers);
        [newString appendString:replaceString];

        lastEnd = currSpot = NSMaxRange(foundRange);
        /* Step past an empty match so the search cannot stall on it. */
        if (foundRange.length == 0) currSpot++;
    }
    if (posFound == -2)
        [NSException raise:MiscRegexException format:@"Error during regex search"];

    /* Copy whatever follows the last match, including text beyond `range`. */
    if (lastEnd < cStringLen)
        [newString appendString:_substringFromIndex(myString, lastEnd, cStringLen)];
    return newString;
}
/*
 * Hands the receiver's C string, the compiled regex, `mask` and `range` to
 * _numOfRegexStruct() and returns its result.
 */
- (unsigned)numOfRegexStruct:(regexp_t)pattern options:(unsigned)mask range:(NSRange)range
{
    const char *bytes = [self cString];

    return _numOfRegexStruct(bytes, pattern, mask, range);
}
/*
 * Returns YES if the receiver can be used as a regular expression.
 *
 * Unfortunately, re_compile_pattern() doesn't catch all possible problems
 * -- it accepts things like "a**", which will cause a runtime error when
 * searching.  Most errors are caught though.  So, the implementation
 * compiles the receiver and then runs one search with it against a sample
 * string inside NS_DURING/NS_HANDLER; an exception from either step means
 * the pattern is not valid.  I wanted to avoid having to do that, but...
 */
- (BOOL)isValidRegex
{
    /*
     * Both locals are assigned inside the NS_DURING block and read after the
     * handler.  Where NS_DURING is built on setjmp/longjmp, such locals must
     * be volatile or their values are indeterminate once an exception has
     * been raised.
     */
    regexp_t volatile pattern = NULL;
    volatile BOOL isValid = NO;

    NS_DURING
        pattern = MiscNewRegexStruct(self, 0);
        [@"foobar _20\n30baz" numOfRegexStruct:pattern];
        isValid = YES;
    NS_HANDLER
        isValid = NO;
    NS_ENDHANDLER

    /* pattern stays NULL if compilation itself raised. */
    if (pattern) MiscFreeRegexStruct(pattern);
    return isValid;
    /*
     * The original implementation, which didn't have to resort to catching
     * exceptions but missed a few bad patterns.
     */
    /* struct re_pattern_buffer pattern;
    char translate[256];
    char *errorString;
    int i;
    memset(&pattern, 0, sizeof(struct re_pattern_buffer));
    for (i=0;i<256;i++) translate[i] = i;
    pattern.translate = translate;
    errorString = re_compile_pattern((char*)[self cString], [self cStringLength], &pattern);
    if (pattern.buffer) free(pattern.buffer);
    if (errorString)
    return NO;
    else
    return YES;
    */
}
/*
 * String-pattern form of -numOfRegexStruct:options:range:.  Compiles `regex`
 * with MiscNewRegexStruct() (which raises on a nil or malformed pattern),
 * forwards the call, then frees the compiled pattern.
 */
- (unsigned)numOfRegex:(NSString *)regex options:(unsigned)mask range:(NSRange)range
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    unsigned count = [self numOfRegexStruct:compiled options:mask range:range];

    MiscFreeRegexStruct(compiled);
    return count;
}
/*
 * String-pattern form of -rangeOfRegexStruct:options:occurrenceNum:range:.
 * Compiles `regex` with MiscNewRegexStruct() (which raises on a nil or
 * malformed pattern), forwards the call, then frees the compiled pattern.
 */
- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask occurrenceNum:(int)n
    range:(NSRange)range
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    NSRange matchRange = [self rangeOfRegexStruct:compiled options:mask
            occurrenceNum:n range:range];

    MiscFreeRegexStruct(compiled);
    return matchRange;
}
/*
 * String-pattern form of -grepRegexStruct:forPiece:options:occurrenceNum:.
 * Compiles `regex` with MiscNewRegexStruct() (which raises on a nil or
 * malformed pattern), forwards the call, then frees the compiled pattern.
 */
- grep:(NSString *)regex forPiece:(NSString *)key options:(unsigned)mask occurrenceNum:(int)n
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    id piece = [self grepRegexStruct:compiled forPiece:key options:mask occurrenceNum:n];

    MiscFreeRegexStruct(compiled);
    return piece;
}
/*
 * String-pattern form of -grepRegexStruct:forPieces:options:occurrenceNum:.
 * Compiles `regex` with MiscNewRegexStruct() (which raises on a nil or
 * malformed pattern), forwards the call, then frees the compiled pattern.
 */
- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys
    options:(unsigned)mask occurrenceNum:(int)n
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    NSDictionary *pieces = [self grepRegexStruct:compiled forPieces:keys
            options:mask occurrenceNum:n];

    MiscFreeRegexStruct(compiled);
    return pieces;
}
/*
 * String-pattern form of
 * -stringByReplacingEveryOccurrenceOfRegexStruct:withString:options:range:.
 * Compiles `regex` with MiscNewRegexStruct() (which raises on a nil or
 * malformed pattern), forwards the call, then frees the compiled pattern.
 */
- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex
    withString:(NSString *)aString options:(unsigned)mask range:(NSRange)range
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    id replaced = [self stringByReplacingEveryOccurrenceOfRegexStruct:compiled
            withString:aString options:mask range:range];

    MiscFreeRegexStruct(compiled);
    return replaced;
}
/*
 * String-pattern form of
 * -stringByReplacingRegexStruct:withString:options:occurrenceNum:range:.
 * Compiles `regex` with MiscNewRegexStruct() (which raises on a nil or
 * malformed pattern), forwards the call, then frees the compiled pattern.
 */
- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString
    options:(unsigned)mask occurrenceNum:(int)n range:(NSRange)range
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    id replaced = [self stringByReplacingRegexStruct:compiled withString:replaceString
            options:mask occurrenceNum:n range:range];

    MiscFreeRegexStruct(compiled);
    return replaced;
}
/*
 * String-pattern form of -stringsMatchedByRegexStruct:options:.  Compiles
 * `regex` with MiscNewRegexStruct() (which raises on a nil or malformed
 * pattern), forwards the call, then frees the compiled pattern.
 */
- (NSArray *)stringsMatchedByRegex:(NSString *)regex options:(unsigned)mask
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    NSArray *matches = [self stringsMatchedByRegexStruct:compiled options:mask];

    MiscFreeRegexStruct(compiled);
    return matches;
}
/*
 * String-pattern form of -rangesMatchedByRegexStruct:options:.  Compiles
 * `regex` with MiscNewRegexStruct() (which raises on a nil or malformed
 * pattern), forwards the call, then frees the compiled pattern.
 */
- (NSArray *)rangesMatchedByRegex:(NSString *)regex options:(unsigned)mask
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    NSArray *ranges = [self rangesMatchedByRegexStruct:compiled options:mask];

    MiscFreeRegexStruct(compiled);
    return ranges;
}
/*
 * String-pattern form of -componentsSeparatedByRegexStruct:options:.
 * Compiles `regex` with MiscNewRegexStruct() (which raises on a nil or
 * malformed pattern), forwards the call, then frees the compiled pattern.
 */
- (NSArray *)componentsSeparatedByRegex:(NSString *)regex options:(unsigned)mask
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);
    NSArray *components = [self componentsSeparatedByRegexStruct:compiled options:mask];

    MiscFreeRegexStruct(compiled);
    return components;
}
/*******************************************************************/
/************** Convenience methods ******************************/
/*******************************************************************/
/*
 * Shorthand forms of -numOfRegex:options:range:.  An omitted options
 * argument is passed as 0; an omitted range as cStringRange(self).
 */
- (unsigned)numOfRegex:(NSString *)regex
{
    return [self numOfRegex:regex options:0 range:cStringRange(self)];
}

- (unsigned)numOfRegex:(NSString *)regex options:(unsigned)mask
{
    return [self numOfRegex:regex options:mask range:cStringRange(self)];
}

- (unsigned)numOfRegex:(NSString *)regex range:(NSRange)range
{
    return [self numOfRegex:regex options:0 range:range];
}
/*
 * Shorthand forms of -rangeOfRegex:options:occurrenceNum:range:.  An omitted
 * options argument is passed as 0, an omitted occurrenceNum as 0, and an
 * omitted range as cStringRange(self).
 */
- (NSRange)rangeOfRegex:(NSString *)regex
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:0
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask
{
    return [self rangeOfRegex:regex options:mask occurrenceNum:0
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex occurrenceNum:(int)n
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:n
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask occurrenceNum:(int)n
{
    return [self rangeOfRegex:regex options:mask occurrenceNum:n
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegex:(NSString *)regex range:(NSRange)range
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegex:(NSString *)regex options:(unsigned)mask range:(NSRange)range
{
    return [self rangeOfRegex:regex options:mask occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegex:(NSString *)regex occurrenceNum:(int)n range:(NSRange)range
{
    return [self rangeOfRegex:regex options:0 occurrenceNum:n range:range];
}
/*
 * Shorthand forms of -grep:forPieces:options:occurrenceNum: and
 * -grep:forPiece:options:occurrenceNum:.  An omitted options argument is
 * passed as 0 and an omitted occurrenceNum as 0.
 */
- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys
{
    return [self grep:regex forPieces:keys options:0 occurrenceNum:0];
}

- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys options:(unsigned)mask
{
    return [self grep:regex forPieces:keys options:mask occurrenceNum:0];
}

- (NSDictionary *)grep:(NSString *)regex forPieces:(NSArray *)keys occurrenceNum:(int)n
{
    return [self grep:regex forPieces:keys options:0 occurrenceNum:n];
}

- grep:(NSString *)regex forPiece:(NSString *)keys
{
    return [self grep:regex forPiece:keys options:0 occurrenceNum:0];
}

- grep:(NSString *)regex forPiece:(NSString *)keys options:(unsigned)mask
{
    return [self grep:regex forPiece:keys options:mask occurrenceNum:0];
}

- grep:(NSString *)regex forPiece:(NSString *)keys occurrenceNum:(int)n
{
    return [self grep:regex forPiece:keys options:0 occurrenceNum:n];
}
/*
 * Shorthand forms of
 * -stringByReplacingEveryOccurrenceOfRegex:withString:options:range:.
 * An omitted options argument is passed as 0; an omitted range as
 * cStringRange(self).
 */
- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)aString
{
    return [self stringByReplacingEveryOccurrenceOfRegex:regex withString:aString
            options:0 range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)aString options:(unsigned)mask
{
    return [self stringByReplacingEveryOccurrenceOfRegex:regex withString:aString
            options:mask range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)aString range:(NSRange)range
{
    return [self stringByReplacingEveryOccurrenceOfRegex:regex withString:aString
            options:0 range:range];
}
/*
 * Shorthand forms of
 * -stringByReplacingRegex:withString:options:occurrenceNum:range:.
 * An omitted options argument is passed as 0, an omitted occurrenceNum as 0,
 * and an omitted range as cStringRange(self).
 */
- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:0 occurrenceNum:0 range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString options:(unsigned)mask
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:mask occurrenceNum:0 range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString occurrenceNum:(int)n
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:0 occurrenceNum:n range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString options:(unsigned)mask occurrenceNum:(int)n
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:mask occurrenceNum:n range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString range:(NSRange)range
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:0 occurrenceNum:0 range:range];
}

- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString options:(unsigned)mask range:(NSRange)range
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:mask occurrenceNum:0 range:range];
}

- (NSString *)stringByReplacingRegex:(NSString*)regex withString:(NSString*)replaceString occurrenceNum:(int)n range:(NSRange)range
{
    return [self stringByReplacingRegex:regex withString:replaceString
            options:0 occurrenceNum:n range:range];
}
/*
 * Option-less forms of -stringsMatchedByRegex:options:,
 * -rangesMatchedByRegex:options: and -componentsSeparatedByRegex:options:;
 * each passes 0 for options.
 */
- (NSArray*)stringsMatchedByRegex:(NSString *)regex
{
    return [self stringsMatchedByRegex:regex options:0];
}

- (NSArray*)rangesMatchedByRegex:(NSString *)regex
{
    return [self rangesMatchedByRegex:regex options:0];
}

- (NSArray*)componentsSeparatedByRegex:(NSString *)regex
{
    return [self componentsSeparatedByRegex:regex options:0];
}
/*
 * Shorthand forms of -numOfRegexStruct:options:range:.  An omitted options
 * argument is passed as 0; an omitted range as cStringRange(self).
 */
- (unsigned)numOfRegexStruct:(regexp_t)pattern
{
    return [self numOfRegexStruct:pattern options:0 range:cStringRange(self)];
}

- (unsigned)numOfRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    return [self numOfRegexStruct:pattern options:mask range:cStringRange(self)];
}

- (unsigned)numOfRegexStruct:(regexp_t)pattern range:(NSRange)range
{
    return [self numOfRegexStruct:pattern options:0 range:range];
}
/*
 * Shorthand forms of -rangeOfRegexStruct:options:occurrenceNum:range:.
 * An omitted options argument is passed as 0, an omitted occurrenceNum as 0,
 * and an omitted range as cStringRange(self).
 */
- (NSRange)rangeOfRegexStruct:(regexp_t)pattern
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:0
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern options:(unsigned)mask
{
    return [self rangeOfRegexStruct:pattern options:mask occurrenceNum:0
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern occurrenceNum:(int)n
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:n
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern options:(unsigned)mask occurrenceNum:(int)n
{
    return [self rangeOfRegexStruct:pattern options:mask occurrenceNum:n
            range:cStringRange(self)];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern range:(NSRange)range
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern options:(unsigned)mask range:(NSRange)range
{
    return [self rangeOfRegexStruct:pattern options:mask occurrenceNum:0 range:range];
}

- (NSRange)rangeOfRegexStruct:(regexp_t)pattern occurrenceNum:(int)n range:(NSRange)range
{
    return [self rangeOfRegexStruct:pattern options:0 occurrenceNum:n range:range];
}
/*
 * Shorthand forms of -grepRegexStruct:forPieces:options:occurrenceNum: and
 * -grepRegexStruct:forPiece:options:occurrenceNum:.  An omitted options
 * argument is passed as 0 and an omitted occurrenceNum as 0.
 */
- (NSDictionary *)grepRegexStruct:(regexp_t)pattern forPieces:(NSArray *)keys
{
    return [self grepRegexStruct:pattern forPieces:keys options:0 occurrenceNum:0];
}

- (NSDictionary *)grepRegexStruct:(regexp_t)pattern forPieces:(NSArray *)keys options:(unsigned)mask
{
    return [self grepRegexStruct:pattern forPieces:keys options:mask occurrenceNum:0];
}

- (NSDictionary *)grepRegexStruct:(regexp_t)pattern forPieces:(NSArray *)keys occurrenceNum:(int)n
{
    return [self grepRegexStruct:pattern forPieces:keys options:0 occurrenceNum:n];
}

- grepRegexStruct:(regexp_t)pattern forPiece:(NSString *)keys
{
    return [self grepRegexStruct:pattern forPiece:keys options:0 occurrenceNum:0];
}

- grepRegexStruct:(regexp_t)pattern forPiece:(NSString *)keys options:(unsigned)mask
{
    return [self grepRegexStruct:pattern forPiece:keys options:mask occurrenceNum:0];
}

- grepRegexStruct:(regexp_t)pattern forPiece:(NSString *)keys occurrenceNum:(int)n
{
    return [self grepRegexStruct:pattern forPiece:keys options:0 occurrenceNum:n];
}
/*
 * Shorthand forms of
 * -stringByReplacingEveryOccurrenceOfRegexStruct:withString:options:range:.
 * An omitted options argument is passed as 0; an omitted range as
 * cStringRange(self).
 */
- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern withString:(NSString *)aString
{
    return [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern withString:aString
            options:0 range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern withString:(NSString *)aString options:(unsigned)mask
{
    return [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern withString:aString
            options:mask range:cStringRange(self)];
}

- (NSString *)stringByReplacingEveryOccurrenceOfRegexStruct:(regexp_t)pattern withString:(NSString *)aString range:(NSRange)range
{
    return [self stringByReplacingEveryOccurrenceOfRegexStruct:pattern withString:aString
            options:0 range:range];
}
/*
 * Shorthand forms of
 * -stringByReplacingRegexStruct:withString:options:occurrenceNum:range:.
 * An omitted options argument is passed as 0, an omitted occurrenceNum as 0,
 * and an omitted range as cStringRange(self).
 */
- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:0 range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString options:(unsigned)mask
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:mask occurrenceNum:0 range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString occurrenceNum:(int)n
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:n range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString options:(unsigned)mask occurrenceNum:(int)n
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:mask occurrenceNum:n range:cStringRange(self)];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString range:(NSRange)range
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:0 range:range];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString options:(unsigned)mask range:(NSRange)range
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:mask occurrenceNum:0 range:range];
}

- (NSString *)stringByReplacingRegexStruct:(regexp_t)pattern withString:(NSString*)replaceString occurrenceNum:(int)n range:(NSRange)range
{
    return [self stringByReplacingRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:n range:range];
}
/*
 * Option-less forms of -stringsMatchedByRegexStruct:options:,
 * -rangesMatchedByRegexStruct:options: and
 * -componentsSeparatedByRegexStruct:options:; each passes 0 for options.
 */
- (NSArray*)stringsMatchedByRegexStruct:(regexp_t)pattern
{
    return [self stringsMatchedByRegexStruct:pattern options:0];
}

- (NSArray*)rangesMatchedByRegexStruct:(regexp_t)pattern
{
    return [self rangesMatchedByRegexStruct:pattern options:0];
}

- (NSArray*)componentsSeparatedByRegexStruct:(regexp_t)pattern
{
    return [self componentsSeparatedByRegexStruct:pattern options:0];
}
@end
@implementation NSMutableString (MiscRegex)
/*
 * In-place form of
 * -stringByReplacingEveryOccurrenceOfRegex:withString:options:range:.
 * Builds the replaced string and then makes it the receiver's contents.
 */
- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)replaceString
    options:(unsigned)mask range:(NSRange)range
{
    [self setString:[self stringByReplacingEveryOccurrenceOfRegex:regex
            withString:replaceString options:mask range:range]];
}
/*
 * In-place form of
 * -stringByReplacingEveryOccurrenceOfRegexStruct:withString:options:range:.
 * Builds the replaced string and then makes it the receiver's contents.
 */
- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern
    withString:(NSString *)replaceString options:(unsigned)mask range:(NSRange)range
{
    [self setString:[self stringByReplacingEveryOccurrenceOfRegexStruct:pattern
            withString:replaceString options:mask range:range]];
}
/*
 * Replaces, in place, the occurrence of the compiled regex selected by `n`
 * within `range`.  The replacement text is _processedStringFor(replaceString),
 * which returns replaceString unchanged unless MiscUseMatchSubstitutions is
 * set in `mask`.  Does nothing when _findRegexStruct() finds no match.
 *
 * NOTE(review): foundRange is a C-string offset range but is handed to
 * -replaceCharactersInRange:withString:, which works in character indexes.
 * The two presumably agree only when every character occupies one byte in
 * the C-string encoding -- confirm before using with other text.
 */
- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString
    options:(unsigned)mask occurrenceNum:(int)n range:(NSRange)range
{
    struct re_registers registers;

    if (_findRegexStruct([self cString], pattern, mask, range, n, &registers))
    {
        NSRange foundRange = _rangeFromReg(&registers, 0);
        NSString *processedString;

        /* Compute the replacement before mutating the receiver. */
        processedString = _processedStringFor([self cString], replaceString, mask, &registers);
        [self replaceCharactersInRange:foundRange withString:processedString];
    }
}
/*
 * String-pattern form of
 * -replaceRegexStruct:withString:options:occurrenceNum:range:.  Compiles
 * `regex` with MiscNewRegexStruct() (which raises on a nil or malformed
 * pattern), performs the in-place replacement, then frees the compiled
 * pattern.
 */
- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString
    options:(unsigned)mask occurrenceNum:(int)n range:(NSRange)range
{
    regexp_t compiled = MiscNewRegexStruct(regex, mask);

    [self replaceRegexStruct:compiled
            withString:replaceString
            options:mask
            occurrenceNum:n
            range:range];
    MiscFreeRegexStruct(compiled);
}
/*******************************************************************/
/************** Convenience methods ******************************/
/*******************************************************************/
/*
 * Shorthand forms of -replaceRegex:withString:options:occurrenceNum:range:.
 * An omitted options argument is passed as 0, an omitted occurrenceNum as 0,
 * and an omitted range as cStringRange(self).
 *
 * These are void methods, so the forwarded message is a plain statement
 * rather than the operand of `return`.
 */
- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString
{
    [self replaceRegex:regex withString:replaceString
            options:0 occurrenceNum:0 range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString options:(unsigned)mask
{
    [self replaceRegex:regex withString:replaceString
            options:mask occurrenceNum:0 range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString occurrenceNum:(int)n
{
    [self replaceRegex:regex withString:replaceString
            options:0 occurrenceNum:n range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString options:(unsigned)mask occurrenceNum:(int)n
{
    [self replaceRegex:regex withString:replaceString
            options:mask occurrenceNum:n range:cStringRange(self)];
}

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString range:(NSRange)range
{
    [self replaceRegex:regex withString:replaceString
            options:0 occurrenceNum:0 range:range];
}

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString options:(unsigned)mask range:(NSRange)range
{
    [self replaceRegex:regex withString:replaceString
            options:mask occurrenceNum:0 range:range];
}

- (void)replaceRegex:(NSString *)regex withString:(NSString *)replaceString occurrenceNum:(int)n range:(NSRange)range
{
    [self replaceRegex:regex withString:replaceString
            options:0 occurrenceNum:n range:range];
}
/*
 * Shorthand forms of
 * -replaceRegexStruct:withString:options:occurrenceNum:range:.  An omitted
 * options argument is passed as 0, an omitted occurrenceNum as 0, and an
 * omitted range as cStringRange(self).
 *
 * These are void methods, so the forwarded message is a plain statement
 * rather than the operand of `return`.
 */
- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:0 range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString options:(unsigned)mask
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:mask occurrenceNum:0 range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString occurrenceNum:(int)n
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:n range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString options:(unsigned)mask occurrenceNum:(int)n
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:mask occurrenceNum:n range:cStringRange(self)];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString range:(NSRange)range
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:0 range:range];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString options:(unsigned)mask range:(NSRange)range
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:mask occurrenceNum:0 range:range];
}

- (void)replaceRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString occurrenceNum:(int)n range:(NSRange)range
{
    [self replaceRegexStruct:pattern withString:replaceString
            options:0 occurrenceNum:n range:range];
}
/*
 * Shorthand forms of
 * -replaceEveryOccurrenceOfRegexStruct:withString:options:range:.  An omitted
 * options argument is passed as 0; an omitted range as cStringRange(self).
 *
 * These are void methods, so the forwarded message is a plain statement
 * rather than the operand of `return`.
 */
- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString
{
    [self replaceEveryOccurrenceOfRegexStruct:pattern withString:replaceString
            options:0 range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString options:(unsigned)mask
{
    [self replaceEveryOccurrenceOfRegexStruct:pattern withString:replaceString
            options:mask range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegexStruct:(regexp_t)pattern withString:(NSString *)replaceString range:(NSRange)range
{
    [self replaceEveryOccurrenceOfRegexStruct:pattern withString:replaceString
            options:0 range:range];
}
/*
 * Shorthand forms of
 * -replaceEveryOccurrenceOfRegex:withString:options:range:.  An omitted
 * options argument is passed as 0; an omitted range as cStringRange(self).
 *
 * These are void methods, so the forwarded message is a plain statement
 * rather than the operand of `return`.
 */
- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)replaceString
{
    [self replaceEveryOccurrenceOfRegex:regex withString:replaceString
            options:0 range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)replaceString options:(unsigned)mask
{
    [self replaceEveryOccurrenceOfRegex:regex withString:replaceString
            options:mask range:cStringRange(self)];
}

- (void)replaceEveryOccurrenceOfRegex:(NSString *)regex withString:(NSString *)replaceString range:(NSRange)range
{
    [self replaceEveryOccurrenceOfRegex:regex withString:replaceString
            options:0 range:range];
}
@end
/* Shorthand: an NSCharacterSet containing exactly the characters of str. */
#define NS_CHARSET(str) [NSCharacterSet characterSetWithCharactersInString:str]
/*
 * Converts match register `idx` of `regs` into an NSRange.
 * A register whose start is negative is treated as unset and yields
 * { NSNotFound, 0 }; otherwise the range runs from start[idx] to end[idx].
 */
static NSRange _rangeFromReg(regexp_registers_t regs, int idx)
{
    NSRange result;

    if (regs->start[idx] < 0)
    {
        result.location = NSNotFound;
        result.length = 0;
        return result;
    }

    result.location = regs->start[idx];
    result.length = regs->end[idx] - result.location;
    return result;
}
/*
 * Compiles regexString into a newly allocated pattern buffer.
 *
 * options:
 *   NSCaseInsensitiveSearch  installs a translate table that maps 'A'..'Z'
 *                            to 'a'..'z' (all other bytes map to themselves)
 *   MiscFasterSearch         additionally allocates and compiles a fastmap
 *
 * The caller owns the result and must release it with MiscFreeRegexStruct().
 *
 * Raises MiscRegexException if regexString is nil, if memory for the pattern
 * cannot be allocated, or if re_compile_pattern() reports an error; nothing
 * is leaked in those cases.
 */
regexp_t MiscNewRegexStruct(NSString *regexString, unsigned options)
{
    regexp_t pattern;
    char *translate;
    int i;
    char *errorString;

    if (!regexString) {
        [NSException raise:MiscRegexException format:@"MiscNewRegexStruct(): nil regex string"];
    }

    /* calloc gives the same all-zero buffer the compile step expects. */
    pattern = (regexp_t)calloc(1, sizeof(struct re_pattern_buffer));
    translate = (char *)malloc(sizeof(char) * 256);
    if (!pattern || !translate)
    {
        /* free(NULL) is a no-op, so whichever allocation succeeded is released. */
        free(translate);
        free(pattern);
        [NSException raise:MiscRegexException
                format:@"MiscNewRegexStruct(): out of memory"];
    }

    /* Identity table, optionally folding upper case onto lower case. */
    for (i = 0; i < 256; i++) translate[i] = i;
    if (options & NSCaseInsensitiveSearch)
    {
        for (i = 'A'; i <= 'Z'; i++) translate[i] = i - 'A' + 'a';
    }
    pattern->translate = translate;

    errorString = re_compile_pattern((char *)[regexString cString],
            [regexString cStringLength], pattern);
    if (errorString)
    {
        free(translate);
        if (pattern->buffer) free(pattern->buffer);
        free(pattern);
        [NSException raise:MiscRegexException format:
                @"MiscNewRegexStruct(): Malformed regular expression '%@': %s",
                regexString, errorString];
    }

    if (options & MiscFasterSearch)
    {
        char *fastMap = (char *)malloc(sizeof(char) * 256);

        // memset(fastMap,0,256); // the regexpr code does this
        /*
         * If the allocation fails the pattern is simply left without a
         * fastmap, the same state it has when MiscFasterSearch is not
         * requested.
         */
        if (fastMap)
        {
            pattern->fastmap = fastMap;
            re_compile_fastmap(pattern);
        }
    }
    return pattern;
}
/*
 * Releases a pattern created by MiscNewRegexStruct(): its fastmap (if any),
 * its compiled buffer (if any), its translate table, and the structure
 * itself.  Passing NULL is a no-op, mirroring free(NULL).
 */
void MiscFreeRegexStruct(regexp_t pattern)
{
    if (!pattern) return;

    if (pattern->fastmap) free(pattern->fastmap);
    if (pattern->buffer) free(pattern->buffer);
    free(pattern->translate);
    free(pattern);
}
/*
 * Builds the replacement text for a single match.
 *
 * If mask does not contain MiscUseMatchSubstitutions, replaceString is
 * returned untouched.  Otherwise replaceString is scanned and:
 *   &        is replaced by the text of register 0,
 *   $<0-9>   is replaced by the text of that register,
 *   $& / $$  produce a literal '&' / '$',
 *   any other '$' (including one at the very end) is copied literally.
 * A reference to a register whose start offset is negative contributes
 * nothing.  Register text is taken from cString via _rangeFromReg() and
 * _substringFromRange().
 */
static NSString *_processedStringFor(const char *cString, NSString *replaceString,
                                     unsigned mask, regexp_registers_t registers)
{
    unsigned len = [replaceString length];
    NSMutableString *processedString;
    NSCharacterSet *specialChars;
    NSCharacterSet *noCharacterSet;
    NSScanner *scanner;
    NSString *tempString;
    unichar theChar, nextChar;
    NSRange theRange;
    int matchNum;
    unsigned scanLocation;

    if (!(mask & MiscUseMatchSubstitutions)) return replaceString;

    // These sets come from convenience constructors, so this function does
    // not own them.  They are deliberately plain locals: caching them in
    // static variables without taking ownership would leave dangling
    // pointers for later calls.
    specialChars = NS_CHARSET(@"$&");
    noCharacterSet = NS_CHARSET(@"");

    processedString = [NSMutableString stringWithCapacity:len];
    scanner = [NSScanner scannerWithString:replaceString];
    [scanner setCharactersToBeSkipped:noCharacterSet];   // keep whitespace

    while (![scanner isAtEnd])
    {
        matchNum = -1;
        tempString = nil;

        // Copy the literal run up to the next '$' or '&'.
        [scanner scanUpToCharactersFromSet:specialChars intoString:&tempString];
        if (tempString) [processedString appendString:tempString];
        if ([scanner isAtEnd]) break;

        scanLocation = [scanner scanLocation];
        theChar = [replaceString characterAtIndex:scanLocation];

        if (theChar == '&')
        {
            matchNum = 0;
            [scanner setScanLocation:scanLocation+1];
        }
        else if (scanLocation + 1 < len)   // It's a dollar sign; see what follows
        {
            nextChar = [replaceString characterAtIndex:scanLocation+1];
            // Only ASCII digits name a register.  A general "decimal digit"
            // test also accepts non-ASCII digits, whose code point minus '0'
            // is not a register number.
            if (nextChar >= '0' && nextChar <= '9') {
                matchNum = nextChar - '0';
                [scanner setScanLocation:scanLocation+2];
            }
            else if (nextChar == '&' || nextChar == '$') {
                // Quoted special character: emit it literally.
                [processedString appendString:(nextChar == '&') ? @"&" : @"$"];
                [scanner setScanLocation:scanLocation+2];
            }
            else {
                [processedString appendString:@"$"];
                [scanner setScanLocation:scanLocation+1];
            }
        }
        else   // it's a dollar sign at the end of the string
        {
            [processedString appendString:@"$"];
            [scanner setScanLocation:scanLocation+1];
        }

        if (matchNum >= 0 && (matchNum < RE_NREGS) && (registers->start[matchNum] >= 0))
        {
            theRange = _rangeFromReg(registers, matchNum);
            [processedString appendString:_substringFromRange(cString, theRange)];
        }
    }
    return processedString;
}
/*
 * Returns a new string made from range.length bytes of cString, starting
 * range.location bytes in.
 */
static NSString *_substringFromRange(const char *cString, NSRange range)
{
    const char *firstByte = cString + range.location;

    return [NSString stringWithCString:firstByte length:range.length];
}
/* Returns a new string made from the first `to` bytes of cString. */
static NSString *_substringToIndex(const char *cString, unsigned to)
{
    NSString *prefix = [NSString stringWithCString:cString length:to];

    return prefix;
}
/*
 * Returns a new string made from the bytes of cString between offset `from`
 * and offset `fullLength`.
 */
static NSString *_substringFromIndex(const char *cString, unsigned from, unsigned fullLength)
{
    const char *tail = cString + from;
    unsigned tailLength = fullLength - from;

    return [NSString stringWithCString:tail length:tailLength];
}
/* Returns the range {0, [aString cStringLength]}, i.e. the whole C string. */
static NSRange cStringRange(NSString *aString)
{
    return NSMakeRange(0, [aString cStringLength]);
}
/*
 * Locates the n-th (zero-based) match of pattern in cString, searching from
 * range.location and treating NSMaxRange(range) as the end of the string.
 *
 * mask:
 *   NSBackwardsSearch - n counts from the last match; it is converted to a
 *                       forward index using _numOfRegexStruct().
 *   NSAnchoredSearch  - the match only counts if it starts at range.location
 *                       (or, combined with NSBackwardsSearch, ends at
 *                       NSMaxRange(range)).
 *
 * Returns YES when such a match exists, NO otherwise.  The registers
 * argument is handed to re_search_pattern() and read back afterwards
 * (presumably it holds the offsets of the last match attempted -- confirm
 * against regexpr.c).  Raises MiscRegexException when the search result is -2.
 */
static BOOL _findRegexStruct(const char *cString, regexp_t pattern, unsigned mask,
NSRange range, int n, regexp_registers_t registers)
{
char *myString = (char*)cString;
// The end of the range is passed as the string length, so the search never
// looks past the range.
int myLength = NSMaxRange(range);
int startSpot = range.location;
int numFound = 0;
int currSpot = range.location;
int lastEnd = 0;
int posFound = -1;
int matchLen = -1;
if (mask & NSBackwardsSearch) {
// Turn "n-th from the end" into "n-th from the start".
int num = _numOfRegexStruct(cString, pattern, mask, range);
n = num - 1 - n;
}
if (n < 0) {
return NO;
}
// Step from match to match until match number n has been consumed or the
// search reports a negative result.
while ((numFound <= n) && (currSpot <= myLength) &&
((posFound = re_search_pattern(pattern, myString, myLength, currSpot,
myLength-currSpot, registers)) >= 0))
{
matchLen = _rangeFromReg(registers, 0).length;
// An empty match found while currSpot still sits on the end of the previous
// match (and past the start of the range) is not counted; move on one byte.
if (matchLen == 0 && currSpot == lastEnd && currSpot > startSpot) {
currSpot++;
continue;
}
if (matchLen >= 0 && numFound <= n) {
lastEnd=currSpot = posFound + matchLen;
// Step past an empty match so the next search starts one byte further on.
if (matchLen == 0) currSpot++;
numFound++;
}
else {
posFound = -1;
break;
}
}
// Fewer than n+1 matches were found: report failure.
if (n >= numFound) posFound = -1;
if (posFound == -2 || matchLen == -2)
[NSException raise:MiscRegexException format:@"Error while searching"];
if (posFound < 0) return NO;
if (mask & NSAnchoredSearch)
{
// An anchored match must touch the relevant edge of the range.
if (mask & NSBackwardsSearch) {
if (registers->end[0] == NSMaxRange(range)) return YES;
}
else {
if (registers->start[0] == range.location) return YES;
}
return NO;
}
return YES;
}
/*
 * Returns an array with one string per register slot, from slot 0 up to the
 * highest slot whose start offset is non-negative (slot 0 is always
 * included).  Slots with a negative start offset contribute @"".
 */
static NSArray *_substringsForRegisters(const char *cString, regexp_registers_t registers)
{
    NSMutableArray *pieces = [NSMutableArray array];
    int lastUsed = 0;
    int slot;

    // Find the highest slot in use by scanning down from the top.
    for (slot = RE_NREGS - 1; slot > 0; slot--)
    {
        if (registers->start[slot] >= 0)
        {
            lastUsed = slot;
            break;
        }
    }

    for (slot = 0; slot <= lastUsed; slot++)
    {
        NSString *piece = @"";

        if (registers->start[slot] >= 0)
            piece = _substringFromRange(cString, _rangeFromReg(registers, slot));
        [pieces addObject:piece];
    }
    return pieces;
}
/*
 * Returns an array of NSValue-wrapped NSRanges, one per register slot, from
 * slot 0 up to the highest slot whose start offset is non-negative (slot 0
 * is always included).  Each range is whatever _rangeFromReg() reports for
 * that slot.
 */
static NSArray *_subrangesForRegisters(regexp_registers_t registers)
{
    NSMutableArray *wrappedRanges = [NSMutableArray array];
    int lastUsed = 0;
    int slot;

    // Find the highest slot in use by scanning down from the top.
    for (slot = RE_NREGS - 1; slot > 0; slot--)
    {
        if (registers->start[slot] >= 0)
        {
            lastUsed = slot;
            break;
        }
    }

    for (slot = 0; slot <= lastUsed; slot++)
    {
        NSRange slotRange = _rangeFromReg(registers, slot);

        [wrappedRanges addObject:[NSValue value:&slotRange withObjCType:@encode(NSRange)]];
    }
    return wrappedRanges;
}
// int num = [self numOfRegexStruct:pattern options:mask];
/*
 * Counts the matches of pattern in cString, searching from range.location
 * and treating NSMaxRange(range) as the end of the string.  An empty match
 * located exactly where the previous match ended is not counted.  The
 * options argument is currently unused.
 *
 * Raises MiscRegexException when the search result is -2.
 */
static int _numOfRegexStruct(const char *cString, regexp_t pattern, int options, NSRange range)
{
    struct re_registers registers;
    char *myString = (char*)cString;
    int myLength = NSMaxRange(range);
    int startSpot = 0;
    int currSpot = range.location;
    int lastEnd = 0;
    int posFound = -1;
    int numFound = 0;
    NSRange foundRange;

    while ((currSpot <= myLength) &&
           (posFound = re_search_pattern(pattern, myString, myLength,
                            currSpot, myLength-currSpot, &registers)) >= 0)
    {
        foundRange = _rangeFromReg(&registers, 0);
        // Skip an empty match that sits right at the end of the previous
        // match; advance one byte and search again.
        if (lastEnd == foundRange.location && foundRange.length == 0 && currSpot > startSpot)
        {
            currSpot++;
            continue;
        }
        numFound++;
        lastEnd = currSpot = NSMaxRange(foundRange);
        // Step past an empty match so the next search starts one byte on.
        if (foundRange.length == 0) currSpot++;
    }
    if (posFound == -2)
        [NSException raise:MiscRegexException format:@"Error during regex search"];
    return numFound;
}
// These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.