COWSStringLibrary.m

This is COWSStringLibrary.m in view mode; [Download] [Up]
/*
	Copyright (C) 1994 Sean Luke

	COWSStringLibrary.m
	Version 1.0
	Sean Luke
	
*/




#import "COWSStringLibrary.h"
#import <ctype.h>

@implementation COWSStringLibrary


- loadLibrary:sender
	// Announces every function of this library to sender.
	// Each table entry below is handed, in order, to sender's
	// addLibraryFunction:selector:target: with self as the target.
	// Returns self.
	{
	struct {char* name; SEL action;} table[]=
		{
		{"substring",	@selector(string_substring:)},
		{"token",		@selector(string_token:)},
		{"length",		@selector(string_length:)},
		{"upper-case",	@selector(string_uppercase:)},
		{"lower-case",	@selector(string_lowercase:)},
		{"mixed-case",	@selector(string_mixedcase:)},
		{"titled-case",	@selector(string_titledcase:)},
		{"return",		@selector(returnKey:)},
		{"tab",			@selector(tabKey:)},
		{"to-ascii",	@selector(toAscii:)},
		{"from-ascii",	@selector(fromAscii:)}
		};
	int count=sizeof(table)/sizeof(table[0]);
	int i;
	
	for (i=0;i<count;i++)
		{
		[sender addLibraryFunction:table[i].name
				selector:table[i].action
				target:self];
		}
	return self;
	}

// Private functions for searching by character and by token

char* COWSSTRINGLIBRARY_charsearch(const char* string,int start,int end)
	// Returns a newly allocated copy of characters start..end of string.
	// Positions start at 1 and both ends are included.
	// If end==INT_MAX, it means "through the last character".
	// Returns "" (also newly allocated) if nothing can be found:
	// string is NULL or empty, start<1, start>end, or either position
	// lies beyond the end of the string.
	// Returns NULL only if memory can't be allocated.
	// You're responsible for freeing the return value.
	{
	int len;
	int count;
	char* ret;
	
	// get rid of frivolous possibilities.  start and end are compared
	// as given rather than decremented first, so that a value of
	// INT_MIN can't overflow.
	
	if (string==NULL || string[0]=='\0' || start<1 || start>end)
		return calloc(1,sizeof(char));		// a zeroed byte is ""
	
	len=strlen(string);
	
	if (start>len) return calloc(1,sizeof(char));
	
	// set up max end
	
	if (end==INT_MAX) end=len;
	
	// make sure max end is not invalid
	
	if (end>len) return calloc(1,sizeof(char));
	
	// grab substring
	
	count=end-start+1;
	ret=malloc(sizeof(char)*(count+1));		// one extra for the \0
	if (ret==NULL) return NULL;
	memcpy(ret,&string[start-1],count);
	ret[count]='\0';
	return ret;
	}

int COWSSTRINGLIBRARY_tokenpos(const char* string,const char* delim,int number)
	// A token is a maximal run of characters none of which appear in
	// delim.  Tokens are numbered from 1.
	// Returns the index (from 0) of the first character of token # number.
	// If there is no such token, returns -lastpos-1, where lastpos is the
	// index of the last non-delimeter character in string (0 if there
	// is none), so the result is always negative in that case.
	// Returns -1 if string is NULL, or if delim is NULL or empty.
	{
	int x;
	int len;
	int num=0;
	int lastpos=0;			// the last place a non-delimeter was found
	
	if (string==NULL || delim==NULL || delim[0]=='\0') return -1;
	
	len=strlen(string);
	for (x=0;x<len;x++)
		{
		// string[x] is never '\0' here, so strchr can't match the
		// terminator of delim by accident.
		if (strchr(delim,string[x])!=NULL) continue;	// a delimeter
		
		lastpos=x;			// record the position itself, not a count
		
		// a token starts at the beginning of the string or right
		// after a delimeter.  Only real characters are looked at, so a
		// trailing delimeter never produces an empty token at the end.
		if (x==0 || strchr(delim,string[x-1])!=NULL)
			{
			if (++num==number) return x;
			}
		}
	return (0-lastpos)-1;		// oh, well...	
	}

char* COWSSTRINGLIBRARY_tokensearch(const char* string,const char* delim,
		int start, int end)
	
	// Returns a newly allocated copy of tokens start..end of string,
	// including whatever delimeters lie between them.  Tokens are
	// numbered from 1 and are separated by any character in delim.
	// If delim is NULL or empty, this hands the job to charsearch and
	// start/end are character positions instead.
	// If end==INT_MAX, it's the last token of all.
	// Returns "" (newly allocated) if string is NULL or empty, start<1,
	// start>end, or either token doesn't exist.
	// Returns NULL only if memory can't be allocated.
	// You're responsible for freeing the return value.
	
	// this isn't super-efficient an algorithm, as it scans the string
	// from the beginning for each end of the range.
	{
	int len;
	int first;		// index of the first character to copy
	int last;		// index of the last character to copy
	int count;
	char* ret;
	
	// first off, we check to see if we should really be calling charsearch
	
	if (delim==NULL || delim[0]=='\0') return
		COWSSTRINGLIBRARY_charsearch(string,start,end);
	
	// obvious stupidities.  string is tested before strlen touches it.
	
	if (string==NULL || string[0]=='\0' || start<1 || start>end)
		return calloc(1,sizeof(char));		// a zeroed byte is ""
	
	// next off, in a string of X characters, there can be no more than
	// len/2+1 tokens.  So we check for this frivolity first before the much
	// more costly act of calling tokenpos
	
	len=strlen(string);
	if (start > len/2+1) return calloc(1,sizeof(char));
	
	// do the tokenpos on the first element.  A position outside the
	// string (negative, or the terminating \0) means no such token.
	
	first=COWSSTRINGLIBRARY_tokenpos(string,delim,start);
	if (first<0 || first>=len) return calloc(1,sizeof(char));
	
	if (end==INT_MAX)
		{
		// through the last token: back up from the end of the string
		// over any trailing delimeters.  string[first] starts a token,
		// so this stops at first at the latest.
		last=len-1;
		while (last>first && strchr(delim,string[last])!=NULL) last--;
		}
	else
		{
		// next do the tokenpos on the second element, if needed
		
		last=(start==end ? first :
			COWSSTRINGLIBRARY_tokenpos(string,delim,end));
		if (last<0 || last>=len) return calloc(1,sizeof(char));
		
		// last is where the final token starts; move it up to the
		// character just before the next delimeter (or the end of
		// the string).
		while (last+1<len && strchr(delim,string[last+1])==NULL) last++;
		}
	
	// Now grab the substring from first to last.
	
	count=last-first+1;
	ret=malloc(sizeof(char)*(count+1));	// one extra for the \0
	if (ret==NULL) return NULL;
	memcpy(ret,&string[first],count);
	ret[count]='\0';
	return ret;
	}

- string_substring:arg_list
	// format:
	//(substring <string> <start-val> <end-val> <delimeters>)
	// Extracts tokens <start-val> through <end-val> of <string> by way
	// of COWSSTRINGLIBRARY_tokensearch.
	// The literal string "end" as <end-val> is passed on as INT_MAX.
	// <delimeters> is optional; when it is left out NULL is passed on.
	// Returns a new COWSStringNode holding the result, or holding a
	// message with its error flag set if one of the first three
	// arguments is missing.
	{
	const char* source;
	const char* separators=NULL;
	int first_pos;
	int last_pos;
	char* piece;
	id node=[[COWSStringNode alloc] init];
	
	// argument 1: the string
	if ([arg_list first]==NULL)
		{
		[node setString:"substring:  no string to get a token from"];
		[node setError:YES];
		return node;
		}
	source=[[arg_list now] string];
	
	// argument 2: the start position
	if ([arg_list next]==NULL) 
		{
		[node setString:"substring:  no start position for the token"];
		[node setError:YES];
		return node;
		}
	first_pos=[[arg_list now] intVal];
	
	// argument 3: the end position, or the "end" keyword
	if ([arg_list next]==NULL) 
		{
		[node setString:"substring:  no end position for the token"];
		[node setError:YES];
		return node;
		}
	last_pos=[[arg_list now] intVal];
	if (strcmp("end",[[arg_list now] string])==0) last_pos=INT_MAX;
	
	// argument 4 (optional): the delimeters
	if ([arg_list next]!=NULL) separators=[[arg_list now] string];
	
	piece=COWSSTRINGLIBRARY_tokensearch(source,separators,first_pos,last_pos);
	[node setString:piece];
	free(piece);
	return node;	
	}
	
- string_token:arg_list
	// format:
	//(token <string> <position> <delimeters>)
	// Extracts the single token at <position> of <string> by calling
	// COWSSTRINGLIBRARY_tokensearch with <position> as both ends of
	// the range.
	// <delimeters> is optional; when it is left out NULL is passed on.
	// Returns a new COWSStringNode holding the result, or holding a
	// message with its error flag set if <string> or <position> is
	// missing.
	{
	const char* source;
	const char* separators=NULL;
	int which;
	char* piece;
	id node=[[COWSStringNode alloc] init];
	
	// argument 1: the string
	if ([arg_list first]==NULL)
		{
		[node setString:"token:  no string to get a token from"];
		[node setError:YES];
		return node;
		}
	source=[[arg_list now] string];
	
	// argument 2: the position
	if ([arg_list next]==NULL) 
		{
		[node setString:"token:  no position for the token"];
		[node setError:YES];
		return node;
		}
	which=[[arg_list now] intVal];
	
	// argument 3 (optional): the delimeters
	if ([arg_list next]!=NULL) separators=[[arg_list now] string];
	
	piece=COWSSTRINGLIBRARY_tokensearch(source,separators,which,which);
	[node setString:piece];
	free(piece);
	return node;	
	}
	
- string_length:arg_list
	// format:
	//(length <string> <delimeters>)
	// no delimiters, or "" for delimeters, means count characters;
	// otherwise the number of tokens is counted, a token being a
	// maximal run of characters that are not in <delimeters>.
	// Returns a new COWSStringNode holding the count as its int value,
	// or holding a message with its error flag set if <string> is
	// missing.
	{
	const char* string;
	const char* delim=NULL;
	const char* scan;
	int counter=0;
	
	id ret=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		[ret setString:"length:  no string to get the length of"];
		[ret setError:YES];
		return ret;
		}
	else string=[[arg_list now] string];
	if ([arg_list next]!=NULL) delim=[[arg_list now] string];
	
	// an empty delimeter string counts characters too, just as the
	// format comment promises
	if (delim==NULL || delim[0]=='\0')
		{
		[ret setIntVal:strlen(string)];
		return ret;
		}
	
	// okay, so it's not char-by-char...
	// walk the string in place: skip a run of delimeters, then count
	// and skip a run of non-delimeters.  This needs no scratch copy
	// and doesn't disturb strtok's hidden state.
	scan=string+strspn(string,delim);
	while (*scan!='\0')
		{
		counter++;
		scan+=strcspn(scan,delim);
		scan+=strspn(scan,delim);
		}
	[ret setIntVal:counter];
	return ret;
	}
	
- string_uppercase:arg_list
	// format:
	//(upper-case <string>)
	// Returns a new COWSStringNode holding a copy of <string> with
	// every character passed through toupper, or holding a message
	// with its error flag set if <string> is missing or memory runs out.
	{
	int x;
	int len;
	const char* string;
	char* return_val;
	id ret=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		[ret setString:"upper-case:  no string to set"];
		[ret setError:YES];
		return ret;
		}
	else string=[[arg_list now] string];
	
	len=strlen(string);
	return_val=malloc(len+1);
	if (return_val==NULL)
		{
		[ret setString:"upper-case:  out of memory"];
		[ret setError:YES];
		return ret;
		}
	strcpy(return_val,string);
	
	// the ctype functions are only defined for unsigned char values
	// (and EOF), so a plain char that may be negative is cast first
	for (x=0;x<len;x++)
		return_val[x]=toupper((unsigned char)return_val[x]);
	
	[ret setString:return_val];
	free(return_val);
	return ret;
	}
	
- string_lowercase:arg_list
	// format:
	//(lower-case <string>)
	// Returns a new COWSStringNode holding a copy of <string> with
	// every character passed through tolower, or holding a message
	// with its error flag set if <string> is missing or memory runs out.
	{
	int x;
	int len;
	const char* string;
	char* return_val;
	id ret=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		// the message names this function, not upper-case
		[ret setString:"lower-case:  no string to set"];
		[ret setError:YES];
		return ret;
		}
	else string=[[arg_list now] string];
	
	len=strlen(string);
	return_val=malloc(len+1);
	if (return_val==NULL)
		{
		[ret setString:"lower-case:  out of memory"];
		[ret setError:YES];
		return ret;
		}
	strcpy(return_val,string);
	
	// the ctype functions are only defined for unsigned char values
	// (and EOF), so a plain char that may be negative is cast first
	for (x=0;x<len;x++)
		return_val[x]=tolower((unsigned char)return_val[x]);
	
	[ret setString:return_val];
	free(return_val);
	return ret;
	}
	
	
// functions used for case conversion	
	
int COWSSTRINGLIBRARY_punc(char val)
	// Returns 1 if val is one of the punctuation characters
	// . ! ? , ; : ' " ( ) [ ] { } & @ ` /
	// and 0 for anything else.
	{
	switch (val)
		{
		case '.': case '!': case '?':
		case ',': case ';': case ':':
		case '\'': case '\"':
		case '(': case ')':
		case '[': case ']':
		case '{': case '}':
		case '&': case '@': case '`': case '/':
			return 1;
		default:
			return 0;
		}
	}	
	
int COWSSTRINGLIBRARY_formalpunc(char val)
	// Returns 1 if val is a period, exclamation mark or question mark,
	// and 0 for anything else.
	{
	return (val=='.' || val=='!' || val=='?') ? 1 : 0;
	}	
	
- string_titledcase:arg_list
	// format:
	//(titled-case <string>)
	// Returns a new COWSStringNode holding a copy of <string> in which
	// the first letter of the string, and the first letter after any
	// whitespace or any character accepted by COWSSTRINGLIBRARY_punc,
	// is upper case and every other letter is lower case.
	// If <string> is missing or memory runs out, the node holds a
	// message and has its error flag set.
	{
	int x;
	int len;
	const char* string;
	char* return_val;
	unsigned char c;
	int set_next_letter=1;
	id ret=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		// the message names this function, not upper-case
		[ret setString:"titled-case:  no string to set"];
		[ret setError:YES];
		return ret;
		}
	else string=[[arg_list now] string];
	
	len=strlen(string);
	return_val=malloc(len+1);
	if (return_val==NULL)
		{
		[ret setString:"titled-case:  out of memory"];
		[ret setError:YES];
		return ret;
		}
	strcpy(return_val,string);
	for (x=0;x<len;x++) 
		{
		// the ctype functions are only defined for unsigned char
		// values (and EOF), so they are given c rather than a plain
		// char that may be negative
		c=(unsigned char)return_val[x];
		if (isspace(c)||COWSSTRINGLIBRARY_punc(return_val[x]))
			set_next_letter=1;
		else if (set_next_letter&&isalpha(c))
			{return_val[x]=toupper(c);set_next_letter=0;}
		else if (isalpha(c))
			return_val[x]=tolower(c);
		}
	
	[ret setString:return_val];
	free(return_val);
	return ret;
	}
	
- string_mixedcase:arg_list
	// format:
	//(mixed-case <string>)
	// Returns a new COWSStringNode holding a copy of <string> in
	// sentence case: the first letter of the string, and the first
	// letter after any character accepted by
	// COWSSTRINGLIBRARY_formalpunc, is upper case and every other
	// letter is lower case.  A digit met while a capital is still
	// pending cancels it.
	// If <string> is missing or memory runs out, the node holds a
	// message and has its error flag set.
	{
	int x;
	int len;
	const char* string;
	char* return_val;
	unsigned char c;
	int set_next_letter=1;
	id ret=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		// the message names this function, not upper-case
		[ret setString:"mixed-case:  no string to set"];
		[ret setError:YES];
		return ret;
		}
	else string=[[arg_list now] string];
	
	len=strlen(string);
	return_val=malloc(len+1);
	if (return_val==NULL)
		{
		[ret setString:"mixed-case:  out of memory"];
		[ret setError:YES];
		return ret;
		}
	strcpy(return_val,string);
	for (x=0;x<len;x++) 
		{
		// the ctype functions are only defined for unsigned char
		// values (and EOF), so they are given c rather than a plain
		// char that may be negative
		c=(unsigned char)return_val[x];
		if (COWSSTRINGLIBRARY_formalpunc(return_val[x]))
			set_next_letter=1;
		else if (isdigit(c)) 
			set_next_letter=0; 
			// digit started sentence
		else if (set_next_letter&&isalpha(c))
			{return_val[x]=toupper(c);set_next_letter=0;}
		else if (isalpha(c))
			return_val[x]=tolower(c);
		}
	
	[ret setString:return_val];
	free(return_val);
	return ret;
	}
	
- returnKey:arg_list
	// format:
	//(return)
	// Returns a new COWSStringNode holding a single newline character.
	// arg_list is not looked at.
	{
	id newline_node;
	
	newline_node=[[COWSStringNode alloc] init];
	[newline_node setString:"\n"];
	return newline_node;	
	}
	
- tabKey:arg_list
	// format:
	//(tab)
	// Returns a new COWSStringNode holding a single tab character.
	// arg_list is not looked at.
	{
	id tab_node;
	
	tab_node=[[COWSStringNode alloc] init];
	[tab_node setString:"\t"];
	return tab_node;	
	}
	
- toAscii:arg_list
	// format:
	//(to-ascii <string>)
	// Returns a new COWSStringNode whose int value is the character
	// code (0-255) of the first character of <string>.
	// If <string> is missing or empty, the node holds a message and
	// has its error flag set.
	{
	const char* string;
	id ret=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		[ret setString:"to-ascii:  not enough values"];
		[ret setError:YES];
		return ret;
		}
	else string=[[arg_list now] string];
	
	if (string[0]=='\0')
		{
		[ret setString:"to-ascii:  value is empty"];
		[ret setError:YES];
		return ret;
		}
	
	// go through unsigned char: where plain char is signed, casting
	// it straight to unsigned int would sign-extend characters above
	// 127 into huge values
	[ret setIntVal:(unsigned char) string[0]];
	return ret;	
	}
	
- fromAscii:arg_list
	// format:
	//(from-ascii <value>)
	// Returns a new COWSStringNode holding the one-character string
	// whose character is the int value of the first argument cast to
	// char.  If the argument is missing, the node holds a message and
	// has its error flag set.
	{
	int code;
	char text[2];
	id node=[[COWSStringNode alloc] init];
	
	if ([arg_list first]==NULL)
		{
		[node setString:"from-ascii:  not enough values"];
		[node setError:YES];
		return node;
		}
	code=[[arg_list now] intVal];
	
	// build the one-character, \0-terminated string
	text[0]=(char)code;
	text[1]='\0';
	
	[node setString:text];
	return node;	
	}

@end
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.