rtfFile.m

This is rtfFile.m in view mode; [Download] [Up]
/***********************************************************************\
RTF file class for Convert RTF which converts between Mac and NeXT rtf formats.
Copyright (C) 1993 David John Burrowes

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

The author, David John Burrowes, can be reached at:
	davidjohn@kira.net.netcom.com
	David John Burrowes
	1926 Ivy #10
	San Mateo, CA 94403-1367
\***********************************************************************/

#import "rtfFile.h"
#import <ctype.h>
#import <string.h>
#import	<stdio.h>
#import "rtfToken.h"

#import <objc/List.h>	// for the list class

@implementation rtfFile


//
//	Initializes the receiver for the file at pathname (the superclass is handed
//	the pathname), creates the token queue, and resets every piece of output
//	bookkeeping this class keeps.  Returns self.
//
- initAndUse:(roCString) pathname;
{
	[super   initAndUse: pathname];
	//
	//	The pending-output queue of tokens, and the state that describes it.
	//
	thequeue = [[List   alloc] initCount: MAXLINELENGTH];
	foundRTF = NO;				// no \rtf token has been queued yet
	QueueHasBegin = NO;			// no { token is waiting in the queue
	TotalLength = 0;			// length of the output currently queued
	textOutputLength = 0;		// 93.02.21	Added for bugfix
	//
	//	GetWordToken consults PictureHint to size its buffers, so give it a
	//	definite starting value rather than relying on whatever the enumeration
	//	happens to number as zero.
	//
	PictureHint = NoHint;
	return self;
}

//
//	Releases the token queue and then the receiver.  Any tokens still waiting in
//	the queue (e.g. the file was never flushed or closed) are freed as well; the
//	queue owns them, since Write:TokensAs: and ClearThroughLastBegin both free
//	the tokens they remove.  List's own free does not free its contents, so
//	freeObjects is sent first.
//
-free
{
	[thequeue   freeObjects];
	[thequeue   free];
	return [super   free];
}


//
//	Writes out (pretty-printed) whatever tokens are still queued, then lets the
//	superclass close and save the file.  Returns self.
//
- CloseAndSave
{
	//	Nothing may be left pending once the file is closed.
	[self FlushQueue: PRETTYPRINT];

	[super CloseAndSave];

	return self;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		WriteNeXTGraphicAt:WithName:Width:Height:
//	Parameters:	The width and height of the graphic, the file it's in (just the name, no
//				path) and a value which appears to be a dummy value these days.
//	Returns:		none
//	Stores:		errors
//	Description:
//		The NeXTGraphic control word is pretty damn weird.  So, this routine is dedicated
//		to writing it out properly, so callers needn't worry about how it wants to work in
//		this release. =)
//		In 3.0 (and hopefully this arrangement will work OK in the future?) the format is:
//		{<opt-rtf-text>{\NeXTGraphic#<1space>filename<1space>\width#<1space>
//		\height#\n}\n,-or-�}
//		Now, the filename and the width and height make sense.  The fact that I've seen
//		both comma and � is strange because I note no difference between them.  Not only
//		that, but removing them causes the picture not to display properly.  The number after
//		NeXTGraphic also appears random, in that I can set it to other values and nothing
//		bad seems to happen.  Until I figure out otherwise, callers are recommended to set
//		this to 0.  Scott Hess has suggested, very reasonably, that it may be a character count
//		(i.e this goes in as the 15th character of this document).  This requires some
//		investigation.  In my experience, Edit, at least, is VERY picky about the placement of
//		some of those braces, etc.  So, this will enforce the above layout, even if it isn't very
//		pretty or doesn't 'go' with other stuff.
//	Bugs:
//	History:
//		93.01.31	djb	Created
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- WriteNeXTGraphicAt: (PositiveInteger) loc
			WithName: (CString) fileName
			Width: (Integer) theWidth
			Height: (Integer) theHeight
{
	//
	//	Scratch space for the formatted group: the file name plus a generous
	//	100 characters for the fixed text and the three numbers.
	//
	CString	graphicGroup = NewCString(strlen(fileName)+100);

	//
	//	Anything still queued must reach the file before the graphic does.
	//
	[self FlushQueue: PRETTYPRINT];

	//
	//	Format the whole NeXTGraphic group in one go and emit it as a line.
	//	This bypasses the token queue entirely: no length counters or state
	//	flags are updated, and the text is not clipped to the usual line width.
	//
	sprintf(graphicGroup, "{{\\NeXTGraphic%lu %s \\width%ld \\height%ld\n}\n�}",
		loc, fileName, theWidth, theHeight);
	[self WriteTextLine: graphicGroup];

	FreeCString(graphicGroup);
	return self;
}





//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		WriteToken
//	Parameters:	A token to be written
//	Returns:		none
//	Stores:		errors
//	Description:
//		This takes a token, massages the output queue if necessary, and adds the
//		token to the queue to be written out.  The massaging involves flushing the
//		buffer/queue if (a) this is a begin token, or if the queue will generate a line
//		that is longer than we'd like of output.  After this, we just add the current
//		token, and if it is an end token and there is still a { at the head of the queue,
//		we flush the queue then.
//	Bugs:
//	History:
//		92.12.25	djb	Added tempName, so as not to be leaking the cstring to the 'rtf' test
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- WriteToken: theToken
{
	TokenType		kind		= [theToken   GetType];
	PositiveInteger	needed		= [theToken   GetLength];
	CString			name		= [theToken   GetName];
	Boolean			isRTFWord	= NO;

	//
	//	Filter out empty word tokens from sloppy callers: the token is simply
	//	discarded and nothing is queued.
	//
	if ((theType_IsEmptyWord(kind, name)) == YES)
	{
		[theToken	free];
		FreeCString(name);
		return self;
	}
	//
	//	Remember when the \rtf token goes by.  The token that FOLLOWS it (assumed
	//	to be the character-set control word) triggers a one-line flush further
	//	down, so that {\rtf0\foo all land on the same output line.  isRTFWord
	//	keeps that flush from happening for the \rtf token itself.
	//
	if (strcmp(name, "rtf") == 0)
	{
		foundRTF = YES;
		isRTFWord = YES;
	}
	//
	//	Add the space taken by the escape/brace characters.  A { always starts
	//	from an empty queue, so everything pending is written out first.
	//
	if (kind == tokenBeginGroup)
	{
		[self   FlushQueue: PRETTYPRINT];
		QueueHasBegin = YES;
		needed += 1;				// the {
	}
	else if ((kind == tokenEndGroup) || (kind == tokenControlSymbol))
		needed += 1;				// the } or the backslash
	else if (kind == tokenControlWord)
		needed += 2;				// the backslash and the trailing space
	//
	//	If the token will not fit, first try flushing just the leading control
	//	words and symbols; if that is still not enough, flush everything.
	//
	if (TotalLength + needed > MAXLINELENGTH)
	{
		[self   FlushPartially: PRETTYPRINT];
		if (TotalLength + needed > MAXLINELENGTH)
		{
			[self   FlushQueue: PRETTYPRINT];
			TotalLength = 0;
		}
	}
	TotalLength += needed;
	[thequeue   addObject: theToken];
	//
	//	This is the token right after \rtf: force the header out on one line.
	//
	if ((foundRTF == YES)  && (isRTFWord == NO))
	{
		foundRTF = NO;
		[self   FlushQueue: ASONELINE];
	}
	//
	//	A } that closes a { still sitting in the queue: write the group on one line.
	//
	if ((kind == tokenEndGroup) && (QueueHasBegin == YES))
		[self   FlushQueue: ASONELINE];

	FreeCString(name);
	return self;
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		Write:TokensAs:
//	Parameters:	The number of tokens to write, and a value indicating what we should do:
//		ASONELINE		Force the contents of the buffer out one the same line
//		PRETTYPRINT	Write the tokens out in a nice manner
//	Returns:		The type of the last token we wrote out.
//	Stores:		none
//	Description:
//		This is the core routine for writing out information from the queue of tokens.
//		you tell it how many tokens to write out, and then it will write them.  If you
//		tell it to prettyprint, it does so.  If you say asoneline, it will force all the tokens on
//		the queue to be written out on one line.
//		This does decrement TotalLength.
//		If ASONELINE is specified, no newlines are written.
//		with prettyprint, newlines always follow { and } characters, as well as
//		control words.  Control symbols and regular old words only get following
//		newlines if they are the last token in the queue, or if they are followed by
//		other types of tokens (being, end control word).
//		It then returns the type of the last token.  this might allow you to do some kind
//		of special post processing (e.g. if it returns a tokenWord, and you know it wasn't
//		at the end of the queue, then you know there is no newline after it in the file,
//		which might or might not please you)
//	Bugs:
//		If a control word is followed by a control word or begin or an end token, one can
//		safely omit the trailing space.    But, we don't.  
//		In a slightly earlier version, we weren't writing out \* tokens.  Why?
//	History:
//		92.12.25	djb	Added tempName to allow us to properly free cstrings we were leaking
//		93.02.21	djb	Added textOutputLength stuff.  I found that chunks of text were
//					being written out as a single long line, rather than breaking
//					at most every 72 (or whatever) characters.  The new scheme keeps
//					incrementing textOutputLine whenever we write a word or a
//					controlSymbol (otherwise we set it to 0).  If we find that the line is
//					full before we write the token, we move to a new line and continue.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (TokenType) Write: (PositiveInteger)  numTokens  TokensAs: (Integer) instruction
{
	TokenType		theType		= tokenNoToken;	// type of the last token written
	Instance			theToken;
	CString			buffer		= NewCString(1024);	// scratch for formatted output
	PositiveInteger	tokenCount	= numTokens ;
	CString			tempName;		// token's name; freed at the end of each pass
	CString			tempLoc;		// walks through a long word chunk by chunk
	PositiveInteger	numChunks;
	PositiveInteger	chunkNum;
	PositiveInteger	wordLength;
	PositiveInteger	outputLength;	// columns the current token will occupy
	PositiveInteger	queuedLength;	// what WriteToken added to TotalLength for it
	
	while (tokenCount > 0)
	{
		//
		//	Get the next token to write out.  If the caller asked for more tokens
		//	than the queue holds, stop rather than work on a nil token.
		//
		tokenCount--;
		theToken = [thequeue   removeObjectAt:0];
		if (theToken == nil)
			break;
		theType = [theToken   GetType];
		//
		//	Decrement the total length of the pending output line by exactly the
		//	amount WriteToken charged for this token: its length plus 2 for a
		//	control word (escape and trailing space), plus 1 for a brace or control
		//	symbol, and nothing extra for words and null tokens.  Never let the
		//	counter run below zero.
		//
		queuedLength = [theToken   GetLength];
		switch (theType)
		{
			case tokenControlWord:
				queuedLength += 2;
				break;
			case tokenBeginGroup:
			case tokenEndGroup:
			case tokenControlSymbol:
				queuedLength += 1;
				break;
			default:
				break;
		}
		if (queuedLength > TotalLength)
			TotalLength = 0;
		else
			TotalLength -= queuedLength;
		
		tempName = [theToken   GetName];
		//
		//	93.02.21	Prepare the line based on the type of the current token.  Braces
		//	and control words always end up at the start of a fresh line when
		//	prettyprinting, so the text column counter is reset to 0 for them (and,
		//	since it is irrelevant there, for ASONELINE too).  For a control symbol
		//	or word, check whether the token fits on the current line; if not, start
		//	a new line first.  (The counter is advanced for these further below.)
		//
		switch (theType)
		{
			case tokenBeginGroup:
			case tokenEndGroup:
			case tokenControlWord:
				//
				//	If there is text on the line right now, then we must move to a
				//	new line, assuming we are prettyprinting
				//
				if ((textOutputLength != 0) && (instruction != ASONELINE))
					[self  WriteTextLine: ""];
				//
				//	Always set to zero, since essentially always starting fresh after these
				//
				textOutputLength = 0;	
				break;
			case tokenControlSymbol:
			case tokenWord:
				//
				//	Check the length of the token, relative to the line.  If it won't fit,
				//	start a new output line.  A very long word therefore always starts
				//	on its own line, which may give less than ideal output.
				//
				if (theType == tokenControlSymbol)
				{
					if (strcmp(tempName, "\'") == 0)
						outputLength = 4;		// \'xx
					else
						outputLength = 2;		// \x
				}
				else
					outputLength = strlen(tempName);
				if ( ((outputLength + textOutputLength) > MAXLINELENGTH)
					&& (instruction != ASONELINE))
				{
					textOutputLength = 0;
					[self   WriteTextLine: ""];
				}
				break;
			case tokenNoToken:
				//
				//	Do nothing.
				//
				break;
		}
		//
		//	Write the actual token out...
		//
		switch (theType)
		{
			case tokenBeginGroup:
				QueueHasBegin = NO;	// only 1 begin in queue at a time, and this removes it
				if (instruction == ASONELINE)
					[self   WriteText: "{"];
				else
					[self   WriteTextLine: "{"];
				break;
			case tokenEndGroup:
				if (instruction == ASONELINE)
					[self   WriteText: "}"];
				else
					[self   WriteTextLine: "}"];
				break;
			case tokenControlWord:
				//
				//	Write out the control word appropriately, depending on whether we
				//	are writing fixed output, or pretty printing, and whether or not
				//	it has a numeric parameter.  On one line it is followed by a
				//	space; when prettyprinting, by a newline.
				//
				if  (instruction == ASONELINE)
				{
					if ( [theToken   HasValue] == YES)
						[self   WriteTextUsing:  buffer
							WithFormat:  "\\%s%d ",
							tempName, [theToken   GetValue]];
					else	// no parameter
						[self   WriteTextUsing:  buffer
							WithFormat:  "\\%s ", tempName];
				}
				else
				{
					if ( [theToken   HasValue] == YES)
						[self   WriteTextUsing:  buffer
							WithFormat:  "\\%s%d\n",
							tempName, [theToken   GetValue]];
					else	// no parameter
						[self   WriteTextUsing:  buffer
							WithFormat:  "\\%s\n", tempName];
				}
				break;
			case tokenControlSymbol:
				if (strcmp(tempName, "\'") == 0)
				{
					//	\' carries a two digit hex value
					[self   WriteTextUsing:  buffer
						WithFormat:  "\\\'%.2x", [theToken   GetValue]];
					textOutputLength +=4;
				}
				else
				{
					if ( [theToken   HasValue] == YES)
						[self   WriteTextUsing:  buffer
							WithFormat:  "\\%s%d",
							tempName, [theToken   GetValue]];
					else	// no parameter
						[self   WriteTextUsing:  buffer
							WithFormat:  "\\%s", tempName];
					//
					//	This is oversimplistic.  It ignores the case of one with a
					//	parameter (which shouldn't happen anyway, though)
					//
					textOutputLength +=2;
				}
				break;
			case tokenWord:
				//
				//	Check the length of the word.  In general, it will be no more than
				//	MAXLINELENGTH chars, so we just write it out.  Otherwise, we write it
				//	out in MAXLINELENGTH char chunks, breaking arbitrarily as we go.
				//
				#define	MaxChunkSize	MAXLINELENGTH
				wordLength = strlen(tempName);
				if (wordLength <= MaxChunkSize)
				{
					[self   WriteText: tempName];
					textOutputLength +=wordLength;
				}
				else
				{
					tempLoc = tempName;
					numChunks = wordLength / MaxChunkSize;
					for (chunkNum = 0; chunkNum < numChunks; chunkNum++)
					{
						[self Write: MaxChunkSize BytesFrom: (ByteString) tempLoc];
						[self   WriteText: "\n"];  // kludge for a newline
						tempLoc += MaxChunkSize;
					}
					//
					//	Write any remaining bytes.  Every full chunk ended with a
					//	newline, so the current line now holds only this remainder.
					//
					[self Write: (wordLength % MaxChunkSize)
						BytesFrom: (ByteString) tempLoc];
					textOutputLength = (wordLength % MaxChunkSize);
				}
				break;
			case tokenNoToken:
				break;
		}
		//
		//	The token has been written; it and the copy of its name are done with.
		//
		FreeCString(tempName);
		[theToken   free];
	}
	FreeCString(buffer);
	return theType;
}




//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		ClearThroughLastBegin
//	Parameters:	none
//	Returns:		YES if succeeded, NO if not
//	Stores:		success or failure code
//	Description:
//		This empties the current contents of the queue, if there is a begin token.
//		Otherwise, it returns an error.  This allows the caller to backtrack and
//		decide they didn't want to write out some group after all (e.g. starting to
//		write {\colortbl  only to discover that no colors were used)
//	Bugs:
//	History:
//		92.12.13	djb	Created
//		93.01.02	djb	Fixed bug of not clearing QueueHasBegin after clearing!
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Boolean) ClearThroughLastBegin
{
	Boolean			result		= NO;
	PositiveInteger	tokenCount	= [thequeue count];
	PositiveInteger	index		= 0;
	Instance			temptoken;

	if (QueueHasBegin == YES)
	{
		//
		//	There's a begin here.  WriteToken flushes the queue before it adds a
		//	begin token, so the begin is at the head and throwing away the entire
		//	queue is the same as clearing back through it.  Rip out the N tokens,
		//	and free them.
		//
		for (index = 0; index < tokenCount; index++)
		{
			temptoken =  [thequeue   removeObjectAt:0];
			[temptoken  free];
		}
		//
		//	The queue is now empty, so the state describing it must say so too:
		//	no begin is pending, and no output length is pending.  (Leaving
		//	TotalLength at its old value would make later tokens look as if they
		//	overflowed the line and force early flushes.)
		//
		QueueHasBegin = NO;
		TotalLength = 0;

		result = YES;
		[self   StoreErrorCode: ERR_OK
			AndText: "Succeded in flushing back to { token"];
	}
	else
	{
		//
		//	Nothing to backtrack to; the queue is left alone.
		//
		result = NO;
		[self   StoreErrorCode: ERR_NOBEGIN
			AndText: "Unable to find begin token.  Could not clear"];
	}
	return result;
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		FlushPartially
//	Parameters:	A value indicating what we should do:
//		ASONELINE	Force the contents of the buffer out one the same line
//		PRETTYPRINT	Write the tokens out in a nice manner
//	Returns:		none
//	Stores:		none
//	Description:
//		This is much like FlushQueue, below, with one exception: it counts back from
//		the end of the queue to find the last token that is neither a word nor a control
//		symbol.  Then it writes out everything up to (and including) that token.
//		This allows us to keep any word tokens on the queue so they have the chance to
//		be written out with other word tokens that might be added.
//	Bugs:
//		If a control word is followed by a control word or space or an end token, one can
//		safely omit the trailing space.  
//	History:
//		93.02.21	djb	Added textOutputLength alteration.  See Write:TokenAs: for
//					more details.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-FlushPartially: (Integer) instruction
{
	PositiveInteger	scanIndex	= [thequeue count];
	Boolean			foundBreak	= NO;
	Instance			candidate;
	TokenType		kind;

	//
	//	Walk backwards from the tail of the queue, skipping words and control
	//	symbols, until a token of any other type turns up.  When the loop stops
	//	with foundBreak set, scanIndex is the position of that token.
	//
	while ((scanIndex > 0) && (foundBreak == NO))
	{
		scanIndex--;
		candidate = [thequeue objectAt: scanIndex];
		if (candidate == nil)
			continue;
		kind = [candidate    GetType];
		if ((kind == tokenWord) || (kind == tokenControlSymbol))
			continue;
		foundBreak = YES;
	}
	//
	//	Write everything up to and including that token; the trailing words and
	//	control symbols stay queued so they can be joined by later ones.
	//
	if (foundBreak == YES)
		[self   Write: scanIndex+1 TokensAs: instruction];

	//
	//	One-line output gets its terminating newline here, which also puts the
	//	text column back at the start of a line.
	//
	if (instruction == ASONELINE)
	{
		[self   WriteText: "\n"];
		textOutputLength = 0;		// 93.02.21 For bugfix
	}
	return self;
}




//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		FlushQueue
//	Parameters:	A value indicating what we should do:
//		ASONELINE		Force the contents of the buffer out one the same line
//		PRETTYPRINT	Write the tokens out in a nice manner
//	Returns:		none
//	Stores:		none
//	Description:
//		This calls Write:TokensAs: to write all the tokens in the queue out.  Aside from
//		that, this doesn't do much anymore.
//	Bugs:
//	History:
//		93.02.21	djb	Added textOutputLength alteration.  See Write:TokenAs: for
//					more details.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- FlushQueue: (Integer) instruction
{
	//
	//	Everything that is queued goes out.
	//
	PositiveInteger	pending = [thequeue   count];

	[self   Write: pending  TokensAs: instruction];

	//
	//	One-line output is finished off with a newline, leaving the text
	//	column at the start of a line.
	//
	if (instruction == ASONELINE)
	{
		[self   WriteText: "\n"];
		textOutputLength = 0;		// 93.02.21 For bugfix
	}
	//
	//	Nothing is pending any more.
	//
	TotalLength = 0;
	return self;
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		Hint
//	Parameters:	an item from the enumerated type HintName
//	Returns:		self
//	Stores:		none
//	Description:
//		This is an experiment with implementing a 'hint' method which I've been
//		thinking about.  I'm sure it's not quite right, but I gotta implement it to
//		figure out what's wrong.  The purpose of the method is simple.  It allows the
//		caller to give the instance suggestions about how it might want to behave.
//		Thus, it allows, perhaps, chances to tweak performance.  But it should not
//		cause the output to be altered in any way.
//		At the moment, this only recognizes two hints: that forthcomming word tokens
//		are picture data, and that they aren't.
//	Bugs:
//	History:
//		92.12.24	djb	created
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

- Hint: (HintName) theHint;
{
	//
	//	Only the two picture hints are recorded; NoHint (and anything else)
	//	leaves the current setting untouched.
	//
	if ((theHint == WordsArePictures) || (theHint == WordsAreNotPictures))
		PictureHint = theHint;

	return self;
}

		

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		The token we filled
//				true if we filled it, false if not
//	Description:
//		When called, this tries to get a new token from the file. If we have not reached
//		EOF, we read the next character, and use it to determine what type of token
//		is next.  A \ introduces a control word or symbol, a { a begin group, } an end
//		group, and anything else introduces a word.  Call proper routines to read each in.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

- GetToken
{
	Character	theChar;
	Instance		theToken	= NullInstance;

	[self   ResetResults];

	//
	//	A closed file can't be read.  Return right away so that the
	//	ERR_FILENOTOPEN stored here is what the caller sees, rather than having
	//	it replaced by the EOF/OK status stored at the bottom of this method.
	//
	if (FileIsOpen == NO )
	{
		[self	StoreErrorCode: ERR_FILENOTOPEN AndText: "Can't read from closed file"];
		return NullInstance;
	}
	//
	//	Loop until:
	//		(1) the end of file is reached
	//		(2) a token is found,
	//
	do
	{
		//
		//	If we are coming through here a second time (the token was a null
		//	token the last time through, for instance), then free the token...
		//
		if (theToken != NullInstance)
			[theToken  free];
		//
		//	Peek at the next character, and use it to decide what kind of token
		//	is next.  The reader that is called consumes the character itself.
		//
		theChar = [self LookAtNextCharacter];
		switch (theChar)
		{
			case '{' :
				theToken = [self GetOpenBraceToken];
				break;
			case '}' :
				theToken =[self GetCloseBraceToken];
				break;
			case '\\' :
				theToken = [self GetControlToken];
				break;
			default :
				theToken = [self GetWordToken];
				break;
		}
	}
	while (([theToken GetType] == tokenNoToken) && (FileLocation != fileAtEOF));
	//
	//	Report the file position as the result: EOF if we hit the end of the
	//	file, OK otherwise.  This replaces whatever status the individual
	//	token readers stored.
	//
	if (FileLocation == fileAtEOF)
		[self   StoreErrorCode: ERR_EOF AndText: "We found the end of file, dude."];
	else
		[self   StoreErrorCode: ERR_OK AndText: "Everything went GREAT!"];
	
	return theToken;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetNextControlToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		true if we filled it, false if not
//	Description:
//		This routine simply walks quickly through the file, looking for the next
//		control word or symbol token.  It ignores everything else.  When it finds the
//		start of one, it returns it to the caller.  If it finds eof, it returns a null token and
//		an error.
//		This allows a caller to read in all the control token info more efficiently than calling
//		GetToken.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

- GetNextControlToken
{
	Character	theChar;
	Instance		theToken;

	[self   ResetResults];

	//
	//	A closed file can't be read.  Return right away so that the
	//	ERR_FILENOTOPEN stored here is what the caller sees, rather than having
	//	it replaced by the EOF/OK status stored at the bottom of this method.
	//
	if (FileIsOpen == NO )
	{
		[self	StoreErrorCode: ERR_FILENOTOPEN AndText: "Can't read from closed file"];
		return NullInstance;
	}
	//
	//	Skip bytes until a backslash turns up or the file runs out.
	//
	do
	{
		theChar = [self ReadByte];
	}
	while ((theChar != '\\') && (FileLocation != fileAtEOF));
	if (theChar == '\\')
	{
		//
		//	Put the backslash back: GetControlToken expects to read it itself.
		//
		[self   UnGetCharacter];
		theToken = [self GetControlToken];
	}
	else
		theToken = NullInstance;

	//
	//	Report the file position as the result: EOF if we hit the end of the
	//	file, OK otherwise.
	//
	if (FileLocation == fileAtEOF)
		[self   StoreErrorCode: ERR_EOF AndText: "We found the end of file, dude."];
	else
		[self   StoreErrorCode: ERR_OK AndText: "Everything went GREAT!"];
	
	return theToken;
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetOpenBraceToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		none
//	Description:
//		This simply initalizes a begin-group token Instance, and returns it.  No biggie.
//		We then discard the { that is waiting.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetOpenBraceToken
{
	Instance		theToken;
	Character	theCharacter = [self GetCharacter];
	
	//
	//	Compare against a plain '{'.  (A brace needs no backslash in a character
	//	constant, and \{ is not one of C's defined escape sequences.)
	//
	if (theCharacter == '{')
	{
		//
		//	The brace has been consumed; hand back a begin-group token for it.
		//
		theToken = [[rtfToken alloc] initTokenOfType: tokenBeginGroup];
		[self   StoreErrorCode: ERR_OK AndText: "OK"];
	}
	else
	{
		//
		//	Not a {.  Return the character to the file and hand back a null
		//	token, with an error stored.
		//
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
		[self   UnGetCharacter];
		[self   StoreErrorCode: ERR_BADCHAR
			AndText: "That was not my type of character"];
	}
	
	return theToken;
}


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetCloseBraceToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		none
//	Description:
//		This simply initializes an end-group token Instance, and returns it.  No biggie.
//		We then discard the } that is waiting.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetCloseBraceToken
{
	Instance		closeToken;
	Character	nextChar = [self GetCharacter];

	//
	//	Anything other than a } is an error: give the character back to the
	//	file and answer with a null token.
	//
	if (nextChar != '}')
	{
		Instance	emptyToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];

		[self   UnGetCharacter];
		[self   StoreErrorCode: ERR_BADCHAR
			AndText: "That was not my type of character"];
		return emptyToken;
	}
	//
	//	The } has been consumed; answer with an end-group token for it.
	//
	closeToken = [[rtfToken alloc] initTokenOfType: tokenEndGroup];
	[self   StoreErrorCode: ERR_OK AndText: "OK"];
	return closeToken;
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetControlToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		error
//	Description:
//		Reads in a control token.  If the character following the \ is not a letter, it is a
//		control symbol.  A control symbol has no delimiter, Since, by definition,
//		it is 1 character long (aside from the \ escape).  (if the sequence is undefined, Word
//		appears to simply ignore the control symbol (or word)). Note that \' is unique in
//		that it does take a parameter (a 2 char hex value).
//		Otherwise, we have a control word, which is a sequence of letters
//		followed immediately, in some cases, by a numberic parameter.
//	Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

- GetControlToken
{
	Character		theChar,
					theValue;
	PositiveInteger	nameBufferSize	= 15;		// current capacity of tokenName
	PositiveInteger	nameBufferIncrement	= 15;	// how much to grow it by
	PositiveInteger	nameIndex;
	CString			tempName,
					tokenName = NewCString(nameBufferSize);
	Instance			theToken;
	Integer			theNumber;
	
	[self   ResetResults];
	
	//
	//	A control token must start with a backslash.  If it doesn't, put the
	//	character back and return a null token with an error stored.
	//
	theChar = [self GetCharacter];
	if (theChar != '\\')
	{
		[self   StoreErrorCode: ERR_BADCHAR
			AndText: "The next value in the file was NOT a control token"];
		[self   UnGetCharacter];
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
	}
	else
	{
		//
		//	The character after the backslash decides between a control symbol
		//	(not a letter) and a control word (a letter).  The ctype routines are
		//	only defined for unsigned char values, hence the casts.
		//
		theChar = [self GetCharacter];
		if (isalpha((unsigned char) theChar)== 0)
		{
			theToken = [[rtfToken alloc] initTokenOfType: tokenControlSymbol];
			//
			//	Process a control symbol  \?  Its name is the single character.
			//
			sprintf (tokenName, "%c", theChar);
			[theToken SetTokenName: tokenName];
			//
			//	If the token is \', then get its hex parameter..
			//
			if (theChar == '\'')	// if it's the special one
			{
				theValue = [self GetHexByte]; 
				// @@ IGNORING ERROR CODE!!
				[theToken SetTokenValue: theValue];
			}
		}
		else
		{
			theToken = [[rtfToken alloc] initTokenOfType: tokenControlWord];
			//
			//	Process a control word: accumulate letters into tokenName.
			//
			nameIndex = 0;
			while ((isalpha((unsigned char) theChar) != 0) && (FileLocation != fileAtEOF))
			{
				//
				//	Make room BEFORE storing, always keeping one spare position
				//	for the terminating null, and carry over every character
				//	stored so far.  (Storing first and then copying only the old
				//	buffer size dropped the character that triggered the growth.)
				//
				if (nameIndex + 1 >= nameBufferSize)
				{
					tempName = NewCString(nameBufferSize + nameBufferIncrement);
					memcpy(tempName, tokenName, nameIndex);
					nameBufferSize +=nameBufferIncrement;
					FreeCString(tokenName);
					tokenName = tempName;
				}
				tokenName[nameIndex] = theChar;
				nameIndex++;
				theChar = [self   GetCharacter];
			}
			tokenName[nameIndex] = NullCharacter;
			[theToken SetTokenName: tokenName];
			//
			//	We have so far accumulated \foo, and theChar is the character that
			//	ended the name.  If it is a space, the space is consumed as the
			//	delimiter and we are done.  Otherwise it goes back into the stream.
			//	If it was a digit or a - then a numeric parameter follows: read the
			//	number, and consume the single space after it, if any.
			//	93.02.21	Digits are checked for explicitly so that a newline between
			//	\foo and a number is not taken to be part of the control word.
			//
			if (theChar != ' ')
			{
				[self   UnGetCharacter];
				if  ((isdigit((unsigned char) theChar) != 0) || (theChar == '-'))
				{
					theNumber = [self GetNumber];
					//	Only record the value if the number was read cleanly.
					if ([self   GetErrorCode] == ERR_OK)
						[theToken SetTokenValue: theNumber];
					if ([self   LookAtNextCharacter] == ' ')
						[self   GetCharacter];
				}
			}
		}
	}
	//
	//	tokenName was only working storage (its contents have already been handed
	//	to the token with SetTokenName:), so release it here.
	//
	FreeCString(tokenName);
	return theToken;
}

		




//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetWordToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		error
//	Description:
//		This reads in the next token from the rtf file.  Since we were called by something
//		that presumably knows what it's doing, we assume that the next token IS a word
//		(if we notice it isn't, we return an error, of course)
//		NOTE: spaces are 'legal' components of a Word token IFF they are at the
//		beginning before any non-white characters appear.
//	Bugs:
//	History:
//		92.12.24	djb	Modified to recognize the hints for 'WordsArePictures'
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

- GetWordToken
{
	//
	//	Purpose: collect characters, starting with the next one in the file, until a
	//	delimiter is met: '{', '\', '}', a space that follows non-white text, or end of
	//	file.  CR and LF characters are dropped.  The collected text is returned as a
	//	tokenWord token; if nothing was collected a tokenNoToken token is returned
	//	and ERR_NOTOKENFOUND is stored.
	//
	Boolean			terminatingWhite = NO, // true if a white space found after the word
					nonWhiteRead = NO;	// true if we have read any non-white characters
	PositiveInteger	nameIndex = 0;			// number of characters stored in name so far
	PositiveInteger	nameBufferSize;
	PositiveInteger	nameBufferIncrement;
	CString			tempName,
					name;
	Character		theChar = [self GetCharacter];
	Instance			theToken;

	[self   ResetResults];
	//
	//	Choose the initial size of the name buffer and the amount to grow it by when it
	//	fills.  If we suspect we are about to read a picture, start with a HECK of a lot
	//	more space than normal.  Sizes are one less than a power of 2 so that the text
	//	plus its null byte come to a power of 2 (presumably NewCString adds room for
	//	the terminator -- confirm against its definition).
	//
	if (PictureHint == WordsArePictures)
	{
		nameBufferSize = (32*1024) - 1; // Allocate 32K, (will hold 16K worth of pict)
		nameBufferIncrement = (32*1024) - 1;
	}
	else
	{
		nameBufferSize = 15;
		nameBufferIncrement = 63;
	}
	name = NewCString(nameBufferSize);
	//
	//	Leading spaces are legal parts of a word, so only a first character that is
	//	neither a space nor an end of line character marks the start of non-white text.
	//
	if ((theChar != ' ') && (theChar != '\r') && (theChar != '\n'))
		nonWhiteRead = YES;
	//
	//	As long as we haven't found a delimiter, read new characters
	//
	while ((theChar != '{') 	&&			// the character isn't an open-group
			(theChar != '\\') 	&&			// nor is it an escape
			(theChar != '}')		&&		// nor is it a close group
			(!terminatingWhite)	&&			// we haven't found a white space after the word
			(FileLocation != fileAtEOF))	// the end of file has not been reached (BAAAD)
	{
		//
		//	If the character is a CR or LF, skip it.  Otherwise store the char.
		//
		if ((theChar != '\r') && (theChar != '\n'))
		{
			name[nameIndex] = theChar;
			nameIndex++;
			if (nameIndex >= nameBufferSize)
			{
				//
				//	The buffer is full: move the text into a larger one.  The copy is
				//	terminated explicitly because name has no null byte yet.
				//
				tempName = NewCString(nameBufferSize + nameBufferIncrement);
				strncpy(tempName, name, nameBufferSize);
				tempName[nameBufferSize] = NullCharacter;
				nameBufferSize += nameBufferIncrement;
				FreeCString(name);
				name = tempName;
			}
		}
		//
		//	Get the next character.  If we've already read a non-white character, and just
		//	read a space, then the space serves as a delimiter.
		//	NOTE(review): any non-space character read here, including CR and LF, sets
		//	nonWhiteRead, unlike the first-character check above -- confirm this is intended.
		//
		theChar = [self GetCharacter];
		if (theChar == ' ')
		{
			if (nonWhiteRead)
				terminatingWhite = YES;
		}
		else
			nonWhiteRead = YES;
	}

	//
	//	If we were delimited by white space, or the escape or begin/end-group chars,
	//	then return that character to the file so the next read sees it.
	//
	if (terminatingWhite ||
		(theChar == '{') 	||
		(theChar == '\\') 	||
		(theChar == '}'))
	{
		[self   UnGetCharacter];
	}

	name[nameIndex] = NullCharacter;

	if (nameIndex != 0)
	{
		theToken = [[rtfToken alloc] initTokenOfType: tokenWord];
		[theToken   SetTokenName: name];
	}
	else
	{
		//
		//	Check for a null token (possible if, say, we found only CR or LF before
		//	a control word, or if the first thing we found was a delimiter)
		//
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
		[self   StoreErrorCode: ERR_NOTOKENFOUND AndText: "A null Word token found"];
	}

	FreeCString(name);
	return theToken;
}




//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetFamilyNameToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		error
//	Description:
//		This is a specialized version of the GetWordToken call.  It exists because it turns
//		out that at least MS Word is happy to break the name of a type family across
//		lines, and assumes the end of line character will be treated as a space.  (That is,
//		in all other parts of the file, a line ending should be treated as if it doesn't exist.
//		In this case, it might mean there should be a space there.)  This simply does
//		the actions of reading in all the characters until a { or \ or } is found (hopefully,
//		it will always be the last!!!).  A ; also terminates the name.  Perhaps we should
//		even be checking for the errors from it.  We return the whole to the caller as a
//		word token.
//	Bugs:
//	History:
//		92.12.24	djb	Modified to use namebuffer and namebufferincrement
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

- GetFamilyNameToken
{
	//
	//	Purpose: collect characters, starting with the next one in the file, until a
	//	'{', '\', '}', ';' or end of file is met.  CR and LF characters are stored as
	//	spaces.  The collected text is returned as a tokenWord token; if nothing was
	//	collected a tokenNoToken token is returned and ERR_NOTOKENFOUND is stored.
	//
	PositiveInteger	nameIndex = 0;				// number of characters stored in name so far
	PositiveInteger	nameBufferSize	= 31;
	PositiveInteger	nameBufferIncrement	= 15;
	CString			tempName,
					name;
	Character		theChar = [self GetCharacter];
	Instance			theToken;

	[self   ResetResults];
	//
	//	Allocate space for the family name
	//
	name = NewCString(nameBufferSize);
	//
	//	As long as we haven't found a delimiter, read new characters
	//
	while ((theChar != '{') 	&&			// the character isn't an open-group
			(theChar != '\\') 	&&			// nor is it an escape
			(theChar != '}')		&&		// nor is it a close group
			(theChar != ';')	&&			// special because it can terminate a family name
			(FileLocation != fileAtEOF))	// the end of file has not been reached (BAAAD)
	{
		//
		//	A CR or LF is stored as a space (a family name may be broken across
		//	lines).  Any other character is stored unchanged.
		//
		if ((theChar == '\r') || (theChar == '\n'))
			name[nameIndex] = ' ';
		else
			name[nameIndex] = theChar;
		nameIndex++;
		if (nameIndex >= nameBufferSize)
		{
			//
			//	The buffer is full: move the text into a larger one.  The copy is
			//	terminated explicitly because name has no null byte yet.
			//
			tempName = NewCString(nameBufferSize + nameBufferIncrement);
			strncpy(tempName, name, nameBufferSize);
			tempName[nameBufferSize] = NullCharacter;
			nameBufferSize += nameBufferIncrement;
			FreeCString(name);
			name = tempName;
		}
		//
		//	Get the next character.  Spaces do NOT delimit a family name.
		//
		theChar = [self GetCharacter];
	}

	if (theChar == ';')
	{
		//
		//	The ; itself is consumed, along with any spaces that follow it.  The first
		//	non-space character read is returned to the file.
		//
		do
		{
			theChar = [self   GetCharacter];
		}
		while (theChar == ' ');
		[self   UnGetCharacter];
	}
	else if ((theChar == '{') 	||
			(theChar == '\\') 	||
			(theChar == '}'))
	{
		//
		//	We were delimited by the escape or begin/end-group chars, so return that
		//	character to the file so the next read sees it.
		//
		[self   UnGetCharacter];
	}

	name[nameIndex] = NullCharacter;

	if (strlen(name) != 0)
	{
		theToken = [[rtfToken alloc] initTokenOfType: tokenWord];
		[theToken   SetTokenName: name];
	}
	else
	{
		//
		//	Check for a null token (possible if the first thing we found was a delimiter)
		//
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
		[self   StoreErrorCode: ERR_NOTOKENFOUND AndText: "A null Word token found"];
	}

	FreeCString(name);
	return theToken;
}



@end
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.