// rtfFile.m
/***********************************************************************\
RTF file class for Convert RTF which converts between Mac and NeXT rtf formats.
Copyright (C) 1993 David John Burrowes
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
The author, David John Burrowes, can be reached at:
davidjohn@kira.net.netcom.com
David John Burrowes
1926 Ivy #10
San Mateo, CA 94403-1367
\***********************************************************************/
#import "rtfFile.h"
#import <ctype.h>
#import <string.h>
#import <stdio.h>
#import "rtfToken.h"
#import <objc/List.h> // for the list class
@implementation rtfFile
- initAndUse:(roCString) pathname
{
    //
    // Let the superclass initialize itself for the given pathname first.
    //
    [super initAndUse: pathname];
    //
    // Nothing is pending yet: both running line-length counters start at zero, and
    // neither a \rtf control word nor a begin-group token has been seen.
    //
    TotalLength = 0;
    textOutputLength = 0;
    QueueHasBegin = NO;
    foundRTF = NO;
    //
    // The queue of tokens awaiting output, created with an initial count of
    // MAXLINELENGTH.
    //
    thequeue = [[List alloc] initCount: MAXLINELENGTH];
    return self;
}
-free
{
    //
    // Tokens handed to WriteToken: sit in the queue until Write:TokensAs: or
    // ClearThroughLastBegin removes and frees them. Freeing only the List would
    // leak any tokens still pending, so free the queued tokens first and then
    // the List itself.
    //
    [thequeue freeObjects];
    [thequeue free];
    return [super free];
}
- CloseAndSave
{
    //
    // Pretty-print whatever tokens are still waiting in the queue so they reach
    // the file, then let the superclass do the actual close and save.
    //
    [self FlushQueue: PRETTYPRINT];
    [super CloseAndSave];

    return self;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: WriteNeXTGraphicAt:WithName:Width:Height:
// Parameters: The width and height of the graphic, the file it's in (just the name, no
// path) and a value which appears to be a dummy value these days.
// Returns: none
// Stores: errors
// Description:
// The NeXTGraphic control word is pretty damn weird. So, this routine is dedicated
// to writing it out properly, so callers needn't worry about how it wants to work in
// this release. =)
// In 3.0 (and hopefully this arrangement will work OK in the future?) the format is:
// {<opt-rtf-text>{\NeXTGraphic#<1space>filename<1space>\width#<1space>
// \height#\n}\n,-or-¬}
// Now, the filename and the width and height make sense. The fact that I've seen
// both comma and ¬ is strange because I note no difference between them. Not only
// that, but removing them causes the picture not to display properly. The number after
// NeXTGraphic also appears random, in that I can set it to other values and nothing
// bad seems to happen. Until I figure out otherwise, callers are recommended to set
// this to 0. Scott Hess has suggested, very reasonably, that it may be a character count
// (i.e this goes in as the 15th character of this document). This requires some
// investigation. In my experience, Edit, at least, is VERY picky about the placement of
// some of those braces, etc. So, this will enforce the above layout, even if it isn't very
// pretty or doesn't 'go' with other stuff.
// Bugs:
// History:
// 93.01.31 djb Created
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- WriteNeXTGraphicAt: (PositiveInteger) loc
    WithName: (CString) fileName
    Width: (Integer) theWidth
    Height: (Integer) theHeight
{
    //
    // Scratch string for the whole group: the file name plus a generous 100
    // characters for the fixed text and the three numbers.
    //
    CString graphicText = NewCString(strlen(fileName) + 100);
    //
    // Everything already queued has to reach the file before the graphic does.
    //
    [self FlushQueue: PRETTYPRINT];
    //
    // Format the NeXTGraphic group in the exact layout described above and write
    // it straight to the file. ***NOTE*** this bypasses the token queue entirely:
    // TotalLength and textOutputLength are not updated here, and the text is not
    // clipped to any number of output columns.
    //
    sprintf(graphicText,
        "{{\\NeXTGraphic%lu %s \\width%ld \\height%ld\n}\n¬}",
        loc, fileName, theWidth, theHeight);
    [self WriteTextLine: graphicText];
    //
    // The scratch string is no longer needed.
    //
    FreeCString(graphicText);
    return self;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: WriteToken
// Parameters: A token to be written
// Returns: none
// Stores: errors
// Description:
// This takes a token, massages the output queue if necessary, and adds the
// token to the queue to be written out. The massaging involves flushing the
// buffer/queue if (a) this is a begin token, or (b) the queue would generate a line
// of output that is longer than we'd like. After this, we just add the current
// token, and if it is an end token and there is still a { at the head of the queue,
// we flush the queue then.
// Bugs:
// History:
// 92.12.25 djb Added tempName, so as not to be leaking the cstring to the 'rtf' test
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- WriteToken: theToken
{
    TokenType theType = [theToken GetType];
    PositiveInteger deltaLength = [theToken GetLength];
    Boolean dontFlush = NO;
    //
    // The name is released with FreeCString before every return below.
    //
    CString tempName = [theToken GetName];
    //
    // Filter out null-length word tokens from sloppy callers: the token is simply
    // freed and never queued.
    //
    if ((theType == tokenWord) && (strcmp(tempName, "") == 0))
    {
        [theToken free];
        FreeCString(tempName);
        return self;
    }
    //
    // Watch for the initial {\rtfN\foo sequence so that it ends up on one line.
    // Seeing \rtf arms foundRTF; dontFlush keeps us from flushing right after
    // the \rtf itself, so the flush happens after the NEXT token instead (assumed
    // to be the character-set control word). Only a control word named "rtf"
    // counts: a plain text word that happens to read "rtf" must not trigger
    // the header handling.
    //
    if ((theType == tokenControlWord) && (strcmp(tempName, "rtf") == 0))
    {
        foundRTF = YES;
        dontFlush = YES;
    }
    //
    // Compute the length this token adds to the pending line. A begin-group
    // token first flushes whatever is queued, so it always sits at the head of
    // the queue.
    //
    switch (theType)
    {
        case tokenBeginGroup:
            [self FlushQueue: PRETTYPRINT];
            QueueHasBegin = YES;
            deltaLength += 1;    // the { itself
            break;
        case tokenEndGroup:
            deltaLength += 1;    // the } itself
            break;
        case tokenControlSymbol:
            deltaLength += 1;    // the \ escape
            break;
        case tokenControlWord:
            deltaLength += 2;    // the \ escape and the trailing space
            break;
        case tokenNoToken:
        case tokenWord:
            deltaLength += 0;
            break;
    }
    //
    // If the line would get too long, first try writing out only the leading part
    // of the queue (through its last non-text token), keeping trailing text queued.
    //
    if (TotalLength + deltaLength > MAXLINELENGTH)
        [self FlushPartially: PRETTYPRINT];
    //
    // If that did not make enough room, flush everything. Either way, record the
    // new pending length and queue the token.
    //
    if (TotalLength + deltaLength > MAXLINELENGTH)
    {
        [self FlushQueue: PRETTYPRINT];
        TotalLength = deltaLength;
    }
    else
        TotalLength += deltaLength;
    [thequeue addObject: theToken];
    //
    // This is the token following \rtf: force the header out as one line.
    // (theToken is freed by the flush; only theType and tempName are used below.)
    //
    if ((foundRTF == YES) && (dontFlush == NO))
    {
        foundRTF = NO;
        [self FlushQueue: ASONELINE];
    }
    //
    // If we added a } token, and there is a { at the beginning of the queue, the
    // whole group is short enough to be written on one line.
    //
    if ((theType == tokenEndGroup) && (QueueHasBegin == YES))
        [self FlushQueue: ASONELINE];
    FreeCString(tempName);
    return self;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: Write:TokensAs:
// Parameters: The number of tokens to write, and a value indicating what we should do:
// ASONELINE Force the contents of the buffer out on the same line
// PRETTYPRINT Write the tokens out in a nice manner
// Returns: The type of the last token we wrote out.
// Stores: none
// Description:
// This is the core routine for writing out information from the queue of tokens.
// you tell it how many tokens to write out, and then it will write them. If you
// tell it to prettyprint, it does so. If you say asoneline, it will force all the tokens on
// the queue to be written out on one line.
// This does decrement TotalLength.
// If ASONELINE is specified, no newlines are written.
// with prettyprint, newlines always follow { and } characters, as well as
// control words. Control symbols and regular old words only get following
// newlines if they are the last token in the queue, or if they are followed by
// other types of tokens (begin, end, control word).
// It then returns the type of the last token. this might allow you to do some kind
// of special post processing (e.g. if it returns a tokenWord, and you know it wasn't
// at the end of the queue, then you know there is no newline after it in the file,
// which might or might not please you)
// Bugs:
// If a control word is followed by a control word or begin or an end token, one can
// safely omit the trailing space. But, we don't.
// In a slightly earlier version, we weren't writing out \* tokens. Why?
// History:
// 92.12.25 djb Added tempName to allow us to properly free cstrings we were leaking
// 93.02.21 djb Added textOutputLength stuff. I found that chunks of text were
// being written out as a single long line, rather than breaking
// at most every 72 (or whatever) characters. The new scheme keeps
// incrementing textOutputLength whenever we write a word or a
// controlSymbol (otherwise we set it to 0). If we find that the line is
// full before we write the token, we move to a new line and continue.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (TokenType) Write: (PositiveInteger) numTokens TokensAs: (Integer) instruction
{
    TokenType theType = tokenNoToken;
    Instance theToken;
    //
    // Scratch space handed to WriteTextUsing:WithFormat:.
    // NOTE(review): fixed at 1024 bytes; presumably the formatted text is built in
    // it, so an extremely long control word name could overrun it -- confirm.
    //
    CString buffer = NewCString(1024);
    PositiveInteger tokenCount = numTokens;
    PositiveInteger queuedCount = [thequeue count];
    PositiveInteger queuedLength;
    CString tempName;
    CString tempLoc;
    PositiveInteger numChunks;
    PositiveInteger chunkNum;
    PositiveInteger wordLength;
    PositiveInteger outputLength;
    //
    // Never try to write more tokens than are actually queued; otherwise we would
    // pull nil out of the queue and treat it as a token.
    //
    if (tokenCount > queuedCount)
        tokenCount = queuedCount;
    while (tokenCount > 0)
    {
        //
        // Get the next token to write out, and take its contribution back out of
        // the pending line length. This mirrors exactly what WriteToken: added:
        // the token length, plus 2 for a control word (escape and trailing space),
        // plus 1 for {, } and control symbols, plus nothing for words and
        // no-token tokens. TotalLength is never driven below zero.
        //
        tokenCount--;
        theToken = [thequeue removeObjectAt:0];
        theType = [theToken GetType];
        queuedLength = [theToken GetLength];
        switch (theType)
        {
            case tokenControlWord:
                queuedLength += 2;
                break;
            case tokenBeginGroup:
            case tokenEndGroup:
            case tokenControlSymbol:
                queuedLength += 1;
                break;
            case tokenWord:
            case tokenNoToken:
                break;
        }
        if (queuedLength < TotalLength)
            TotalLength -= queuedLength;
        else
            TotalLength = 0;
        tempName = [theToken GetName];
        //
        // Prepare the output line for this token. Braces and control words always
        // finish on a line of their own when prettyprinting, so any text already
        // on the current line is terminated first and the text length restarts at
        // zero. Control symbols and words are text: if the token would not fit on
        // the current line, start a new line (the length is incremented further
        // below, when the token is actually written).
        //
        switch (theType)
        {
            case tokenBeginGroup:
            case tokenEndGroup:
            case tokenControlWord:
                //
                // Text is pending on this line: end it, unless everything is
                // being forced onto one line.
                //
                if ((textOutputLength != 0) && (instruction != ASONELINE))
                    [self WriteTextLine: ""];
                textOutputLength = 0;
                break;
            case tokenControlSymbol:
            case tokenWord:
                //
                // A \' symbol is written as \'hh (4 characters); other control
                // symbols are counted as 2; a word counts its full length. A very
                // long word therefore always starts on a line of its own.
                //
                if (theType == tokenControlSymbol)
                {
                    if (strcmp(tempName, "\'") == 0)
                        outputLength = 4;
                    else
                        outputLength = 2;
                }
                else
                    outputLength = strlen(tempName);
                if ( ((outputLength + textOutputLength) > MAXLINELENGTH)
                        && (instruction != ASONELINE))
                {
                    textOutputLength = 0;
                    [self WriteTextLine: ""];
                }
                break;
            case tokenNoToken:
                //
                // Do nothing.
                //
                break;
        }
        //
        // Write the actual token out...
        //
        switch (theType)
        {
            case tokenBeginGroup:
                QueueHasBegin = NO;  // only 1 begin in queue at a time, and this removes it
                if (instruction == ASONELINE)
                    [self WriteText: "{"];
                else
                    [self WriteTextLine: "{"];
                break;
            case tokenEndGroup:
                if (instruction == ASONELINE)
                    [self WriteText: "}"];
                else
                    [self WriteTextLine: "}"];
                break;
            case tokenControlWord:
                //
                // The control word is followed by a space when forcing one line,
                // and by a newline when prettyprinting; the numeric parameter is
                // included only if the token has one.
                //
                if (instruction == ASONELINE)
                {
                    if ( [theToken HasValue] == YES)
                        [self WriteTextUsing: buffer
                            WithFormat: "\\%s%d ",
                            tempName, [theToken GetValue]];
                    else    // no parameter
                        [self WriteTextUsing: buffer
                            WithFormat: "\\%s ", tempName];
                }
                else
                {
                    if ( [theToken HasValue] == YES)
                        [self WriteTextUsing: buffer
                            WithFormat: "\\%s%d\n",
                            tempName, [theToken GetValue]];
                    else    // no parameter
                        [self WriteTextUsing: buffer
                            WithFormat: "\\%s\n", tempName];
                }
                break;
            case tokenControlSymbol:
                if (strcmp(tempName, "\'") == 0)
                {
                    [self WriteTextUsing: buffer
                        WithFormat: "\\\'%.2x", [theToken GetValue]];
                    textOutputLength += 4;
                }
                else
                {
                    if ( [theToken HasValue] == YES)
                        [self WriteTextUsing: buffer
                            WithFormat: "\\%s%d",
                            tempName, [theToken GetValue]];
                    else    // no parameter
                        [self WriteTextUsing: buffer
                            WithFormat: "\\%s", tempName];
                    //
                    // Oversimplistic: this ignores the width of a parameter
                    // (which should not occur on these symbols anyway).
                    //
                    textOutputLength += 2;
                }
                break;
            case tokenWord:
                //
                // Most words fit within MAXLINELENGTH and are written as is.
                // Longer ones are written in MAXLINELENGTH-sized chunks, each
                // followed by a newline, breaking arbitrarily; the remainder
                // (possibly zero bytes) is left on the current line.
                //
#define MaxChunkSize MAXLINELENGTH
                wordLength = strlen(tempName);
                if (wordLength <= MaxChunkSize)
                {
                    [self WriteText: tempName];
                    textOutputLength += wordLength;
                }
                else
                {
                    tempLoc = tempName;
                    numChunks = wordLength / MaxChunkSize;
                    for (chunkNum = 0; chunkNum < numChunks; chunkNum++)
                    {
                        [self Write: MaxChunkSize BytesFrom: (ByteString) tempLoc];
                        [self WriteText: "\n"];    // kludge for a newline
                        tempLoc += MaxChunkSize;
                    }
                    //
                    // Write any remaining bytes.
                    //
                    [self Write: (wordLength % MaxChunkSize)
                        BytesFrom: (ByteString) tempLoc];
                    textOutputLength += (wordLength % MaxChunkSize);
                }
                break;
            case tokenNoToken:
                break;
        }
        //
        // The token has been written; release its name copy and the token itself.
        //
        FreeCString(tempName);
        [theToken free];
    }
    FreeCString(buffer);
    return theType;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: ClearThroughLastBegin
// Parameters: none
// Returns: YES if succeeded, NO if not
// Stores: success or failure code
// Description:
// This empties current contents of the queue, if there is a begin token.
// Otherwise, it returns an error. This allows the caller to backtrack and
// decide they didn't want to write out some group after all (e.g. starting to
// write {\colortbl only to discover that no colors were used)
// Bugs:
// History:
// 92.12.13 djb Created
// 93.01.02 djb Fixed bug of not clearing QueueHasBegin after clearing!
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- (Boolean) ClearThroughLastBegin
{
    Boolean result = NO;
    PositiveInteger tokenCount = [thequeue count];
    PositiveInteger index = 0;
    Instance temptoken;
    if (QueueHasBegin == YES)
    {
        //
        // There's a begin here. WriteToken: flushes the queue before queueing a
        // begin token, so the begin is at the head and discarding back to it
        // means discarding every queued token.
        //
        for (index = 0; index < tokenCount; index++)
        {
            temptoken = [thequeue removeObjectAt:0];
            [temptoken free];
        }
        //
        // The queue is now empty, so no begin is pending and no line length is
        // pending either. Without resetting TotalLength, the discarded tokens
        // would still count against the next line and trigger early flushes.
        //
        QueueHasBegin = NO;
        TotalLength = 0;
        result = YES;
        [self StoreErrorCode: ERR_OK
            AndText: "Succeded in flushing back to { token"];
    }
    else
    {
        //
        // No begin token is queued: nothing is removed, and the failure is stored.
        //
        result = NO;
        [self StoreErrorCode: ERR_NOBEGIN
            AndText: "Unable to find begin token. Could not clear"];
    }
    return result;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: FlushPartially
// Parameters: A value indicating what we should do:
// ASONELINE Force the contents of the buffer out on the same line
// PRETTYPRINT Write the tokens out in a nice manner
// Returns: none
// Stores: none
// Description:
// This is much like FlushQueue, below, with one exception: it counts back from
// the end of the queue to find the last token that is neither a word nor a control
// symbol. Then, it writes out everything up to (and including) that token.
// This allows us to keep any word tokens on the queue so they have the chance to
// be written out with other word tokens that might be added.
// Bugs:
// If a control word is followed by a control word or space or an end token, one can
// safely omit the trailing space.
// History:
// 93.02.21 djb Added textOutputLength alteration. See Write:TokenAs: for
// more details.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-FlushPartially: (Integer) instruction
{
    Instance candidate;
    TokenType candidateType;
    PositiveInteger position;
    //
    // Walk backward from the tail of the queue. position is always one more than
    // the index being examined, so it is also the number of tokens to write if
    // the scan stops there. Nil entries are skipped. The scan stops at the first
    // token that is neither a word nor a control symbol.
    //
    for (position = [thequeue count]; position > 0; position--)
    {
        candidate = [thequeue objectAt: position - 1];
        if (candidate == nil)
            continue;
        candidateType = [candidate GetType];
        if ( (candidateType != tokenWord) && (candidateType != tokenControlSymbol) )
            break;
    }
    //
    // position is zero only if no such token was found, in which case nothing is
    // written and all of the text stays queued.
    //
    if (position > 0)
        [self Write: position TokensAs: instruction];
    //
    // One-line output is terminated here, and the text line length restarts.
    //
    if (instruction == ASONELINE)
    {
        [self WriteText: "\n"];
        textOutputLength = 0;
    }
    return self;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: FlushQueue
// Parameters: A value indicating what we should do:
// ASONELINE Force the contents of the buffer out on the same line
// PRETTYPRINT Write the tokens out in a nice manner
// Returns: none
// Stores: none
// Description:
// This calls Write:TokensAs: to write all the tokens in the queue out. Aside from
// that, this doesn't do much anymore.
// Bugs:
// History:
// 93.02.21 djb Added textOutputLength alteration. See Write:TokenAs: for
// more details.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- FlushQueue: (Integer) instruction
{
    //
    // Write out every token currently in the queue.
    //
    PositiveInteger pendingTokens = [thequeue count];
    [self Write: pendingTokens TokensAs: instruction];
    //
    // When forcing one line, Write:TokensAs: emits no newlines itself, so end the
    // line here and restart the text line length.
    //
    if (instruction == ASONELINE)
    {
        [self WriteText: "\n"];
        textOutputLength = 0;
    }
    //
    // The queue is empty, so no line length is pending.
    //
    TotalLength = 0;
    return self;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: Hint
// Parameters: an item from the enumerated type HintName
// Returns: self
// Stores: none
// Description:
// This is an experiment with implementing a 'hint' method which I've been
// thinking about. I'm sure it's not quite right, but I gotta implement it to
// figure out what's wrong. The purpose of the method is simple. It allows the
// caller to give the instance suggestions about how it might want to behave.
// Thus, it allows, perhaps, chances to tweak performance. But it should not
// cause the output to be altered in any way.
// At the moment, this only recognizes two hints: that forthcoming word tokens
// are picture data, and that they aren't.
// Bugs:
// History:
// 92.12.24 djb created
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- Hint: (HintName) theHint
{
    //
    // Only the two picture-related hints are remembered. NoHint, or any other
    // value, leaves the stored hint as it was.
    //
    if ((theHint == WordsArePictures) || (theHint == WordsAreNotPictures))
        PictureHint = theHint;
    return self;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetToken
// Parameters: none
// Returns: The token that was read
// Stores: The token we filled
// true if we filled it, false if not
// Description:
// When called, this tries to get a new token from the file. If we have not reached
// EOF, we read the next character, and use it to determine what type of token
// is next. A \ introduces a control word or symbol, a { a begin group, } an end
// group, and anything else introduces a word. Call proper routines to read each in.
// Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetToken
{
    Character theChar;
    Instance theToken = NullInstance;
    [self ResetResults];
    //
    // A closed file cannot be read. Return right away so that the
    // ERR_FILENOTOPEN stored here is what the caller sees; falling through to
    // the EOF/OK bookkeeping at the bottom would store over it.
    //
    if (FileIsOpen == NO )
    {
        [self StoreErrorCode: ERR_FILENOTOPEN AndText: "Can't read from closed file"];
        return NullInstance;
    }
    //
    // Loop until:
    //    (1) the end of file is reached
    //    (2) a token is found,
    //
    do
    {
        //
        // On a second or later pass the previous token was a no-token token;
        // free it before trying again.
        //
        if (theToken != NullInstance)
            [theToken free];
        //
        // Peek at the next character and use it to decide which kind of token
        // to read: { begins a group, } ends one, \ introduces a control word or
        // symbol, and anything else starts a word.
        //
        theChar = [self LookAtNextCharacter];
        switch (theChar)
        {
            case '{' :
                theToken = [self GetOpenBraceToken];
                break;
            case '}' :
                theToken = [self GetCloseBraceToken];
                break;
            case '\\' :
                theToken = [self GetControlToken];
                break;
            default :
                theToken = [self GetWordToken];
                break;
        }
    }
    while (([theToken GetType] == tokenNoToken) && (FileLocation != fileAtEOF));
    //
    // The only outcomes reported from a readable file are end of file and success.
    //
    if (FileLocation == fileAtEOF)
        [self StoreErrorCode: ERR_EOF AndText: "We found the end of file, dude."];
    else
        [self StoreErrorCode: ERR_OK AndText: "Everything went GREAT!"];
    return theToken;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetNextControlToken
// Parameters: none
// Returns: The token that was read
// Stores: true if we filled it, false if not
// Description:
// This routine simply walks quickly through the file, looking for the next
// control word or symbol token. It ignores everything else. When it finds the
// start of one, it returns it to the caller. If it finds eof, it returns a null token and
// an error.
// This allows a caller to read in all the control token info more efficiently than calling
// GetNextToken.
// Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetNextControlToken
{
    Character theChar;
    Instance theToken;
    [self ResetResults];
    //
    // A closed file cannot be read. Return right away so that the
    // ERR_FILENOTOPEN stored here is what the caller sees; falling through to
    // the EOF/OK bookkeeping at the bottom would store over it.
    //
    if (FileIsOpen == NO )
    {
        [self StoreErrorCode: ERR_FILENOTOPEN AndText: "Can't read from closed file"];
        return NullInstance;
    }
    //
    // Skip bytes until a backslash or the end of the file is reached.
    //
    do
    {
        theChar = [self ReadByte];
    }
    while ((theChar != '\\') && (FileLocation != fileAtEOF));
    //
    // A backslash starts a control word or symbol: put it back and let
    // GetControlToken read the whole token. Otherwise there is nothing to return.
    //
    if (theChar == '\\')
    {
        [self UnGetCharacter];
        theToken = [self GetControlToken];
    }
    else
        theToken = NullInstance;
    if (FileLocation == fileAtEOF)
        [self StoreErrorCode: ERR_EOF AndText: "We found the end of file, dude."];
    else
        [self StoreErrorCode: ERR_OK AndText: "Everything went GREAT!"];
    return theToken;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetOpenBraceToken
// Parameters: none
// Returns: The token that was read
// Stores: none
// Description:
// This simply initializes a begin-group token Instance, and returns it. No biggie.
// We then discard the { that is waiting.
// Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetOpenBraceToken
{
    Instance theToken;
    //
    // Consume the next character; it is expected to be the open brace.
    //
    Character theCharacter = [self GetCharacter];
    //
    // Compare against a plain '{'. (The brace needs no escaping in a character
    // constant, and backslash-brace is not a defined C escape sequence.)
    //
    if (theCharacter == '{')
    {
        theToken = [[rtfToken alloc] initTokenOfType: tokenBeginGroup];
        [self StoreErrorCode: ERR_OK AndText: "OK"];
    }
    else
    {
        //
        // Not a brace: hand back a no-token token, return the character to the
        // file, and record the problem.
        //
        theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
        [self UnGetCharacter];
        [self StoreErrorCode: ERR_BADCHAR
            AndText: "That was not my type of character"];
    }
    return theToken;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetCloseBraceToken
// Parameters: none
// Returns: The token that was read
// Stores: none
// Description:
// This simply initializes an end-group token Instance, and returns it. No biggie.
// We then discard the } that is waiting.
// Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetCloseBraceToken
{
    Instance result;
    //
    // Consume the next character; it is expected to be the close brace.
    //
    Character nextChar = [self GetCharacter];
    //
    // Anything else: hand back a no-token token, return the character to the
    // file, and record the problem.
    //
    if (nextChar != '}')
    {
        result = [[rtfToken alloc] initTokenOfType: tokenNoToken];
        [self UnGetCharacter];
        [self StoreErrorCode: ERR_BADCHAR
            AndText: "That was not my type of character"];
        return result;
    }
    //
    // It was the brace, which stays consumed; return an end-group token.
    //
    result = [[rtfToken alloc] initTokenOfType: tokenEndGroup];
    [self StoreErrorCode: ERR_OK AndText: "OK"];
    return result;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetControlToken
// Parameters: none
// Returns: The token that was read
// Stores: error
// Description:
// Reads in a control token. If the character following the \ is not a letter, it is a
// control symbol. A control symbol has no delimiter, Since, by definition,
// it is 1 character long (aside from the \ escape). (if the sequence is undefined, Word
// appears to simply ignore the control symbol (or word)). Note that \' is unique in
// that it does take a parameter (a 2 char hex value).
// Otherwise, we have a control word, which is a sequence of letters
// followed immediately, in some cases, by a numeric parameter.
// Bugs:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetControlToken
{
    Character theChar,
        theValue;
    PositiveInteger nameBufferSize = 15;
    PositiveInteger nameBufferIncrement = 15;
    PositiveInteger nameIndex;
    CString tempName,
        tokenName = NewCString(nameBufferSize);
    Instance theToken;
    Integer theNumber;
    [self ResetResults];
    //
    // The token must start with the \ escape. If it does not, put the character
    // back and return a no-token token along with an error.
    //
    theChar = [self GetCharacter];
    if (theChar != '\\')
    {
        [self StoreErrorCode: ERR_BADCHAR
            AndText: "The next value in the file was NOT a control token"];
        [self UnGetCharacter];
        theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
    }
    else
    {
        theChar = [self GetCharacter];
        if (isalpha(theChar) == 0)
        {
            //
            // Not a letter, so this is a control symbol \? whose name is that
            // single character.
            //
            theToken = [[rtfToken alloc] initTokenOfType: tokenControlSymbol];
            sprintf (tokenName, "%c", theChar);
            [theToken SetTokenName: tokenName];
            //
            // \' is the one symbol that takes a parameter: a hex byte.
            //
            if (theChar == '\'')
            {
                theValue = [self GetHexByte];
                // @@ IGNORING ERROR CODE!!
                [theToken SetTokenValue: theValue];
            }
        }
        else
        {
            //
            // A letter, so this is a control word: accumulate letters into the name.
            //
            theToken = [[rtfToken alloc] initTokenOfType: tokenControlWord];
            nameIndex = 0;
            while ((isalpha(theChar) != 0) && (FileLocation != fileAtEOF))
            {
                tokenName[nameIndex] = theChar;
                nameIndex++;
                //
                // Grow the buffer once it is full. The check comes AFTER the
                // index is advanced, so every character stored so far lies within
                // the first nameBufferSize bytes and is carried over by the copy.
                // (Checking before advancing left the newest character just past
                // the copied range, silently dropping it from long names.)
                //
                if (nameIndex >= nameBufferSize)
                {
                    tempName = NewCString(nameBufferSize + nameBufferIncrement);
                    strncpy(tempName, tokenName, nameBufferSize);
                    nameBufferSize += nameBufferIncrement;
                    FreeCString(tokenName);
                    tokenName = tempName;
                }
                theChar = [self GetCharacter];
            }
            tokenName[nameIndex] = NullCharacter;
            [theToken SetTokenName: tokenName];
            //
            // theChar is now the first character after the name. A space is a
            // delimiter and stays consumed. Anything else is returned to the
            // file; if it is a digit or a '-', the numeric parameter is then read,
            // stored on the token when it read without error, and a single space
            // after the number (if any) is consumed as well.
            //
            if (theChar != ' ')
            {
                [self UnGetCharacter];
                if ((isdigit(theChar) != 0) || (theChar == '-'))
                {
                    theNumber = [self GetNumber];
                    if ([self GetErrorCode] == ERR_OK)
                        [theToken SetTokenValue: theNumber];
                    if ([self LookAtNextCharacter] == ' ')
                        [self GetCharacter];
                }
            }
        }
    }
    //
    // tokenName is released here in every case.
    //
    FreeCString(tokenName);
    return theToken;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetWordToken
// Parameters: none
// Returns: The token that was read
// Stores: error
// Description:
// This reads in the next token from the rtf file. Since we were called by something
// that presumably knows what it's doing, we assume that the next token IS a word
// (if we notice it isn't, we return an error, of course)
// NOTE: spaces are 'legal' components of a Word token IFF they are at the
// beginning before any non-white characters appear.
// Bugs:
// History:
// 92.12.24 djb Modified to recognize the hints for 'WordsArePictures'
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetWordToken
{
    //
    // Reads one Word token from the file, starting with the next character.
    // Returns a tokenWord token carrying the text that was read, or a tokenNoToken
    // token (with ERR_NOTOKENFOUND stored) when no storable character was found
    // before a delimiter.
    //
    // Leading spaces are kept as part of the word; the first space seen after a
    // non-white character ends the word. CR and LF are never stored and never
    // count as non-white characters.
    //
    Boolean terminatingWhite = NO,      // YES once a space following the word has been seen
            nonWhiteRead = NO;          // YES once any non-white character has been read
    PositiveInteger nameIndex = 0;      // next free slot in name
    PositiveInteger nameBufferSize;
    PositiveInteger nameBufferIncrement;
    CString tempName,
            name;
    Character theChar = [self GetCharacter];
    Instance theToken;

    // NOTE(review): ResetResults runs after the first GetCharacter above, so it
    // presumably discards any error that first read stored -- confirm this is intended.
    [self ResetResults];

    //
    // Allocate space for the name of this word token, and decide how much to grow
    // the buffer by if we run out of room. If we suspect we're going to be reading in a
    // picture, allocate a HECK of a lot more space for the token than normal.
    // Sizes are one less than a power of 2 so that the characters plus the null byte
    // together take up a power of 2.
    //
    if (PictureHint == WordsArePictures)
    {
        nameBufferSize = (32*1024) - 1;     // Allocate 32K, (will hold 16K worth of pict)
        nameBufferIncrement = (32*1024) - 1;
    }
    else
    {
        nameBufferSize = 15;
        nameBufferIncrement = 63;
    }
    name = NewCString(nameBufferSize);

    //
    // The first character counts as non-white unless it is a space or an
    // end of line character.
    //
    if ((theChar != ' ') && (theChar != '\r') && (theChar != '\n'))
        nonWhiteRead = YES;

    //
    // As long as we haven't found a delimiter, read new characters
    //
    while ((theChar != '{') &&              // the character isn't an open-group
           (theChar != '\\') &&             // nor is it an escape
           (theChar != '}') &&              // nor is it a close group
           (terminatingWhite != YES) &&     // we haven't found a white space after the word
           (FileLocation != fileAtEOF))     // the end of file has not been reached (BAAAD)
    {
        //
        // If the character is a CR or LF, skip it. Otherwise store the char.
        //
        if ((theChar != '\r') && (theChar != '\n'))
        {
            name[nameIndex] = theChar;
            nameIndex++;
            if (nameIndex >= nameBufferSize)
            {
                //
                // Buffer is full: move the characters read so far into a larger one.
                //
                tempName = NewCString(nameBufferSize + nameBufferIncrement);
                strncpy(tempName, name, nameBufferSize);
                tempName[nameBufferSize] = NullCharacter;
                nameBufferSize += nameBufferIncrement;
                FreeCString(name);
                name = tempName;
            }
        }

        //
        // Get the next character. If we've already read a non-white character, and just
        // read a space, then the space serves as a delimiter. CR and LF are skipped
        // above, so they must not mark the word as started either; otherwise leading
        // spaces separated by a line break would be split into separate tokens.
        //
        theChar = [self GetCharacter];
        if (theChar == ' ')
        {
            if (nonWhiteRead == YES)
                terminatingWhite = YES;
        }
        else if ((theChar != '\r') && (theChar != '\n'))
            nonWhiteRead = YES;
    }

    //
    // If we were delimited by white space, or the escape or begin-end-group chars
    // then unget the character so the next read sees it.
    //
    if ((terminatingWhite == YES) ||
        (theChar == '{') ||
        (theChar == '\\') ||
        (theChar == '}'))
        [self UnGetCharacter];

    name[nameIndex] = NullCharacter;
    if (nameIndex != 0)
    {
        theToken = [[rtfToken alloc] initTokenOfType: tokenWord];
        // presumably SetTokenName copies the string, since name is freed below -- verify
        [theToken SetTokenName: name];
    }
    else
    {
        //
        // Check for a null token (possible if, say, we found only CR or LF before
        // a control word, or if the first thing we found was a delimiter)
        //
        theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
        [self StoreErrorCode: ERR_NOTOKENFOUND AndText: "A null Word token found"];
    }
    FreeCString(name);
    return theToken;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Routine: GetFamilyNameToken
// Parameters: none
// Returns: The token that was read
// Stores: error
// Description:
// This is a specialized version of the GetWordToken call. It exists because it turns
// out that at least MS Word is happy to break the name of a type family across
// lines, and assumes the end of line character will be treated as a space. (That is,
// in all other parts of the file, a line ending should be treated as if it doesn't exist.
// In this case, it might mean there should be a space there.) This simply does
// the actions of reading in all the characters until a { or \ or } is found (hopefully,
// it will always be the last!!!); a ; also terminates the name. Perhaps we should even
// be checking for the errors from it. We return the whole to the caller as a word token.
// Bugs:
// History:
// 92.12.24 djb Modified to use namebuffer and namebufferincrement
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetFamilyNameToken
{
    //
    // Reads a font family name: every character up to the next { \ } or ; delimiter.
    // Unlike GetWordToken, spaces never end the name, and a line ending inside the
    // name is stored as a single space rather than being dropped.
    // Returns a tokenWord token carrying the name, or a tokenNoToken token (with
    // ERR_NOTOKENFOUND stored) when the name is empty.
    //
    PositiveInteger nameIndex = 0;              // next free slot in name
    PositiveInteger nameBufferSize = 31;
    PositiveInteger nameBufferIncrement = 15;
    Boolean previousWasCR = NO;                 // YES if the character just handled was a CR
    CString tempName,
            name;
    Character theChar = [self GetCharacter];
    Instance theToken;

    // NOTE(review): ResetResults runs after the first GetCharacter above, so it
    // presumably discards any error that first read stored -- confirm this is intended.
    [self ResetResults];

    //
    // Allocate space for the name of family name
    //
    name = NewCString(nameBufferSize);

    //
    // As long as we haven't found a delimiter, read new characters
    //
    while ((theChar != '{') &&              // the character isn't an open-group
           (theChar != '\\') &&             // nor is it an escape
           (theChar != '}') &&              // nor is it a close group
           (theChar != ';') &&              // special because it can terminate a family name
           (FileLocation != fileAtEOF))     // the end of file has not been reached (BAAAD)
    {
        if ((theChar == '\n') && (previousWasCR == YES))
        {
            //
            // Second half of a CR LF pair. The CR already stored the one space that
            // stands for this line break, so storing another would corrupt the name.
            //
        }
        else
        {
            //
            // A CR or LF is stored as a space. Any other character is stored as is.
            //
            if ((theChar == '\r') || (theChar == '\n'))
                name[nameIndex] = ' ';
            else
                name[nameIndex] = theChar;
            nameIndex++;
            if (nameIndex >= nameBufferSize)
            {
                //
                // Buffer is full: move the characters read so far into a larger one.
                //
                tempName = NewCString(nameBufferSize + nameBufferIncrement);
                strncpy(tempName, name, nameBufferSize);
                tempName[nameBufferSize] = NullCharacter;
                nameBufferSize += nameBufferIncrement;
                FreeCString(name);
                name = tempName;
            }
        }
        previousWasCR = (theChar == '\r') ? YES : NO;

        //
        // Get the next character. Only the delimiters tested above end the name.
        //
        theChar = [self GetCharacter];
    }

    if (theChar == ';')
    {
        //
        // The ; itself is consumed. Also consume any spaces that follow it, then
        // push back the first character that is not a space.
        //
        do
        {
            theChar = [self GetCharacter];
        }
        while (theChar == ' ');
        [self UnGetCharacter];
    }
    //
    // If we were delimited by the escape or begin-end-group chars
    // then unget the character so the next read sees it.
    //
    else if ((theChar == '{') ||
             (theChar == '\\') ||
             (theChar == '}'))
        [self UnGetCharacter];

    name[nameIndex] = NullCharacter;
    if (name[0] != NullCharacter)       // same test as strlen(name) != 0, without the scan
    {
        theToken = [[rtfToken alloc] initTokenOfType: tokenWord];
        // presumably SetTokenName copies the string, since name is freed below -- verify
        [theToken SetTokenName: name];
    }
    else
    {
        //
        // Check for a null token (possible if the first thing we found was a delimiter)
        //
        theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
        [self StoreErrorCode: ERR_NOTOKENFOUND AndText: "A null Word token found"];
    }
    FreeCString(name);
    return theToken;
}
@end
// These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.