// rtfFile.m
/***********************************************************************\
	RTF file class for Convert RTF which converts between Mac and NeXT rtf formats.
	Copyright (C) 1993  David John Burrowes

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 1, or (at your option)
	any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

	The author, David John Burrowes, can be reached at:
		davidjohn@kira.net.netcom.com
		David John Burrowes
		1926 Ivy #10
		San Mateo, CA 94403-1367
\***********************************************************************/

#import "rtfFile.h"
#import <ctype.h>
#import <string.h>
#import <stdio.h>
#import "rtfToken.h"
#import <objc/List.h>	// for the list class

@implementation rtfFile

//
//	Initialize the instance on the file at pathname (the superclass does the
//	file work), create the pending-token queue, and reset all of the
//	output-formatting state kept by this class.
//
- initAndUse:(roCString) pathname
{
	[super initAndUse: pathname];

	//
	//	Output bookkeeping: nothing is queued and nothing has been written yet.
	//
	TotalLength = 0;
	textOutputLength = 0;		// 93.02.21	Added for bugfix
	QueueHasBegin = NO;
	foundRTF = NO;

	//
	//	The queue that holds tokens until they are flushed to the file.
	//
	thequeue = [[List alloc] initCount: MAXLINELENGTH];

	return self;
}


//
//	Release the token queue itself, then let the superclass free the rest.
//
- free
{
	[thequeue free];
	return [super free];
}


//
//	Write out (pretty printed) whatever is still waiting on the queue, and then
//	have the superclass close and save the file.
//
- CloseAndSave
{
	[self FlushQueue: PRETTYPRINT];
	[super CloseAndSave];
	return self;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		WriteNeXTGraphicAt:WithName:Width:Height:
//	Parameters:	The width and height of the graphic, the file it's in (just the name, no
//				path) and a value which appears to be a dummy value these days.
//	Returns:		none
//	Stores:		errors
//	Description:
//		The NeXTGraphic control word is pretty damn weird.  So, this routine is dedicated
//		to writing it out properly, so callers needn't worry about how it wants to work in
//		this release. =)
//		In 3.0 (and hopefully this arrangement will work OK in the future?) the format is:
//			{<opt-rtf-text>{\NeXTGraphic#<1space>filename<1space>\width#<1space>
//			\height#\n}\n,-or-¬}
//		Now, the filename and the width and height make sense.  The fact that I've seen
//		both comma and ¬ is strange because I note no difference between them.  Not only
//		that, but removing them causes the picture not to display properly.  The number after
//		NeXTGraphic also appears random, in that I can set it to other values and nothing
//		bad seems to happen.  Until I figure out otherwise, callers are recommended to set
//		this to 0.  Scott Hess has suggested, very reasonably, that it may be a character count
//		(i.e this goes in as the 15th character of this document).  This requires some
//		investigation.  In my experience, Edit, at least, is VERY picky about the placement of
//		some of those braces, etc.  So, this will enforce the above layout, even if it isn't very
//		pretty or doesn't 'go' with other stuff.
//	Bugs:
//	History:
//		93.01.31	djb	Created
////////////////////////////////////////////////////////////////////////////////
- WriteNeXTGraphicAt: (PositiveInteger) loc WithName: (CString) fileName
	Width: (Integer) theWidth Height: (Integer) theHeight
{
	//
	//	Be lazy and do overkill: the fixed text of the format plus three numbers
	//	fits comfortably in the 100 extra bytes.
	//
	CString myString = NewCString(strlen(fileName)+100);

	//
	//	Clear stuff out so we can write out this monster.
	//
	[self FlushQueue: PRETTYPRINT];

	//
	//	Build the NeXTGraphic string, and write it out.
	//	***NOTE*** that we go completely behind the back of the caller.  In particular,
	//	this does not update any counters or state variables if they exist.  This also
	//	doesn't clip to a particular # of output columns, etc
	//
	sprintf(myString, "{{\\NeXTGraphic%lu %s \\width%ld \\height%ld\n}\n¬}",
		loc, fileName, theWidth, theHeight);
	[self WriteTextLine: myString];

	//
	//	With that dirty deed done, return.
	//
	FreeCString(myString);
	return self;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		WriteToken
//	Parameters:	A token to be written
//	Returns:		none
//	Stores:		errors
//	Description:
//		This takes a token, massages the output queue if necessary, and adds the
//		token to the queue to be written out.  The massaging involves flushing the
//		buffer/queue if (a) this is a begin token, or if the queue will generate a line
//		that is longer than we'd like of output.  After this, we just add the current
//		token, and if it is an end token and there is still a { at the head of the queue,
//		we flush the queue then.
//		The queue takes over the token: it is freed when it is written out (or right
//		here, if it is an empty word that we refuse to queue).
//	Bugs:
//	History:
//		92.12.25	djb	Added tempName, so as not to be leaking the cstring to the 'rtf' test
//					The 'rtf' test now only fires for control words.  Before, a plain
//					text word that happened to be "rtf" was treated like the \rtf
//					header and forced a flush of the queue as one line.
////////////////////////////////////////////////////////////////////////////////
- WriteToken: theToken
{
	TokenType		theType = [theToken GetType];
	PositiveInteger	deltaLength = [theToken GetLength];
	Boolean			dontFlush = NO;
	CString			tempName;

	//
	//	Watch for initial {\rtf8\foo tokens.  Note the order here is very important, and
	//	we rely on a dontFlush flag to keep ourselves from flushing right after the \rtf rather
	//	than after the \foo.  This is awkward and a bit ugly, and would be easier except
	//	that the way things are built now, one can end up with one's token being free-ed
	//	while one still needs it.  Rather than fix the bug, we add a flag as a workaround.
	//	(removing the flag necessitates the following if statement to be moved below after
	//	the flush, which results in an error).  This whole sequence assures that the
	//	{\rtf0\foo are written on the same line
	//	This is a bit of a hack, but it works fine, in general.  It assumes, however, that
	//	the only \rtf token is found at the beginning of the file (not a big deal if it doesn't,
	//	just a bit weird).  It further assumes that the control word following the \rtf
	//	token is the char set one.  This, also, is generally safe.  But it IS an assumption.
	//
	tempName = [theToken GetName];

	//
	//	Add a check to avoid trying to write out null length words, if the caller is sloppy.
	//	This is a quick hack to fix a problem I just found during final beta testing.
	//	It should be fixed as well in the rtf converter itself.  But it's good to have the added
	//	filter here.
	//
	if ((strcmp(tempName, "") == 0) && (theType == tokenWord))
		[theToken free];
	else
	{
		//
		//	Only a control word can be the \rtf header; a text word spelled "rtf"
		//	must be treated like any other word.
		//
		if ((theType == tokenControlWord) && (strcmp(tempName, "rtf") == 0))
		{
			foundRTF = YES;
			dontFlush = YES;
		}

		//
		//	Compute added length information, and flush if we have a {
		//
		switch (theType)
		{
			case tokenBeginGroup:
				[self FlushQueue: PRETTYPRINT];
				QueueHasBegin = YES;
				deltaLength+=1;	// escape symbol ( { )
				break;
			case tokenEndGroup:
				deltaLength+=1;	// escape symbol ( } )
				break;
			case tokenControlSymbol:
				deltaLength+=1;	// escape symbol ( \ )
				break;
			case tokenControlWord:
				deltaLength+=2;	// escape symbol and trailing space
				break;
			case tokenNoToken:
			case tokenWord:
				deltaLength +=0;
				break;
		}

		//
		//	If our length is too long, then try to flush any leading control words or symbols.
		//
		if (TotalLength + deltaLength > MAXLINELENGTH)
			[self FlushPartially: PRETTYPRINT];

		//
		//	Flush everything (not just control words or symbols) if space is still needed.
		//	and store the total length.  Then, store the new token.
		//
		if (TotalLength + deltaLength > MAXLINELENGTH)
		{
			[self FlushQueue: PRETTYPRINT];
			TotalLength = deltaLength;
		}
		else
			TotalLength += deltaLength;
		[thequeue addObject: theToken];

		//
		//	If the previous token was the \rtf header, this token completes the
		//	{\rtf0\foo opening, so force the lot out on one line.  (After this flush
		//	theToken has been freed; only the saved theType is used below.)
		//
		if ((foundRTF == YES) && (dontFlush == NO))
		{
			foundRTF = NO;
			[self FlushQueue: ASONELINE];
		}

		//
		//	If we added a } token, and there is a { at the beginning of the buffer, flush all.
		//
		if ((theType == tokenEndGroup) && (QueueHasBegin == YES))
			[self FlushQueue: ASONELINE];
	}

	FreeCString(tempName);
	return self;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		Write:TokensAs:
//	Parameters:	The number of tokens to write, and a value indicating what we should do:
//					ASONELINE	Force the contents of the buffer out on the same line
//					PRETTYPRINT	Write the tokens out in a nice manner
//	Returns:		The type of the last token we wrote out.
//	Stores:		none
//	Description:
//		This is the core routine for writing out information from the queue of tokens.
//		you tell it how many tokens to write out, and then it will write them.  If you
//		tell it to prettyprint, it does so.  If you say asoneline, it will force all the tokens on
//		the queue to be written out on one line.
//		This does decrement TotalLength.
//		If ASONELINE is specified, no newlines are written.
//		with prettyprint, newlines always follow { and } characters, as well as
//		control words.  Control symbols and regular old words only get following
//		newlines if they are the last token in the queue, or if they are followed by
//		other types of tokens (begin, end, control word).
//		It then returns the type of the last token.  this might allow you to do some kind
//		of special post processing (e.g.
//		if it returns a tokenWord, and you know it wasn't
//		at the end of the queue, then you know there is no newline after it in the file,
//		which might or might not please you)
//	Bugs:
//		If a control word is followed by a control word or begin or an end token, one can
//		safely omit the trailing space.  But, we don't.
//		In a slightly earlier version, we weren't writing out \* tokens.  Why?
//	History:
//		92.12.25	djb	Added tempName to allow us to properly free cstrings we were leaking
//		93.02.21	djb	Added textOutputLength stuff.  I found that chunks of text were
//					being written out as a single long line, rather than breaking
//					at most every 72 (or whatever) characters.  The new scheme keeps
//					incrementing textOutputLength whenever we write a word or a
//					controlSymbol (otherwise we set it to 0).  If we find that the line is
//					full before we write the token, we move to a new line and continue.
//					The request is now limited to the number of tokens actually on the
//					queue, and TotalLength is no longer decremented for tokenNoToken
//					tokens (WriteToken never added anything for them).
////////////////////////////////////////////////////////////////////////////////
- (TokenType) Write: (PositiveInteger) numTokens TokensAs: (Integer) instruction
{
	TokenType		theType = tokenNoToken;
	Instance			theToken;
	CString			buffer = NewCString(1024);
	PositiveInteger	tokenCount = numTokens;
	PositiveInteger	queuedCount = [thequeue count];
	CString			tempName;
	CString			tempLoc;
	PositiveInteger	numChunks;
	PositiveInteger	chunkNum;
	PositiveInteger	wordLength;
	PositiveInteger	outputLength;

	//
	//	Never try to write more tokens than are queued.  Pulling a token off an
	//	empty queue would leave us with no token and no name to compare or print.
	//
	if (tokenCount > queuedCount)
		tokenCount = queuedCount;

	while (tokenCount > 0)
	{
		//
		//	Get the next token to write out, and decrement the total length of the pending
		//	output line.  This undoes exactly what WriteToken added: the token's length,
		//	plus 2 for a control word, plus 1 for a { or } or control symbol, and nothing
		//	extra for words or null tokens.
		//
		tokenCount--;
		theToken = [thequeue removeObjectAt:0];
		theType = [theToken GetType];
		TotalLength -= [theToken GetLength];
		if (theType == tokenControlWord)
			TotalLength -= 2;
		else if ((theType != tokenWord) && (theType != tokenNoToken))
			TotalLength --;
		tempName = [theToken GetName];

		//
		//	93.02.21	Prepare the line based on the type of the current token (this could
		//	be done in the switch further below, but this logically groups a bit differently,
		//	and I figured it would be clearer if I separated it out all up here.  For some token
		//	types, we reset the length of the text output line to 0 because in all these cases,
		//	if we are prettyprinting, then these all finish up and move the current location
		//	to the start of a new line.  If we are writing all out asoneline, then it might be
		//	reasonable to increment the textoutputlength by the length of the token.  Yet,
		//	this is irrelevant since the whole is to be printed on one line anyway, so I just
		//	set them to 0 too.  If it is a controlsymbol or word, we check if the token will fit
		//	on the current line.  If not, we start a new line, and then continue.  (the line
		//	value is incremented for these below.)
		//
		switch (theType)
		{
			case tokenBeginGroup:
			case tokenEndGroup:
			case tokenControlWord:
				//
				//	If there is text length right now, then we must move to a new line,
				//	assuming we are prettyprinting
				//
				if ((textOutputLength != 0) && (instruction != ASONELINE))
					[self WriteTextLine: ""];
				//
				//	Always set to zero, since essentially always starting fresh after these
				//
				textOutputLength = 0;
				break;
			case tokenControlSymbol:
			case tokenWord:
				//
				//	Check the length of the token, relative to the line.  If it won't fit,
				//	start a new output line.  (the routines below increment the
				//	textOutputLength.  This is mainly because it would be too messy to
				//	deal with the need to break a long word up here (this has the effect,
				//	then, of always starting a very long word on it's own line.  This might
				//	result in a less than ideal output)
				//
				if (theType == tokenControlSymbol)
				{
					if (strcmp(tempName, "\'") == 0)
						outputLength = 4;		// \'hh
					else
						outputLength = 2;		// \c
				}
				else
					outputLength = strlen(tempName);

				if ( ((outputLength + textOutputLength) > MAXLINELENGTH)
					&& (instruction != ASONELINE))
				{
					textOutputLength = 0;
					[self WriteTextLine: ""];
				}
				break;
			case tokenNoToken:
				//
				//	Do nothing.
				//
				break;
		}

		//
		//	Write the actual token out...
		//
		switch (theType)
		{
			case tokenBeginGroup:
				QueueHasBegin = NO;	// only 1 begin in queue at a time, and this removes it
				if (instruction == ASONELINE)
					[self WriteText: "{"];
				else
					[self WriteTextLine: "{"];
				break;
			case tokenEndGroup:
				if (instruction == ASONELINE)
					[self WriteText: "}"];
				else
					[self WriteTextLine: "}"];
				break;
			case tokenControlWord:
				//
				//	Write out the control word appropriately, depending on whether we
				//	are writing fixed output, or pretty printing, and whether or not
				//	it has a numeric parameter.
				//
				if (instruction == ASONELINE)
				{
					if ( [theToken HasValue] == YES)
						[self WriteTextUsing: buffer WithFormat: "\\%s%d ", tempName, [theToken GetValue]];
					else		// no parameter
						[self WriteTextUsing: buffer WithFormat: "\\%s ", tempName];
				}
				else
				{
					if ( [theToken HasValue] == YES)
						[self WriteTextUsing: buffer WithFormat: "\\%s%d\n", tempName, [theToken GetValue]];
					else		// no parameter
						[self WriteTextUsing: buffer WithFormat: "\\%s\n", tempName];
				}
				break;
			case tokenControlSymbol:
				if (strcmp(tempName, "\'") == 0)
				{
					[self WriteTextUsing: buffer WithFormat: "\\\'%.2x", [theToken GetValue]];
					textOutputLength +=4;
				}
				else
				{
					if ( [theToken HasValue] == YES)
						[self WriteTextUsing: buffer WithFormat: "\\%s%d", tempName, [theToken GetValue]];
					else		// no parameter
						[self WriteTextUsing: buffer WithFormat: "\\%s", tempName];
					//
					//	This is oversimplistic.  It ignores the case of one with a
					//	parameter (which shouldn't happen anyway, though)
					//
					textOutputLength +=2;
				}
				break;
			case tokenWord:
				//
				//	Check the length of the word.  In general, it will be less than MAXLINE..
				//	chars, so we just write it out.  Otherwise, we write it out in MAX... char
				//	chunks, breaking arbitrarily as we go.
				//
#define MaxChunkSize MAXLINELENGTH
				wordLength = strlen(tempName);
				if (wordLength <= MaxChunkSize)
				{
					[self WriteText: tempName];
					textOutputLength +=wordLength;
				}
				else
				{
					tempLoc = tempName;
					numChunks = wordLength / MaxChunkSize;
					for (chunkNum = 0; chunkNum < numChunks; chunkNum++)
					{
						[self Write: MaxChunkSize BytesFrom: (ByteString) tempLoc];
						[self WriteText: "\n"];		// kludge for a newline
						tempLoc += MaxChunkSize;
					}
					//
					//	Write any remaining bytes.
					//
					[self Write: (wordLength % MaxChunkSize) BytesFrom: (ByteString) tempLoc];
					textOutputLength +=(wordLength % MaxChunkSize);
				}
				break;
			case tokenNoToken:
				break;
		}

		//
		//	The name was a copy made for us, and the token is finished with.
		//
		FreeCString(tempName);
		[theToken free];
	}

	FreeCString(buffer);
	return theType;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		ClearThroughLastBegin
//	Parameters:	none
//	Returns:		YES if succeeded, NO if not
//	Stores:		success or failure code
//	Description:
//		This empties current contents of the queue, if there is a begin token.
//		Otherwise, it returns an error.  This allows the caller to backtrack and
//		decide they didn't want to write out some group after all (e.g. starting to
//		write {\colortbl only to discover that no colors were used)
//	Bugs:
//	History:
//		92.12.13	djb	Created
//		93.01.02	djb	Fixed bug of not clearing QueueHasBegin after clearing!
////////////////////////////////////////////////////////////////////////////////
//
//	Discard (and free) every token waiting on the queue, provided the queue holds
//	a begin-group token.  Returns YES and stores ERR_OK when the queue was
//	cleared; returns NO and stores ERR_NOBEGIN when there was no begin token.
//	Since the queue ends up empty, the pending line length (TotalLength) is reset
//	to zero along with QueueHasBegin; leaving it stale made later WriteToken
//	calls believe the line was fuller than it is.
//
- (Boolean) ClearThroughLastBegin
{
	Boolean			result = NO;
	PositiveInteger	tokenCount = [thequeue count];
	PositiveInteger	index = 0;
	Instance			temptoken;

	if (QueueHasBegin == YES)
	{
		//
		//	There's a begin here, so just rip out the N tokens, and free them
		//
		for (index = 0; index < tokenCount; index++)
		{
			temptoken = [thequeue removeObjectAt:0];
			[temptoken free];
		}
		//
		//	Nothing is pending any more: no begin token, and no queued length.
		//
		QueueHasBegin = NO;
		TotalLength = 0;
		result = YES;
		[self StoreErrorCode: ERR_OK AndText: "Succeded in flushing back to { token"];
	}
	else
	{
		result = NO;
		[self StoreErrorCode: ERR_NOBEGIN AndText: "Unable to find begin token. Could not clear"];
	}
	return result;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		FlushPartially
//	Parameters:	A value indicating what we should do:
//					ASONELINE	Force the contents of the buffer out on the same line
//					PRETTYPRINT	Write the tokens out in a nice manner
//	Returns:		none
//	Stores:		none
//	Description:
//		This is much like FlushQueue, below, with one exception, It counts back from
//		the end of the queue to find the last non-word or control symbol token in the
//		queue.  Then, it writes everything up to (and including) that out.
//		This allows us to keep any word tokens on the queue so they have the chance to
//		be written out with other word tokens that might be added.
//	Bugs:
//		If a control word is followed by a control word or space or an end token, one can
//		safely omit the trailing space.
//	History:
//		93.02.21	djb	Added textOutputLength alteration.  See Write:TokensAs: for
//					more details.
////////////////////////////////////////////////////////////////////////////////
//
//	Write out the front of the queue, up to and including the last token that is
//	neither a word nor a control symbol.  Any words / control symbols behind it
//	stay queued.  If no such token exists, nothing is written.  For ASONELINE a
//	newline is written afterwards and the text line length starts over.
//
-FlushPartially: (Integer) instruction
{
	PositiveInteger	writeCount = 0;	// how many leading tokens get written
	PositiveInteger	position;		// 1-based position while scanning backwards
	Instance			candidate;
	TokenType		candidateType;

	//
	//	Scan from the tail of the queue towards the head, and stop at the first
	//	token that is something other than a word or control symbol.
	//
	for (position = [thequeue count]; position > 0; position--)
	{
		candidate = [thequeue objectAt: position - 1];
		if (candidate == nil)
			continue;
		candidateType = [candidate GetType];
		if ( (candidateType != tokenWord) && (candidateType != tokenControlSymbol) )
		{
			writeCount = position;
			break;
		}
	}

	if (writeCount > 0)
		[self Write: writeCount TokensAs: instruction];

	if (instruction == ASONELINE)
	{
		[self WriteText: "\n"];
		textOutputLength = 0;		// 93.02.21	For bugfix
	}

	return self;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		FlushQueue
//	Parameters:	A value indicating what we should do:
//					ASONELINE	Force the contents of the buffer out on the same line
//					PRETTYPRINT	Write the tokens out in a nice manner
//	Returns:		none
//	Stores:		none
//	Description:
//		This calls Write:TokensAs: to write all the tokens in the queue out.  Aside from
//		that, this doesn't do much anymore.
//	Bugs:
//	History:
//		93.02.21	djb	Added textOutputLength alteration.  See Write:TokensAs: for
//					more details.
////////////////////////////////////////////////////////////////////////////////
//
//	Write every queued token out in the requested manner.  For ASONELINE the
//	line is finished off with a newline and the text line length starts over.
//	Either way nothing is pending afterwards, so TotalLength goes back to zero.
//
- FlushQueue: (Integer) instruction
{
	PositiveInteger	pending = [thequeue count];

	[self Write: pending TokensAs: instruction];

	if (instruction == ASONELINE)
	{
		[self WriteText: "\n"];
		textOutputLength = 0;		// 93.02.21	For bugfix
	}

	TotalLength = 0;
	return self;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		Hint
//	Parameters:	an item from the enumerated type HintName
//	Returns:		self
//	Stores:		none
//	Description:
//		This is an experiment with implementing a 'hint' method which I've been
//		thinking about.  I'm sure it's not quite right, but I gotta implement it to
//		figure out what's wrong.  The purpose of the method is simple.  It allows the
//		caller to give the instance suggestions about how it might want to behave.
//		Thus, it allows, perhaps, chances to tweak performance.  But it should not
//		cause the output to be altered in any way.
//		At the moment, this only recognizes two hints: that forthcoming word tokens
//		are picture data, and that they aren't.
//	Bugs:
//	History:
//		92.12.24	djb	created
////////////////////////////////////////////////////////////////////////////////
- Hint: (HintName) theHint
{
	//
	//	The two picture hints are simply remembered (GetWordToken consults
	//	PictureHint when sizing its buffer).  NoHint changes nothing.
	//
	if ((theHint == WordsArePictures) || (theHint == WordsAreNotPictures))
		PictureHint = theHint;

	return self;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetToken
//	Parameters:	none
//	Returns:		The token that was read (NullInstance if the file is not open)
//	Stores:		ERR_FILENOTOPEN if the file is closed, ERR_EOF if the end of
//				the file has been reached, ERR_OK otherwise
//	Description:
//		When called, this tries to get a new token from the file.  If we have not reached
//		EOF, we read the next character, and use it to determine what type of token
//		is next.  A \ introduces a control word or symbol, a { a begin group, } an end
//		group, and anything else introduces a word.  Call proper routines to read each in.
//		Null tokens are freed and skipped until a real token or the end of file turns up.
//	Bugs:
//	History:
//					The closed-file error used to be overwritten by the EOF/OK status
//					stored at the end of the routine.  Now we return as soon as it has
//					been stored.
////////////////////////////////////////////////////////////////////////////////
- GetToken
{
	Character	theChar;
	Instance		theToken = NullInstance;

	[self ResetResults];

	if (FileIsOpen == NO )
	{
		//
		//	Report the problem and leave at once, so that the status stored
		//	below can not replace this error.
		//
		[self StoreErrorCode: ERR_FILENOTOPEN AndText: "Can't read from closed file"];
		return NullInstance;
	}

	//
	//	Loop until:
	//	(1) the end of file is reached
	//	(2) a token is found,
	//
	do
	{
		//
		//	If we are coming through here a second time (the token was a null
		//	token the last time through, for instance), then free the token...
		//
		if (theToken != NullInstance)
			[theToken free];
		//
		//	Get the next character, and use it to decide what kind of token is next.
		//
		theChar = [self LookAtNextCharacter];
		switch (theChar)
		{
			case '{' :
				theToken = [self GetOpenBraceToken];
				break;
			case '}' :
				theToken = [self GetCloseBraceToken];
				break;
			case '\\' :
				theToken = [self GetControlToken];
				break;
			default :
				theToken = [self GetWordToken];
				break;
		}
	}
	while (([theToken GetType] == tokenNoToken) && (FileLocation != fileAtEOF));

	//
	//	No real error codes to process here; just say whether we hit the end.
	//
	if (FileLocation == fileAtEOF)
		[self StoreErrorCode: ERR_EOF AndText: "We found the end of file, dude."];
	else
		[self StoreErrorCode: ERR_OK AndText: "Everything went GREAT!"];

	return theToken;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetNextControlToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		true if we filled it, false if not
//	Description:
//		This routine simply walks quickly through the file, looking for the next
//		control word or symbol token.  It ignores everything else.  When it finds the
//		start of one, it returns it to the caller.  If it finds eof, it returns a null token and
//		an error.
//		This allows a caller to read in all the control token info more efficiently than calling
//		GetNextToken.
//	Bugs:
//	History:
//					The closed-file error used to be overwritten by the EOF/OK status
//					stored at the end of the routine.  Now we return as soon as it has
//					been stored.
////////////////////////////////////////////////////////////////////////////////
- GetNextControlToken
{
	Character	theChar;
	Instance		theToken;

	[self ResetResults];

	if (FileIsOpen == NO )
	{
		//
		//	Report the problem and leave at once, so that the status stored
		//	below can not replace this error.
		//
		[self StoreErrorCode: ERR_FILENOTOPEN AndText: "Can't read from closed file"];
		return NullInstance;
	}

	//
	//	Skip bytes until we are looking at a \ or have run out of file.
	//
	do
	{
		theChar = [self ReadByte];
	}
	while ((theChar != '\\') && (FileLocation != fileAtEOF));

	if (theChar == '\\')
	{
		//
		//	Put the \ back, since GetControlToken expects to read it itself.
		//
		[self UnGetCharacter];
		theToken = [self GetControlToken];
	}
	else
		theToken = NullInstance;

	if (FileLocation == fileAtEOF)
		[self StoreErrorCode: ERR_EOF AndText: "We found the end of file, dude."];
	else
		[self StoreErrorCode: ERR_OK AndText: "Everything went GREAT!"];

	return theToken;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetOpenBraceToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		none
//	Description:
//		This simply initializes a begin-group token Instance, and returns it.  No biggie.
//		We then discard the { that is waiting.
//	Bugs:
////////////////////////////////////////////////////////////////////////////////
- GetOpenBraceToken
{
	Instance		theToken;
	Character	theCharacter = [self GetCharacter];

	//
	//	(Compared against plain '{' -- a backslash before the brace is not a
	//	standard C escape sequence.)
	//
	if (theCharacter == '{')
	{
		theToken = [[rtfToken alloc] initTokenOfType: tokenBeginGroup];
		[self StoreErrorCode: ERR_OK AndText: "OK"];
	}
	else
	{
		//
		//	Not a { after all: hand back a null token, and return the
		//	character to the file.
		//
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
		[self UnGetCharacter];
		[self StoreErrorCode: ERR_BADCHAR AndText: "That was not my type of character"];
	}

	return theToken;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetCloseBraceToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		none
//	Description:
//		This simply initializes an end-group token Instance, and returns it.  No biggie.
//		We then discard the } that is waiting.
//	Bugs:
////////////////////////////////////////////////////////////////////////////////
- GetCloseBraceToken
{
	Instance		theToken;
	Character	theCharacter = [self GetCharacter];

	if (theCharacter == '}')
	{
		theToken = [[rtfToken alloc] initTokenOfType: tokenEndGroup];
		[self StoreErrorCode: ERR_OK AndText: "OK"];
	}
	else
	{
		//
		//	Not a } after all: hand back a null token, and return the
		//	character to the file.
		//
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
		[self UnGetCharacter];
		[self StoreErrorCode: ERR_BADCHAR AndText: "That was not my type of character"];
	}

	return theToken;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetControlToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		error
//	Description:
//		Reads in a control token.  If the character following the \ is not a letter, it is a
//		control symbol.
//		A control symbol has no delimiter, since, by definition,
//		it is 1 character long (aside from the \ escape).  (if the sequence is undefined, Word
//		appears to simply ignore the control symbol (or word)).  Note that \' is unique in
//		that it does take a parameter (a 2 char hex value).
//		Otherwise, we have a control word, which is a sequence of letters
//		followed immediately, in some cases, by a numeric parameter.
//	Bugs:
//	History:
//					Fixed the growing of the name buffer.  A letter used to be stored
//					at index nameBufferSize before the buffer was grown, and then only
//					nameBufferSize bytes were copied across, so that letter of a long
//					control word was lost.  We now store, advance, and then grow (the
//					same order GetWordToken uses), and terminate the copy.
////////////////////////////////////////////////////////////////////////////////
- GetControlToken
{
	Character		theChar, theValue;
	PositiveInteger	nameBufferSize = 15;
	PositiveInteger	nameBufferIncrement = 15;
	PositiveInteger	nameIndex;
	CString			tempName,
					tokenName = NewCString(nameBufferSize);
	Instance			theToken;
	Integer			theNumber;

	[self ResetResults];

	//
	//	Read in the escape character.
	//
	theChar = [self GetCharacter];
	if (theChar != '\\')
	{
		[self StoreErrorCode: ERR_BADCHAR
			AndText: "The next value in the file was NOT a control token"];
		[self UnGetCharacter];
		theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
	}
	else
	{
		theChar = [self GetCharacter];
		if (isalpha(theChar)== 0)
		{
			theToken = [[rtfToken alloc] initTokenOfType: tokenControlSymbol];
			//
			//	Process a control symbol \?
			//
			sprintf (tokenName, "%c", theChar);
			[theToken SetTokenName: tokenName];
			//
			//	If the token is \', then get it's hex parameter..
			//
			if (theChar == '\'') // if it's the special one
			{
				theValue = [self GetHexByte];	// @@ IGNORING ERROR CODE!!
				[theToken SetTokenValue: theValue];
			}
		}
		else
		{
			theToken = [[rtfToken alloc] initTokenOfType: tokenControlWord];
			//
			//	Process a control word: gather its letters.
			//
			nameIndex = 0;
			while ((isalpha(theChar) != 0) && (FileLocation != fileAtEOF))
			{
				tokenName[nameIndex] = theChar;
				nameIndex++;
				if (nameIndex >= nameBufferSize)
				{
					//
					//	The buffer now holds nameBufferSize letters.  Allocate
					//	more space for the name, and carry all of them over, so
					//	there is always room for another letter or the terminator.
					//
					tempName = NewCString(nameBufferSize + nameBufferIncrement);
					strncpy(tempName, tokenName, nameBufferSize);
					tempName[nameBufferSize] = NullCharacter;
					nameBufferSize +=nameBufferIncrement;
					FreeCString(tokenName);
					tokenName = tempName;
				}
				theChar = [self GetCharacter];
			}
			tokenName[nameIndex] = NullCharacter;
			[theToken SetTokenName: tokenName];

			//
			//	A control word token can be terminated by a space which is consumed.
			//	If we were not, then we return the character to the file.
			//	93.02.21	Modified checking for terminating conditions.  We have so far
			//	accumulated \foo.  If the next character is a space, we consume it and
			//	proceed.  Otherwise, we replace it in the stream.  If it was, however, a
			//	digit, we read in the number, and consume the space that follows that, if
			//	any.  (before this, we were not explicitly checking for digits, and so it would
			//	consider a newline between \foo and the number to be no big deal!
			//	93.02.21	Fixed a problem I just introduced.  Now, we check if the next char
			//	is a - as well as whether it is a digit.
			//
			if (theChar != ' ')
			{
				[self UnGetCharacter];
				if ((isdigit(theChar) != 0) || (theChar == '-'))
				{
					theNumber = [self GetNumber];
					if ([self GetErrorCode] == ERR_OK)
						[theToken SetTokenValue: theNumber];
					if ([self LookAtNextCharacter] == ' ')
						[self GetCharacter];
				}
			}
		}
		//
		//	Check that ALL went well to this point.
		//
	}

	FreeCString(tokenName);
	return theToken;
}


////////////////////////////////////////////////////////////////////////////////
//	Routine:		GetWordToken
//	Parameters:	none
//	Returns:		The token that was read
//	Stores:		error
//	Description:
//		This reads in the next token from the rtf file.
//      Since we were called by something that presumably knows what it's doing,
//      we assume that the next token IS a word (if we notice it isn't, we
//      return an error, of course).
//      Characters are accumulated until a '{', '\' or '}' is seen, until a space
//      follows the word, or until the end of the file is reached.  CR and LF
//      characters are never stored in the name.  When nothing was accumulated
//      a tokenNoToken token is returned and ERR_NOTOKENFOUND is stored.
//      NOTE: spaces are 'legal' components of a Word token IFF they are at the
//      beginning before any non-white characters appear.
//  Bugs:
//  History:
//      92.12.24 djb Modified to recognize the hints for 'WordsArePictures'
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetWordToken
{
    Boolean         terminatingWhite = NO,  // true once a space is found after the word
                    nonWhiteRead = NO;      // true once a non-space character has been read
    PositiveInteger nameIndex = 0;          // next free slot in name
    PositiveInteger nameBufferSize;
    PositiveInteger nameBufferIncrement;
    CString         tempName, name;
    // NOTE(review): the first character is fetched before ResetResults is sent,
    // so presumably any error stored by this read is cleared -- confirm intended.
    Character       theChar = [self GetCharacter];
    Instance        theToken;

    [self ResetResults];

    //
    //  Allocate space for the name of this word token, and decide how much we
    //  should grow the buffer by if we run out of room.  If we suspect we're going
    //  to be reading in a picture, allocate a HECK of a lot more space for the
    //  token than normal.  Sizes are one less than a power of 2 so that the text
    //  plus its null byte occupy a power of 2.
    //  NOTE(review): this presumes NewCString(n) reserves n characters plus the
    //  terminating null; the writes at index nameBufferSize below rely on it.
    //
    if (PictureHint == WordsArePictures)
    {
        nameBufferSize = (32 * 1024) - 1;       // 32K, (will hold 16K worth of pict)
        nameBufferIncrement = (32 * 1024) - 1;
    }
    else
    {
        nameBufferSize = 15;
        nameBufferIncrement = 63;
    }
    name = NewCString(nameBufferSize);

    //
    //  Set the nonWhite flag if it happens that our first character is not a
    //  space.  An end of line character does not count as non-white here.
    //
    if ((theChar != ' ') && (theChar != '\r') && (theChar != '\n'))
        nonWhiteRead = YES;

    //
    //  As long as we haven't found a delimiter, read new characters
    //
    while ((theChar != '{')                 // the character isn't an open-group
            && (theChar != '\\')            // nor is it an escape
            && (theChar != '}')             // nor is it a close group
            && !terminatingWhite            // we haven't found a space after the word
            && (FileLocation != fileAtEOF)) // the end of file has not been reached (BAAAD)
    {
        //
        //  If the character is a CR or LF, skip it.  Otherwise store the char.
        //
        if ((theChar != '\r') && (theChar != '\n'))
        {
            name[nameIndex] = theChar;
            nameIndex++;
            if (nameIndex >= nameBufferSize)
            {
                //
                //  Allocate more space for the name!
                //
                tempName = NewCString(nameBufferSize + nameBufferIncrement);
                strncpy(tempName, name, nameBufferSize);
                tempName[nameBufferSize] = NullCharacter;
                nameBufferSize += nameBufferIncrement;
                FreeCString(name);
                name = tempName;
            }
        }
        //
        //  Get the next character.  If we've already read a non-space character,
        //  and just read a space, then it serves as a delimiter.  Any other
        //  character (CR and LF included) marks the word as started.
        //
        theChar = [self GetCharacter];
        if (theChar == ' ')
        {
            if (nonWhiteRead)
                terminatingWhite = YES;
        }
        else
            nonWhiteRead = YES;
    }

    //
    //  If we were delimited by white space, or the escape or begin/end-group
    //  chars, then unget the character.  (The open brace used to be spelled
    //  '\{', which is not a valid C escape sequence.)
    //
    if (terminatingWhite || (theChar == '{') || (theChar == '\\') || (theChar == '}'))
        [self UnGetCharacter];

    name[nameIndex] = NullCharacter;
    if (nameIndex != 0)
    {
        theToken = [[rtfToken alloc] initTokenOfType: tokenWord];
        [theToken SetTokenName: name];
    }
    else
    {
        //
        //  Check for a null token (possible if, say, we found only CR or LF
        //  before a control word, or if the first thing we found was a delimiter)
        //
        theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
        [self StoreErrorCode: ERR_NOTOKENFOUND AndText: "A null Word token found"];
    }
    FreeCString(name);
    return theToken;
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//  Routine: GetFamilyNameToken
//  Parameters: none
//  Returns: The token that was read
//  Stores: error
//  Description:
//      This is a specialized version of the GetWordToken call.  It exists because
//      it turns out that at least MS Word is happy to break the name of a type
//      family across lines, and assumes the end of line character will be treated
//      as a space.  (In all other parts of the file, a line ending should be
//      treated as if it doesn't exist.  In this case, it might mean there should
//      be a space there.)  This simply does the actions of reading in all the
//      characters until a { or \ or } or ; is found (hopefully, it will always be
//      the last!!!  Perhaps we should even be checking for the errors from it.)
//      We return the whole to the caller as a word token.
//  Bugs:
//  History:
//      92.12.24 djb Modified to use namebuffer and namebufferincrement
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- GetFamilyNameToken
{
    PositiveInteger nameIndex = 0;              // next free slot in name
    PositiveInteger nameBufferSize = 31;
    PositiveInteger nameBufferIncrement = 15;
    CString         tempName, name;
    // NOTE(review): the first character is fetched before ResetResults is sent,
    // so presumably any error stored by this read is cleared -- confirm intended.
    Character       theChar = [self GetCharacter];
    Instance        theToken;

    [self ResetResults];

    //
    //  Allocate space for the family name.
    //  NOTE(review): this presumes NewCString(n) reserves n characters plus the
    //  terminating null; the writes at index nameBufferSize below rely on it.
    //
    name = NewCString(nameBufferSize);

    //
    //  As long as we haven't found a delimiter, read new characters
    //
    while ((theChar != '{')                 // the character isn't an open-group
            && (theChar != '\\')            // nor is it an escape
            && (theChar != '}')             // nor is it a close group
            && (theChar != ';')             // special because it can terminate a family name
            && (FileLocation != fileAtEOF)) // the end of file has not been reached (BAAAD)
    {
        //
        //  A CR or LF is stored as a space (a family name may be broken across
        //  lines).  Every other character, spaces included, is stored as is.
        //
        if ((theChar == '\r') || (theChar == '\n'))
            name[nameIndex] = ' ';
        else
            name[nameIndex] = theChar;
        nameIndex++;
        if (nameIndex >= nameBufferSize)
        {
            //
            //  Allocate more space for the name!
            //
            tempName = NewCString(nameBufferSize + nameBufferIncrement);
            strncpy(tempName, name, nameBufferSize);
            tempName[nameBufferSize] = NullCharacter;
            nameBufferSize += nameBufferIncrement;
            FreeCString(name);
            name = tempName;
        }
        //
        //  Get the next character.  Unlike GetWordToken, white space never ends
        //  a family name; only the delimiters tested by the loop do.
        //
        theChar = [self GetCharacter];
    }

    if (theChar == ';')
    {
        //
        //  The semicolon itself is consumed.  Also consume any spaces that
        //  follow it, then put back the first character that is not a space.
        //
        do
        {
            theChar = [self GetCharacter];
        }
        while (theChar == ' ');
        [self UnGetCharacter];
    }
    //
    //  If we were delimited by the escape or begin/end-group chars then unget
    //  the character.  (The open brace used to be spelled '\{', which is not a
    //  valid C escape sequence.)
    //
    else if ((theChar == '{') || (theChar == '\\') || (theChar == '}'))
        [self UnGetCharacter];

    name[nameIndex] = NullCharacter;
    if (strlen(name) != 0)
    {
        theToken = [[rtfToken alloc] initTokenOfType: tokenWord];
        [theToken SetTokenName: name];
    }
    else
    {
        //
        //  Check for a null token (possible if, say, the first thing we found
        //  was a delimiter)
        //
        theToken = [[rtfToken alloc] initTokenOfType: tokenNoToken];
        [self StoreErrorCode: ERR_NOTOKENFOUND AndText: "A null Word token found"];
    }
    FreeCString(name);
    return theToken;
}

@end
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.