// This is BibTexParser.m in view mode; [Download] [Up]
// Copyright H. Giesen, University of Koblenz-Landau 1996
//
// BibTexParser scans a BibTeX source held in an NXStream.  It works in two
// passes: -pickUpItem locates one entry (type, key and extent) and
// -parseSelf later re-reads that entry and records the location of every
// field in the shared configList.  The conversion routines at the end of the
// file turn a raw field (quotes, braces, '#' concatenation) into plain text.

#import "BibTexParser.h"
#import "Controller.h"
#import "ErrorInspector.h"
#import "BibliographicFile.h"
#import "Preferences.h"

locType fieldLoc;           // location of the name/field read most recently
int     lineNumber;         // current line of the stream (see NextByte)
int     soEntry;            // stream position of the '@' that starts the entry
char    nameBuffer[128];    // NUL-terminated copy of the last name scanned
id      entryNameList = NULL;
id      fieldNameList = NULL;
id      configList    = NULL;   // content is of 'configListType' (-> Preferences.h)
id      preferences   = NULL;

// special entryNames (indices obtained from Preferences in -init)
static int STRING;
static int PREAMBLE;
static int COMMENT;
static int NOTE;

/**********
 NXSeek(), NXTell() :
 These functions set or report the current position in the stream given
 as an argument.  This position determines which data will be read next
 or where the next data will be written since the functions for reading
 and writing to a stream start from the current position.  NXSeek() sets
 the position offset number of bytes from the place indicated by ptrName,
 which can be NX_FROMSTART, NX_FROMCURRENT, or NX_FROMEND.  NXTell()
 returns the current position of the buffer.  This information can then
 be used in a call to NXSeek().
***************/

// Stream position of the character currently held in 'c'
// (the stream itself is already one byte further).
#define HERE (NXTell( theStream ) - 1)

#define isPunctuation(x) ((x)=='.' || (x)==':' || (x)==';' ||\
    (x)==',' || (x)=='?' || (x)=='!' || (x)=='`' || (x)=='\'' || (x)==LPARA ||\
    (x)==RPARA || (x)=='[' || (x)==']' || (x)=='-' || (x)=='/' || (x)=='*' || (x)=='@')

#define isOneOfTheTen(x) ((x)=='"' || (x)=='#' || (x)=='%' || (x)=='\''\
    || (x)==LPARA || (x)==RPARA || (x)==',' || (x)=='=' || (x)==LBRACE || (x)==RBRACE)

#define isNameChar(x) ( !isOneOfTheTen(x) && (x)!=EOF && !NXIsSpace(x) )

// Index of the last usable slot of nameBuffer (the slot kept for the NUL).
#define NAME_BUFFER_LAST ((int)sizeof(nameBuffer) - 1)

// the following string table is adopted from next.sty
const char *NX2TeXTable[] = { // first string is char 0x80
/*0x80*/ "{~}", "{\\`A}", "{\\'A}", "{\\^A}",
/*0x84*/ "{\\~A}", "{\\\"A}", "{\\AA}", "{\\c C}",
/*0x88*/ "{\\`E}", "{\\'E}", "{\\^E}", "{\\\"E}",
/*0x8C*/ "{\\`I}", "{\\'I}", "{\\^I}", "{\\\"I}",
/*0x90*/ "{?}", "{\\~N}", "{\\`O}", "{\\'O}",
/*0x94*/ "{\\^O}", "{\\~O}", "{\\\"O}", "{\\`U}",
/*0x98*/ "{\\'U}", "{\\^U}", "{\\\"U}", "{\\'Y}",
/*0x9C*/ "{?}", "{$\\mu$}", "{$\\times$}", "{$\\div$}",
/*0xA0*/ "{\\copyright}", "{!`}", "{?}", "{{\\it\\$}}",
/*0xA4*/ "{/}", "{?}", "{$f$}","{\\S}",
/*0xA8*/ "{?}", "{{\\tt'}}", "{``}", "{?}",         //flqq
/*0xAC*/ "{?}", "{?}", "{fi}", "{fl}",              //flq, frq
/*0xB0*/ "{?}", "{--}", "{\\dag}", "{\\ddag}",
/*0xB4*/ "{$\\cdot$}", "{{\\tt|}}", "{\\P}", "{$\\bullet$}",
/*0xB8*/ "{,}", "{,,}", "{''}", "{?}",              //frqq
/*0xBC*/ "{...}", "{?}", "{$\\lnot$}", "{?`}",
/*0xC0*/ "{${}^1$}", "{\\`{}}", "{\\'{}}", "{\\^{}}",
/*0xC4*/ "{\\~{}}", "{\\={}}", "{\\u{}}", "{\\.{}}",
/*0xC8*/ "{\\\"{}}", "{${}^2$}", "{\\char23 }", "{\\c{}}",
/*0xCC*/ "{${}^3$}", "{\\H{}}", "{?}", "{\\v{}}",
/*0xD0*/ "{---}", "{$\\pm$}", "{$1\\over4$}", "{$1\\over2$}",
/*0xD4*/ "{$3\\over4$}", "{\\`a}", "{\\'a}", "{\\^a}",
/*0xD8*/ "{\\~a}", "{\\\"a}", "{\\aa}", "{\\c c}",
/*0xDC*/ "{\\`e}", "{\\'e}", "{\\^e}", "{\\\"e}",
/*0xE0*/ "{\\`\\i}", "{\\AE}", "{\\'\\i}", "{${}^a$}",
/*0xE4*/ "{\\^\\i}", "{\\\"\\i}", "{?}", "{\\~n}",
/*0xE8*/ "{\\L}", "{\\O}", "{\\OE}", "{${}^o$}",
/*0xEC*/ "{\\`o}", "{\\'o}", "{\\^o}", "{\\~o}",
/*0xF0*/ "{\\\"o}", "{\\ae}", "{\\`u}", "{\\'u}",
/*0xF4*/ "{\\^u}", "{\\i}", "{\\\"u}", "{\\'y}",
/*0xF8*/ "{\\l}", "{\\o}", "{\\oe}", "{\\ss}",
/*0xFC*/ "{?}", "{\\\"y}", "{?}", "{?}"
};

// Case-insensitive lookup of 'name' in 'list' (entryNameList or
// fieldNameList).  Returns the index of the first match, or -1.
static int indexOfName( id list, const char *name )
{
    int i;

    for( i=0; i<[list count]; i++ ){
        if( ! NXOrderStrings( (const unsigned char *)name,
                              (const unsigned char *)[list objectAt:i],
                              NO, -1, NULL ) )
            return i;
    }
    return -1;
}

@implementation BibTexParser

// Resets the shared line counter before a stream is parsed from its start.
+ initParsing
{
    lineNumber = 1;
    return self;
}

// Returns the location (start, length) currently stored in configList for
// the field with index 'fieldType'.
+ (locType)fieldFor:(int)fieldType
{
    locType loc;
    configListType *item;

    item = ((configListType*)[configList elementAt:fieldType]);
    loc.length = item->length;
    loc.start = item->start;
    return loc;
}

// Returns the conversion table; entry 0 corresponds to character 0x80.
+ (const char **)NX2TeX
{
    return NX2TeXTable;
}

// Initializes the parser.  The shared lists and the indices of the special
// names are fetched from Preferences only once, by the first instance.
- init
{
    [super init];
    c = ' ';
    if( preferences ) return self;

    preferences = [Preferences new];
    entryNameList = [preferences entryNameList];
    fieldNameList = [preferences fieldNameList];
    configList = [preferences configList];
    STRING = [preferences indexOfEntryname:"STRING"];
    PREAMBLE = [preferences indexOfEntryname:"PREAMBLE"];
    COMMENT = [preferences indexOfEntryname:"COMMENT"];
    NOTE = [preferences indexOfFieldname:"NOTE"];
    return self;
}

// Asks the editor named in the preferences to open 'fName' and to select
// line 'lineNumber'.  An alert panel is shown when the editor cannot be
// reached or refuses to open the file.
- callTheEditorForFile:(char *)fName inLine:(int)lineNumber
{
    Speaker *editSpeaker;
    port_t editPort;
    int msgReturn, result;
    char lineArg[32];   // two ints of up to 11 chars, ':' and NUL need 24 bytes
    BOOL opened = NO;

    // connect to the editor application
    editSpeaker = [NXApp appSpeaker];
    editPort = NXPortFromName([preferences theEditor], NULL);

    if( editSpeaker && editPort != PORT_NULL ){
        // open the file
        [editSpeaker setSendPort:editPort];
        msgReturn = [editSpeaker openFile:fName ok: &result];
        if( msgReturn == 0 && result ){
            // select the line
            sprintf(lineArg, "%d:%d", lineNumber, lineNumber);
            [editSpeaker msgSetPosition:lineArg
                                posType: NX_LINENUMPOSTYPE
                              andSelect: YES
                                     ok: &result];
            opened = YES;
        }
        port_deallocate(task_self(), editPort);
    }

    if( !opened ){
        NXRunAlertPanel("Bibliography",         // title
                        "cannot connect to %s", // message
                        " OK ",                 // 1: default button
                        NULL,                   // 0: alternate
                        NULL,                   //-1: other
                        [preferences theEditor] );
    }
    return self;
}

static char* errorText[] = {
    "error number 0",                               // 0
    "entry does not start with '@'",                // 1
    "invalid entryname, I'm skipping whatever "
    "remains of this entry ",                       // 2
    "refkey is empty",                              // 3
    "'{' or '(' expected, I'm skipping whatever "   // ) }
    "remains of this entry ",                       // 4
    "End Of File while reading key",                // 5
    "fieldname not followed by '='",                // 6
    "'=' after @STRING missing",                    // 7
    "key not delimited with ','",                   // 8
};

// Reports error number 'nr' (an index into errorText) together with the
// owner's file path and the current line in an alert panel.  Numbers
// outside the table are reported as "unknown error" instead of indexing
// past the end of the table.
- (void) markError:(int)nr
{
    const int known = (int)(sizeof(errorText) / sizeof(errorText[0]));
    const char *message = (nr >= 0 && nr < known) ? errorText[nr]
                                                   : "unknown error";

    NXRunAlertPanel("Bibliography",         // title
                    "%s in %s (line %d)",   // message
                    " OK ",                 // 1: default button
                    NULL,                   // 0: alternate
                    NULL,                   //-1: other
                    message, [owner fullPath], lineNumber );
    // lineNumber is not defined in a stream
}

// Moves the stream to absolute position 'pos'.  The character held in 'c'
// and lineNumber are NOT updated; callers re-read with NextByte.
- positionTo:(int)pos
{
    NXSeek( theStream, pos, NX_FROMSTART );
    return self;
}

/// ---------------------------------

#ifdef XXX
/* - (void) nextByte
 * ... advances to the next byte.
 * ... should always be used when a character has to be consumed
 */
- (void) nextByte // never used
{
    if( c=='\n' ) lineNumber++;
    c = NXGetc( theStream );
    if( c > 127 ){
        fprintf( stderr, "illegal char %02x in line %d\n",
                 (unsigned) c, lineNumber );
    }
}
#endif

// hint: check for illegal characters (c>127)
// Consumes the current character: a newline being left behind is counted,
// then the next character of the stream is read into 'c'.
// NOTE: expands to a braced block so that "if( x )NextByte else ..." works.
#define NextByte {if( c=='\n' )lineNumber++; c = NXGetc( theStream );}

/* - skipSpace -----------------------
 * advances to the next significant character, i.e. any character
 * that is not white space -- which includes e.g. EOF
 */
// precondition: TRUE
// postcondition: c!= space
#define SkipSpace {while( NXIsSpace(c) ) NextByte}

// Method form of SkipSpace.
// precondition: TRUE
// postcondition: c!= space
- (void) skipSpace
{
    SkipSpace;
}

/* - skipComment == skip to next '@' */
// precondition: TRUE
// postcondition: c=='@' || c==EOF
- (void) skipComment
{
    while( (c!='@') && (c!=EOF) ) NextByte;
}

// Returns the entry name stored at index 'theType' of entryNameList.
- (const char *)entryNameString:(int)theType
{
    return (const char *)[entryNameList objectAt:theType];
}

// Copies the run of name characters starting at 'c' into nameBuffer and
// returns the number of characters consumed.  The whole name is always
// consumed; characters that do not fit into nameBuffer are dropped so the
// buffer can never overflow.  On return 'c' is the first character that
// does not belong to the name.
- (int)readNameIntoBuffer
{
    int length = 0;

    while( isNameChar(c) ){
        if( length < NAME_BUFFER_LAST )
            nameBuffer[length] = c;
        length++;
        NextByte;
    }
    nameBuffer[ length < NAME_BUFFER_LAST ? length : NAME_BUFFER_LAST ] = '\0';
    return length;
}

/* - (int)entryName -----------------------
 * Recognizes reserved names (entry names).  Leading white space is
 * skipped; the name is copied to nameBuffer and its location is stored
 * in fieldLoc.  Case is not significant.  On return the input cursor is
 * on the first character after the name.
 */
// returns: index in entryNameList
//        : -1 if not found
- (int)entryName
{
    SkipSpace;
    fieldLoc.start = HERE;
    fieldLoc.length = [self readNameIntoBuffer];
    return indexOfName( entryNameList, nameBuffer );
}

/* - (int)fieldName -----------------------
 * Recognizes reserved names (field names).  The input cursor must be on
 * the first character of the name (white space is NOT skipped).  The name
 * is copied to nameBuffer.  Case is not significant.  On return the input
 * cursor is on the first character after the name.
 */
// returns: index in fieldNameList (== index in configList)
//        : -1 if not found (this includes an empty name)
- (int)fieldName
{
    [self readNameIntoBuffer];
    return indexOfName( fieldNameList, nameBuffer );
}

/* - (int)skipFrom:To: -----------------------
 * The input cursor is on the opening bracket '(' or '{'.  The matching
 * closing bracket is searched, honouring nesting.  On return the cursor
 * is ON the closing bracket (not behind it), or at EOF -- in which case
 * error 5 is reported.
 */
// returns: stream position of the rightDel character (or of EOF)
- (int)skipFrom:(char)leftDel To:(char)rightDel
{
    int level=1;

    NextByte; // skip the leftDel character
    while( level && c!= EOF ){
        if( c==leftDel ) level++;
        if( c==rightDel ){
            level--;
            if( level==0 ) break; // found the matching rightDel character
        }
        NextByte;
    }
    if( c==EOF ){
        [self markError:5];
        return HERE;
    }
    //NextByte; // first char just behind ')' or '}' resp.
    return HERE; // position of rightDel character
}

/* - getKey -----------------------
 * The input cursor is on the first character of the key (or on space).
 *
 * Lamport says (in his guide and user's manual):
 * key = sequence of letters, digits, punctuation characters (without comma)
 * bibtex didn't read the book and allows ALL characters
 * except comma and white_space.
 *
 * If the key is part of an abbreviation (@STRING) then key
 * starts with a letter and does not contain a space or any
 * of the ten characters (see the macro 'isOneOfTheTen()')
 *
 * On return the input cursor is on the first character that no longer
 * belongs to the key.  The location is stored in myKey; at EOF error 5 is
 * reported and myKey.length is left untouched, an empty key reports
 * error 3.
 */
- getKey
{
    SkipSpace; // find the first char of key
    // myKey is ivar of the TeXObject
    myKey.start = HERE;
    if( myType==STRING ){
        // (the "first character is a letter" rule is not enforced)
        while( isNameChar(c) ) NextByte;
    }
    else{
        while( (c!=',') && (c!=EOF) && !NXIsSpace(c) ) NextByte;
    }
    if( c==EOF ){
        [self markError:5];
        return self;
    }
    myKey.length = HERE - myKey.start;
    if( myKey.length==0 ){
        [self markError:3];
    }
    return self;
}

// Skips one text item of a field value: a "quoted" or {braced} text (with
// nested braces), a number, or an abbreviation.  An item that starts with
// ',' is empty and nothing is consumed.  Nothing is stored; the caller
// derives the location from the stream positions.
- readText
{
    int delim;
    int level=0;
    BOOL isQuoted = NO;

    SkipSpace;
    switch( c ){
        case '"' :
            delim = '"';
            isQuoted = YES;
            break;
        case LBRACE :
            delim = RBRACE;
            isQuoted = YES;
            break;
        case ',' : // text is empty
            return self;
        default :
            delim = outerDelimiter; // not quoted
    }

    if( isQuoted ){
        NextByte; // skip quote-character
        // The EOF test keeps an unterminated text from looping forever:
        // NXGetc() keeps returning EOF once the stream is exhausted.
        while( c!=EOF && (c!=delim || level>0) ){
            if( c==LBRACE ) level++;
            if( c==RBRACE ) level--;
            NextByte;
        }
        NextByte; // skip delimiter
    }
    else if( NXIsDigit(c) ){
        // unquoted text = string of digits
        while( NXIsDigit(c) ) NextByte;
    }
    else{
        // abbreviation --> convertRawText
        // Nominally a string of characters without THE_TEN and space that
        // starts with a letter; here everything up to ',', white space,
        // the outer delimiter or EOF is accepted.
        while( (c!=',') && c!=EOF && !NXIsSpace(c) && c!=delim ) NextByte;
    }
    return self;
}

// Reads a field name.  An unknown name is shown to the user, who may
// declare it valid; it is then added to the preferences as a custom field.
// returns: index in fieldNameList (== index in configList)
// returns: -1 if not found (and not accepted by the user)
- (int)readFieldName
{
    int tok, rtn;

    tok = [self fieldName];
    if( tok>=0 ) return tok;

    rtn = NXRunAlertPanel("BibTeX-Parser",                      // title
                          "fieldname >%s< unknown in line %d",  // message
                          " is wrong ", // 1: default button    NX_ALERTDEFAULT
                          " is valid ", // 0: alternate   "     NX_ALERTALTERNATE
                          NULL,         //-1: other       "     NX_ALERTOTHER
                          nameBuffer, lineNumber );
    if( rtn!=NX_ALERTALTERNATE ) return -1;

    // is valid
    [preferences addCustomField:nameBuffer];
    return [fieldNameList count] - 1;
}

// Reads the value of field 'tok': one text item or several items joined
// with '#'.  The location is stored in configList (Storage class) and in
// fieldLoc.
// returns: tok
// returns: -1 if tok is negative or the value is empty
- (int)readFieldTextFor:(int)tok
{
    configListType *item;

    if( tok<0 ) return -1;

    // init loop over concatenated strings
    SkipSpace;
    if( (c==',') || (c==EOF) ){ // empty string
        return -1;
    }
    item = ((configListType*)[configList elementAt:tok]);
    item->start = HERE; // index of first char of the string

    // loop over concatenated strings
    while( YES ){
        [self readText];
        SkipSpace;
        if( c!='#' ) break;
        NextByte; // concatenation: skip '#'
    }

    // store location in appropriate fields
    item->length = HERE - item->start;
    fieldLoc.start = item->start;
    fieldLoc.length = item->length;
    return tok;
}

// Reads one "name = value" field.
// returns: index in fieldNameList (== index in configList)
//          location can be found in fieldLoc
//          location is stored in configList (Storage class)
// returns: -1 if not found, if '=' is missing (error 6) or the value is empty
- (int)readField
{
    int tok;

    tok = [self readFieldName];
    if( tok<0 ) return -1;

    SkipSpace;
    if( c != '=' ){
        [self markError:6];
        return -1;
    }
    NextByte; // skip '='
    return [self readFieldTextFor:tok];
}

// Hands the unknown entry name in nameBuffer to an ErrorInspector.
// returnValue = -1 skip
// returnValue = -2 abort (stop reading)
// returnValue >= 0 defined entrytype
- (int)defineEntryname
{
    id errorInspector = [ErrorInspector new];

    //[self callTheEditorForFile:[owner fullPath] inLine:lineNumber];
    return [errorInspector setError:nameBuffer inLine:lineNumber for:self];
}

/* - pickUpItem
 * reads only : entrytype and key.  The rest is skipped.
 * The address of the first character and the length of the entry are
 * stored in locInput, the line of the '@' in firstLine.
 * Returns nil when no further entry is found (EOF or a NUL character).
 */
- pickUpItem
{
    int eoEntry;
    char delimiter;
    int here;
    int lineAtDelimiter;

    while( YES ){
        [self skipComment];
        if( (c==EOF) || (c==0) ) return nil;
        soEntry = HERE;
        firstLine = lineNumber;
        NextByte; // skip '@'
        if( (myType=[self entryName]) <0 ){
            if( (myType = [self defineEntryname]) <0 ){
                [self markError:2];
                [owner parsingReport:myType]; // abort or skip
                continue;
            }
            [owner parsingReport:-1]; // myType was changed
        }
        locInput.start = soEntry;
        SkipSpace;
        if( c==LBRACE ){
            delimiter = RBRACE;
        }
        else if( c==LPARA ){
            delimiter = RPARA;
        }
        else{
            [self markError:4];
            continue;
        }
        break; // normal end of 'while'
    }

    // c is left delimiter
    here = HERE;
    lineAtDelimiter = lineNumber;
    NextByte; // skip delimiter
    if( (myType==PREAMBLE) || (myType==COMMENT) ){ // they have no key
        myKey.start = fieldLoc.start-1; // should start with '@'
        myKey.length = fieldLoc.length+1;
        // "@PREAMBLE", "@COMMENT" is the key (better than nothing)
    }
    else [self getKey];

    // ... and now skip the rest
    [self positionTo:here]; // reposition
    // Everything behind the delimiter is scanned a second time by
    // skipFrom:To:, so the newlines counted while reading the key must be
    // forgotten -- otherwise lineNumber (and every later firstLine) drifts.
    lineNumber = lineAtDelimiter;
    c = ' ';  // neutral value: NextByte must not count a stale newline
    NextByte; // c is again left delimiter
    eoEntry = [self skipFrom:c To:delimiter];
    locInput.length = eoEntry - soEntry + 1;
    return self;
}

// Overrides the entry type found by pickUpItem.
- defineType:(int)theType
{
    myType = theType;
    return self;
}

// Re-reads the entry located by pickUpItem (locInput, firstLine).
// entryType is stored in myType
// all fieldEntries (location and length) are stored in configList
// returns: nil if the entry does not start with '@' or has no delimiter,
//          self otherwise (also when reading stops early at a bad field)
- parseSelf
{
    int tok;
    int i;
    int delimPos;
    configListType *item;

    lineNumber = firstLine;
    c = ' '; // dummy

    // clear location entries in configList
    for( i=0; i<[configList count]; i++ ){
        item = ((configListType*)[configList elementAt:i]);
        item->start = 0;
        item->length = 0;
    }

    // (a disabled alternative called pickUpItem here when locInput.start==0)
    [self positionTo:locInput.start];
    NextByte;
    SkipSpace;
    if( c!='@' ){
        [self markError:1];
        return nil;
    }
    soEntry = HERE;
    NextByte; // skip '@'
    [self entryName];   // only to consume the name and to set fieldLoc
    tok = myType;       // use the type already set in pickUpItem
                        // or redefined in defineType:
    SkipSpace;
    switch( c ){
        case LBRACE : outerDelimiter = RBRACE; break;
        case LPARA : outerDelimiter = RPARA; break;
        default :
            [self markError:4];
            [self skipComment];
            return nil;
    }

    // c is delimiter
    delimPos = HERE; // position of the delimiter character
    NextByte; // skip delimiter

    if( tok==PREAMBLE ){ // preamble has no key
        myKey.start = fieldLoc.start-1; // should start with '@' !!!!!!
        myKey.length = fieldLoc.length+1;
        [self readFieldTextFor:NOTE];
        return self;
    }
    if( tok==COMMENT ){ // COMMENT has no key
        myKey.start = fieldLoc.start-1; // should start with '@' !!!!!!
        myKey.length = fieldLoc.length+1;
        item = ((configListType*)[configList elementAt:NOTE]);
        item->start = delimPos + 1;
        // skipFrom:To: expects the cursor ON the left delimiter, which has
        // already been consumed above -- step back onto it first.  (No
        // newline has been counted since, so lineNumber stays correct.)
        [self positionTo:delimPos];
        c = ' ';
        NextByte; // c is again the left delimiter
        item->length = [self skipFrom:c To:outerDelimiter] - delimPos - 1;
        // length without the delimiter characters
        return self;
    }

    [self getKey];
    SkipSpace;
    if( tok==STRING ){ // abbreviation == key (see: getKey)
        if( c=='=' ){
            NextByte;
        }
        else [self markError:7]; // and then ???
        [self readFieldTextFor:NOTE];
        return self;
    }

    if( c==',' ){
        NextByte;
    }
    else{
        [self markError:8];
        while( (c!=',') && (c!=EOF) && (c!=outerDelimiter) ) NextByte;
        if( c==',' ) NextByte;
    }

    // read all fields
    SkipSpace;
    while( c!=outerDelimiter ){
        int fType = [self readField];
        if( fType<0 ) return self;
        SkipSpace;
        // must now be ',' or outerDelimiter
        if( !( (c==',') || (c==outerDelimiter) ) ){
            NXRunAlertPanel("Bibliography", // title
                            " error in line %d\n"
                            "I was expecting a',' or '%c', I'm skipping whatever "
                            "remains of this entry ",
                            " OK ", // 1: default button
                            NULL,   // 0: alternate
                            NULL,   //-1: other
                            lineNumber, outerDelimiter );
            // ... and now skip the rest: scan the entry again from its
            // start, with the same clean state parseSelf itself starts from
            lineNumber = firstLine;
            c = ' ';
            [self positionTo:locInput.start]; // reposition
            [self pickUpItem]; // skip
            return self;
        }
        if( c==',' ) NextByte;
        SkipSpace;
    }
    return self;
}

// Returns the closing delimiter (RBRACE or RPARA) of the entry parsed last.
- (int)delimiter
{
    return outerDelimiter;
}

/********** conversion methods **************/

// State shared by the conversion routines.  The input is NUL-terminated;
// the output buffer belongs to the caller and is NOT bounds-checked here.
static char *inBuffer;
static int inIndex;
static char *outBuffer;
static int outIndex;
static int lastChar;

#define skipRawBlank {while(NXIsSpace(c)){c=inBuffer[inIndex++];}}
#define fnct_skipRawBlank \
    {while(NXIsSpace(lastChar)){lastChar=inBuffer[inIndex++];}}

// '\0' satisfies isNameChar(), so scanning a NUL-terminated buffer has to
// test for the terminator explicitly or it runs past the end of the text.
#define isRawNameChar(x) ( (x)!='\0' && isNameChar(x) )

// Converts one text item of inBuffer (starting at 'c') and appends the
// result to outBuffer: the content of a "quoted" or {braced} text without
// its outer delimiters, a number unchanged, or an abbreviation prefixed
// with '@'.  An item that starts with ',' is empty.
- convertRawText
{
    int delim;
    int level=0;
    BOOL isQuoted = NO;

    skipRawBlank;
    switch( c ){
        case '"' :
            delim = '"';
            isQuoted = YES;
            break;
        case LBRACE :
            delim = RBRACE;
            isQuoted = YES;
            break;
        case ',' : // text is empty
            return self;
        default :
            delim = outerDelimiter; // not quoted
    }

    if( isQuoted ){
        c=inBuffer[inIndex++]; // skip quote-character
        // stop at the terminator if the closing delimiter is missing
        while( c!='\0' && (c!=delim || level>0) ){
            if( c==LBRACE ) level++;
            if( c==RBRACE ) level--;
            outBuffer[outIndex++] = c;
            c=inBuffer[inIndex++];
        }
        if( c!='\0' ) c=inBuffer[inIndex++]; // skip delimiter
    }
    else if( NXIsDigit(c) ){
        // unquoted text = string of digits
        while( NXIsDigit(c) ){
            outBuffer[outIndex++] = c;
            c=inBuffer[inIndex++];
        }
    }
    else{
        // abbreviation = string of characters without THE_TEN and space
        // (the "must start with a letter" rule is not enforced)
        outBuffer[outIndex++] = '@';
        while( isRawNameChar(c) ){
            outBuffer[outIndex++] = c;
            c=inBuffer[inIndex++];
        }
    }
    return self;
}

// Plain C twin of -convertRawText, working on 'lastChar' instead of the
// instance variable 'c'.
void fnct_convertRawText()
{
    int delim = 0;
    int level=0;
    BOOL isQuoted = NO;

    fnct_skipRawBlank;
    switch( lastChar ){
        case '"' :
            delim = '"';
            isQuoted = YES;
            break;
        case LBRACE :
            delim = RBRACE;
            isQuoted = YES;
            break;
        case ',' : // text is empty
            return;
        default :
            delim = EOF; //outerDelimiter
    }

    if( isQuoted ){
        lastChar=inBuffer[inIndex++]; // skip quote-character
        // stop at the terminator if the closing delimiter is missing
        while( lastChar!='\0' && (lastChar!=delim || level>0) ){
            if( lastChar==LBRACE ) level++;
            if( lastChar==RBRACE ) level--;
            outBuffer[outIndex++] = lastChar;
            lastChar=inBuffer[inIndex++];
        }
        if( lastChar!='\0' ) lastChar=inBuffer[inIndex++]; // skip delimiter
    }
    else if( NXIsDigit(lastChar) ){
        // unquoted text = string of digits
        while( NXIsDigit(lastChar) ){
            outBuffer[outIndex++] = lastChar;
            lastChar=inBuffer[inIndex++];
        }
    }
    else{
        // abbreviation = string of characters without THE_TEN and space
        // (the "must start with a letter" rule is not enforced)
        outBuffer[outIndex++] = '@';
        while( isRawNameChar(lastChar) ){
            outBuffer[outIndex++] = lastChar;
            lastChar=inBuffer[inIndex++];
        }
    }
}

// Converts the raw field at 'loc' of the stream's memory buffer into 'buf'
// (NUL-terminated).  Items joined with '#' are separated by one blank.
// The character behind the field is temporarily replaced by NUL and
// restored afterwards.  'buf' must be large enough for the result.
- convertRawField:(locType)loc to:(char *)buf
{
    int fLength, fMaxLength;
    char origChar;

    if( loc.length==0 ){
        buf[0] = '\0';
        return self;
    }
    [self positionTo:loc.start];
    NXGetMemoryBuffer( theStream, &inBuffer, &fLength, &fMaxLength );
    inIndex = loc.start;
    origChar = inBuffer[loc.start+loc.length];
    inBuffer[loc.start+loc.length] = '\0';
    outBuffer = buf;
    outIndex = 0;
    c = ' ';

    // loop over concatenated strings
    skipRawBlank;
    while( YES ){
        [self convertRawText];
        skipRawBlank;
        if( c!='#' ) break;
        c=inBuffer[inIndex++]; // concatenation: skip '#'
        outBuffer[outIndex++] = ' ';
    }
    inBuffer[loc.start+loc.length] = origChar; // restore
    outBuffer[outIndex] = '\0';
    return self;
}

// converts null-terminated string
// only used to copy a macro text
// (items joined with '#' are appended without a separating blank)
void fnct_convertRawString( char *str, char *buf )
{
    if( str[0] == '\0' ){
        buf[0] = '\0';
        return;
    }
    inIndex = 0; // index of first character
    inBuffer = str;
    outIndex = 0;
    outBuffer = buf;
    lastChar = ' ';

    // loop over concatenated strings
    fnct_skipRawBlank;
    while( YES ){
        fnct_convertRawText();
        fnct_skipRawBlank;
        if( lastChar!='#' ) break;
        lastChar=inBuffer[inIndex++]; // concatenation: skip '#'
    }
    outBuffer[outIndex] = '\0';
}

@end
// These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.