/* fixrtf3.1.c
 *
 * This is fixrtf3.1.c in view mode; [Download] [Up]
 */
/***************************************************************************
*                                                                          *
* fixrtf (version 3.1)                                                     *
*                                                                          *
****************************************************************************
*                                                                          *
* Usage: fixrtf [-blr] filter file(s)_to_convert                           *
*                                                                          *
* Currently supported options:                                             *
*                                                                          *
*    -b     Back up contents of file and save as filename with '~' on end  * 
*            (default is to overwrite).                                    *
*    -l     Use filter in your home directory ~/Library/rtf_filters        *
*    -r     Translate backwards (ie mac-next filter does next to mac)      *
*                                                                          *
****************************************************************************
*                                                                          *
* This source code is free. You may do whatever you wish with it, as long  *
* as you leave the credits below and document any changes/enhancements you *
* may make.                                                                *
*                                                                          *
* I have tested this program on my own files, and it works well, with no   *
* disastrous bugs so far. I provide it, however, on an 'as is' basis, and  *
* I make no claims that it is perfect and will do exactly what you want.   *
* MAKE BACKUP COPIES BEFORE YOU USE IT!                                    *
*                                                                          *
****************************************************************************
*                                                                          *
* This program corrects certain anomalies in RTF formats for non-ascii     *
* characters. There are, as far as I know, 5 RTF formats: mac, pc, ansi,   *
* next, and something else for pc's that I no longer have information on.  *
* Basically, each platform has its own encoding of characters outside of   *
* the standard 7-bit ascii system. NeXT for example largely uses the       *
* PostScript ISOLatin I encoding vector (in release 2.0). This is very     *
* much different from the Macintosh encoding--left double quotes from a    *
* Mac RTF file appear in WriteNow on the NeXT as the onequarter fraction   *
* symbol.                                                                  *
*                                                                          *
* These 'extended ascii' characters are normally encoded in RTF as an      *
* escape sequence: \'xx where xx are two hex digits. This number is the    *
* ascii encoding for the particular platform. fixrtf basically translates  *
* these into the appropriate escapes for the platform you want, based on a *
* translation table you supply as the first argument.                      *
*                                                                          *
* That's the easy part. Now, there are (at least) two problems I am aware  *
* of currently. WriteNow is downright *defective* as an RTF reader/writer: *
* it writes some non-ascii characters *literally* into the RTF file, and   *
* does not recognize and automatically translate the extended sequences of *
* other platforms (but then that's the whole purpose of this program).     *
* Also, on the NeXT, some font-switching is going on behind your back with *
* certain characters. For example, the partialdiff symbol, available from  *
* the keyboard whatever font you're using, is actually from the Symbol     *
* font. fixrtf does its best to find sequences of the form (switch to      *
* Symbol, one character, switch to another font), but this method is prone *
* to severe errors--the font switched 'back' to may not be the one from    *
* before the Symbol character, and if there are more than one Symbol       *
* characters in a row....                                                  *
*                                                                          *
* The translation table:                                                   *
*                                                                          *
* fixrtf uses a translation table or filter, which may reside in either    *
* the /LocalLibrary/rtf_filters, or in your home directory's library as    *
* ~/Library/rtf_filters. Filters may be bidirectional, but since there may *
* be multiple escapes leading to the same final character (prime example:  *
* WriteNow writing a uacute instead of \'f3 in the RTF file), you may want *
* to make dedicated one-way filters, and note that on the name (with a -1w *
* on the end or something). fixrtf v3 currently 'ships' with two filters,  *
* mac-next and mac-next-wn, the latter more suited for conversions *from*  *
* WriteNow on the NeXT (I was too lazy to switch the columns). And now for *
* the format....                                                           *
*                                                                          *
* Each line of the filter is of the form                                   *
*                                                                          *
*    digit sequence digit sequence comments                                *
*                                                                          *
* for example,                                                             *
*                                                                          *
*    0  \'8b  0  \'d8   atilde                                             *
*                                                                          *
* The zeros mean that the character is *not* in the Symbol font; 1 says    *
* they are. The backslash quote things are the RTF encodings for the       *
* Postscript character atilde (see the Ref manual) on the two platforms.   *
* The convention is that sequences on the left, when found in the file     *
* being translated, are replaced by sequences on the right, unless the -r  *
* option is used, in which case translation quite naturally occurs in the  *
* opposite direction.                                                      *
*                                                                          *
* Please note that column one of each line *MUST* contain a digit for the  *
* entry to be properly read. Any other type of character in column one is  *
* taken to indicate the whole line as a comment.                           *
*                                                                          *
* An added feature: if the sequence entry for the 'from' column happens    *
* not to begin with \' the sequence is treated as literal; in other words  *
* fixrtf could be used as a global batch search-and-replace mechanism. It  *
* makes use of the \' for speedy access to an array indexed by ascii value *
* and does a sequential search of a smaller array for other sequences. For *
* more details, read the code.                                             *  
*                                                                          *
****************************************************************************
*                                                                          *
* BUGS:                                                                    *
*                                                                          *
* As previously stated, fixrtf will not properly translate groups of NeXT  *
* Symbol font characters to their destination equivalents. There is of     *
* course always the question of RTF control words getting munged in the    *
* translation process: that, in my opinion, would require writing a full-  *
* blown RTF reader/writer, a task I am not currently up to. This program   *
* is intended as a low-end fix to what can be a very obnoxious problem.    *
*                                                                          *
* And last, but not least, DO NOT USE THIS on files that contain large     *
* passages of text in the Symbol font! I have not had the opportunity to   *
* see what the encoding for this font is, but I am sure it differs both    *
* across and within platforms (in the sense that symbols don't correspond  *
* to 'standard' keyboard layout).                                          *
*                                                                          *
****************************************************************************
*                                                                          *
* written by: Nik A Gervae                                                 *
*             832 Packard St., #2                                          *
*             Ann Arbor, MI  48104                                         *
*                                                                          *
*             Nik_Gervae@ub.cc.umich.edu     313-994-4123                  *
*                                                                          *
****************************************************************************
*                                                                          *
* last modified 1991 04 05 fr                                              *
*                                                                          *
* Version history:                                                         *
*                                                                          *
* 1991 02 11 mo     Version 1.0--a quick fix, with poor error checking and *
*                   support of only one infile, one outfile.               *
*                                                                          *
* 1991 02 13 we     Version 2.0--added multiple file support--infiles only *
*                   are specified, and are now overwritten, unless the -b  *
*                   option is given; then the original is saved with a     *
*                   tilde appended to the name.                            *
*                   Added 5 new character conversions.                     *
*                   Much better error checking added. Still doesn't check  *
*                   whether the file is indeed a Mac RTF file or not!      *
*                                                                          *
* 1991 04 04 fr     Version 3.0--added external filter files for customiz- *
*                   ability, and to handle slightly exceptional cases.     *
*                   Filters can be bidirectional. New options specify      *
*                   use of user's personal filters & reverse translation.  *
*                   At this time no support for Symbol font. Minimal RTF   *
*                   format checking (ie opening left brace)--still no      *
*                   platform check (but you know what you're doing right?) *
*                                                                          *
* 1991 04 16 tu     Version 3.1--a minor bug fix.                          *
*                                                                          *
***************************************************************************/

#include <stdio.h>
#include <stdlib.h>				/** for exit() */
#include <string.h>
#include <ctype.h>
#include <sys/types.h>			/** for getuid() */
#include <unistd.h>				/** declares getuid() on POSIX systems */
#include <pwd.h>				/** for getpwuid(..) */

/*** Sizes of the buffers and translation tables. */

#define BUFSIZE	2048			/** size of the font-table scan buffer */
#define NUMSYM			30		/** size of symbol filter array */
#define NUMASCII		256	 	/** size of extended ascii filter array */
#define NUMOTHER		30		/** size of other character filter array */
#define MAXLEN			80		/** max length of filter input line */
#define PATHLEN			200		/** max length of full pathname */
#define BKNAMELEN		100		/** maximum length of backup file name */
#define BKMARK			"~"		/** this is appended to indicate backup */

#define WINDLEN			20		/** scanning window size */
#define EXCHLEN			6		/** max length of strings exchanged in tx */
#define FONTDEFLEN		10		/** length of string defining Symbol font */
#define ASCII_ESC_LEN	4		/** FULL length of escape--not just prefix */

/*** Filter directories. LIBDIR is appended to the user's home directory; */
/*** GLOBFILTERDIR is used as is for filters not given by absolute path.  */

#define LIBDIR			"/Library/rtf_filters/"
#define GLOBFILTERDIR	"/LocalLibrary/rtf_filters/"

/*** Strings searched for when locating the Symbol font definition. */

#define FONTTABLE		"{\\fonttbl"
#define SYMBOL			"Symbol"
#define SYMBOLFONTDEF	"\\ftech Symbol"

/*** Single characters with a special meaning to the scanner. */

#define BACKSLASH		'\\'
#define QUOTE			'\''
#define LBRACE			'{'
#define RBRACE			'}'
#define SCOLON			';'
#define SPACE           ' '
#define ENDCHAR         '\0'	/** marks an emptied scan-window slot */
#define NOSTRING		-1		/** first char of an unused table string */

#define UNKNOWN			"????"	/** undefined string */
#define ASCII_ESC		"\\\'"	/** RTF prefix for non-ascii characters */




/************************/
/*** GLOBAL VARIABLES ***/
/************************/

/** One entry of a sequentially searched translation table (symbolSet and */
/** otherSet). readFilter() marks an unused entry by storing NOSTRING in  */
/** the first character of both strings.                                  */
typedef struct any_ar {		/** free-form array for various characters */
	char fromStr[EXCHLEN];	/** sequence looked for in the input */
	char toStr[EXCHLEN];	/** sequence written in its place */
	int symCode;			/** nonzero: write toStr inside a Symbol font group */
} AnyCell;

/** One entry of asciiSet. The entry's index is the value of the \'xx     */
/** escape being translated, so no 'from' string is stored. An entry      */
/** whose toStr[0] is NOSTRING has no translation.                        */
typedef struct asc_ar {		/** ascii-code indexed array for ascii chars */
	char toStr[EXCHLEN];	/** sequence written in place of the escape */
	int symCode;			/** nonzero: write toStr inside a Symbol font group */
} AsciiCell;


/** The three translation tables, all filled in by readFilter(). */
AnyCell symbolSet[NUMSYM];		/** characters to translate *from* Symbol */
AsciiCell asciiSet[NUMASCII];	/** 'extended' ascii to translate *from* */
AnyCell otherSet[NUMOTHER];		/** grab bag of other chars */

/** Set by convert() for the file currently being translated. */
char symbolFont[FONTDEFLEN];	/** the RTF string defining the Symbol font */

/** Command-line switches, set by main(). */
int backup;				/* option flags */
int library;			/* -l: take the filter from ~/Library/rtf_filters */
int reverse;			/* -r: translate from the right column to the left */

/*******************************************/
/*** FUNCTION DECLARATIONS AND HIERARCHY ***/
/*******************************************/

/** Indentation mirrors the call hierarchy: an indented function is a     */
/** helper of the less-indented function listed above it.                 */

void usage(char **argv);	/** print the usage message and exit */

void readFilter(char *thefilter);	/** load the translation tables */
	/** NOTE(review): getline has the same name as the POSIX getline()    */
	/** declared by some <stdio.h> implementations -- confirm that the    */
	/** build does not expose that declaration.                           */
	int getline(FILE *theFile, char line[], int len);
	int asciiesc(char thestr[]);	/** value of a \'xx escape, or -1 */

int convert(char *filename, FILE *infile, FILE *outfile);
	int advance(char window[], FILE *infile, FILE *outfile);
	int translateAsciiEsc(char window[], FILE *outfile);
	int translateSymEsc(char window[], FILE *outfile);
		int isSymEsc(char window[], char theSym[]);
		int findFont(char window[]);
	int translateAny(char window[], AnyCell theSet[], int setSize,
					 FILE *outfile);

char *findFontTable(char buffer[], char *filename,
					FILE *infile, FILE *outfile);
	int strpos(char *string, char *substring);
char *findSymbolFont(char *filename, FILE *infile, FILE *outfile);

/***************************************************************************
*
* main()
*
* Parse the command arguments, load the filter named by the first
* non-option argument, and convert every remaining argument in turn.
*
* Each file is converted into a temporary file. When the conversion
* succeeds the temporary file takes the place of the original, which is
* either removed or, with -b, renamed with BKMARK appended. A file that
* cannot be opened or converted is reported and skipped; the remaining
* files are still processed.
*
* Returns 0. Usage errors end the program from within usage().
*
***************************************************************************/


int main(int argc, char *argv[])
{
	FILE *infile;				/** file to translate */
	FILE *outfile;				/** file to output; makes sense, no? */
	char tempname[L_tmpnam];	/** temporary name of output file */
	char bkname[BKNAMELEN];		/** for backup file */
	size_t keep;				/** # chars of the name kept in bkname */
	int i, j;					/** loop counters */


	if (argc < 3)				/* Check the usage. */
		usage(argv);

	backup = 0;		/* Assume we'll be overwriting. */
	library = 0;	/* ...and using a filter in /LocalLibrary/rtf_filters. */
	reverse = 0;	/* ...and translating in column order. */

	/*** Check for options. The bound is tested before argv[i] is */
	/*** touched, because argv[argc] is a null pointer.            */

	for (i = 1; i < argc && argv[i][0] == '-'; ++i) {
		for (j = 1; argv[i][j] != '\0'; ++j)
			switch (argv[i][j]) {
				case 'b' : backup = 1;		/* Do back up. */
						   break;
				case 'l' : library = 1;		/* Use ~/Library/rtf_filters. */
						   break;
				case 'r' : reverse = 1;		/* Go from right to left column. */
						   break;
				default  : usage(argv);
						   break;
			} /*switch*/
	} /*for i*/

	/*** A filter and at least one file must follow the options. */

	if (i + 1 >= argc)
		usage(argv);

	readFilter(argv[i]);		/* Now load the translation tables. */
	++i;						/* And advance to first file to translate. */


	/*** Begin the main loop. */

	for (  ; i < argc; ++i) {

		/*** Open the input file. */

		if ((infile = fopen(argv[i], "r")) == NULL) {
			perror(argv[i]);
			continue;
		} /*if infile*/

		/*** Create the output file. */

		if (tmpnam(tempname) == NULL ||
			(outfile = fopen(tempname, "w")) == NULL) {
			perror(argv[0]);
			fclose(infile);
			continue;
		} /*if outfile*/

		/*** Convert. On failure convert() leaves both streams open, */
		/*** so close them here and throw the partial output away.   */

		if (convert(argv[i], infile, outfile) != 0) {
			fclose(infile);
			fclose(outfile);
			if (remove(tempname) != 0 )
				perror("remove temporary file: ");
			continue;
		} /*if convert*/

		/*** Move the original out of the way... */

		if (backup) {

			/* Truncate the name if need be so that the name, */
			/* BKMARK and the terminator all fit in bkname.   */

			keep = BKNAMELEN - 1 - strlen(BKMARK);
			strncpy(bkname, argv[i], keep);
			bkname[keep] = '\0';
			strcat(bkname, BKMARK);

			if (rename(argv[i], bkname) != 0) {
				perror(argv[i]);
				remove(tempname);		/* original is untouched */
				continue;
			} /*if rename*/

		} /*if backup*/
		else if (remove(argv[i]) != 0 ) {
			perror(argv[i]);
			remove(tempname);			/* original is untouched */
			continue;
		} /*else if remove*/

		/*** ...and put the converted file in its place. If that    */
		/*** fails the only converted copy is the temporary file,   */
		/*** so keep it and say where it is.                        */

		if (rename(tempname, argv[i]) != 0) {
			perror("rename temprary file: ");
			fprintf(stderr, "%s: converted text left in %s\n",
					argv[i], tempname);
		} /*if rename*/

	} /*for*/

	return 0;

} /*main*/


/***************************************************************************
*
* usage()
*
* Somebody doesn't know how to use this; tell them.
*
* Writes the usage line to stderr, naming the program as args[0], and
* ends the program with exit status 1. The line lists the filter argument,
* which must come before the files to convert.
*
***************************************************************************/


void usage(char **args)
{
	fprintf(stderr, "usage: %s [-lbr] filter file(s)_to_convert\n", args[0]);
	exit(1);

} /*usage*/


/***************************************************************************
*
* readFilter()
*
* Read in the filter file named by thefilter and fill up the translation
* tables symbolSet, asciiSet and otherSet.
*
* The file is looked for in ~/Library/rtf_filters when the -l option was
* given, otherwise in /LocalLibrary/rtf_filters unless thefilter begins
* with a slash. Failure to open it ends the program.
*
* Only lines beginning with a digit are entries; each must hold
*
*     digit sequence digit sequence
*
* With -r the two column pairs swap roles. An entry goes to symbolSet
* when its 'from' digit is nonzero, to asciiSet when its 'from' sequence
* is a \'xx escape, and to otherSet otherwise; a 'from' sequence equal
* to UNKNOWN is ignored. Entries that are incomplete, that hold a sequence
* too long to store (EXCHLEN - 1 characters at most), or that do not fit
* in a full table are reported on stderr and skipped.
*
***************************************************************************/


void readFilter(char *thefilter)
{
	FILE *infilter;				/** filter file pointer */
	char filtername[PATHLEN];	/** full pathname of filter file */
	struct passwd *upw;		/** struct containing user info for person using */
							/*  program (person logged in) */

	char theline[MAXLEN];
	int lineno;				/** line number, for the diagnostics */
	int nfields;			/** number of fields sscanf converted */
	int i, sym, oth;		/** array indices for tranlation tables */
	int tosym, frsym;		/** character supposed to be in Symbol font? */

	/* Scanned from theline, so MAXLEN bytes can never overflow; the */
	/* lengths are checked against EXCHLEN before anything is stored. */
	char tostr[MAXLEN], frstr[MAXLEN];


	/*** Build the pathname. strncat always terminates the result and */
	/*** is given the room left less one for that terminator; a name  */
	/*** too long to fit is truncated and will then fail to open.     */

	filtername[0] = '\0';

	if (library) {					/*** In the user's home directory. */
		upw = getpwuid(getuid());
		if (upw == NULL || upw->pw_dir == NULL) {
			fprintf(stderr, "%s: cannot determine your home directory.\n",
					thefilter);
			exit(-1);
		} /*if upw*/
		strncat(filtername, upw->pw_dir, PATHLEN - 1 - strlen(filtername));
		strncat(filtername, LIBDIR, PATHLEN - 1 - strlen(filtername));
	} /*if library*/

	else if (thefilter[0] != '/') {	/*** In the global filter library. */
		strncat(filtername, GLOBFILTERDIR, PATHLEN - 1 - strlen(filtername));
	} /*else if*/

	strncat(filtername, thefilter, PATHLEN - 1 - strlen(filtername));

	if ((infilter = fopen(filtername, "r")) == NULL) {
		perror(filtername);
		exit(-1);
	} /*if infilter*/

	/*** Initialize the arrays. */

	for (i = 0; i < NUMSYM; ++i) {
		symbolSet[i].fromStr[0] = NOSTRING;
		symbolSet[i].toStr[0] = NOSTRING;
		symbolSet[i].symCode = 0;
	} /*for i*/
	for (i = 0; i < NUMASCII; ++i) {
		asciiSet[i].toStr[0] = NOSTRING;
		asciiSet[i].symCode = 0;
	} /*for i*/
	for (i = 0; i < NUMOTHER; ++i) {
		otherSet[i].fromStr[0] = NOSTRING;
		otherSet[i].toStr[0] = NOSTRING;
		otherSet[i].symCode = 0;
	} /*for i*/

	sym = oth = 0;
	lineno = 0;

	/*** And fill 'er up. */

	while (getline(infilter, theline, MAXLEN) != EOF) {

		++lineno;

		if (!isdigit((unsigned char) theline[0])) continue;

		if (reverse)
			nfields = sscanf(theline, "%i %s %i %s",
							 &tosym, tostr, &frsym, frstr);
		else
			nfields = sscanf(theline, "%i %s %i %s",
							 &frsym, frstr, &tosym, tostr);

		if (nfields != 4) {
			fprintf(stderr, "%s: line %d: incomplete entry ignored.\n",
					filtername, lineno);
			continue;
		} /*if nfields*/

		if (strlen(frstr) >= EXCHLEN || strlen(tostr) >= EXCHLEN) {
			fprintf(stderr, "%s: line %d: sequence too long, ignored.\n",
					filtername, lineno);
			continue;
		} /*if strlen*/

		if (frsym) {
			if (sym >= NUMSYM) {
				fprintf(stderr, "%s: line %d: Symbol table full, ignored.\n",
						filtername, lineno);
				continue;
			} /*if sym*/
			strcpy(symbolSet[sym].fromStr, frstr);
			strcpy(symbolSet[sym].toStr, tostr);
			symbolSet[sym].symCode = tosym;
			++sym;

		} /*if frsym*/

		else if ((i = asciiesc(frstr)) >= 0) {
			strcpy(asciiSet[i].toStr, tostr);
			asciiSet[i].symCode = tosym;
		} /*else if i*/
		else if (!strcmp(frstr, UNKNOWN)) continue;
		else {
			if (oth >= NUMOTHER) {
				fprintf(stderr, "%s: line %d: table full, ignored.\n",
						filtername, lineno);
				continue;
			} /*if oth*/
			strcpy(otherSet[oth].toStr, tostr);
			strcpy(otherSet[oth].fromStr, frstr);
			otherSet[oth].symCode = tosym;
			++oth;
		} /*else*/

	} /*while*/

	fclose(infilter);

} /*readFilter*/


/***************************************************************************
*
* getline()
*
* getline reads up to (len - 1) characters from theFile into line, and
* advances to the beginning of the next line (ie past the newline). A
* terminating newline occurring before (len - 1) characters have been
* read is added to the string; this way the caller may check whether
* truncation occurred. The string is always terminated when len is at
* least 1; the remainder of an over-long line is read and discarded.
*
* Returns the number of characters stored, or EOF when the end of the
* file is reached before anything could be stored.
*
***************************************************************************/

int getline(FILE *theFile, char line[], int len)
{
	int room = len;		/* space left in line[], terminator included */
	int c = 0;			/* last character read; stays 0 if none is read, */
						/* so the tests below never see garbage */
	int i = 0;

	while (--room > 0 && (c = getc(theFile)) != EOF && c != '\n')
		line[i++] = c;

	if (c == '\n')
		line[i++] = c;
	else {
		/* Buffer full or end of file: skip what is left of the line. */
		while ((c = getc(theFile)) != '\n' && c != EOF)
			;
	} /*else*/

	if (len > 0)
		line[i] = '\0';

	if (c == EOF && i == 0) return EOF;
	return i;

} /*getline*/

/***************************************************************************
*
* asciiesc()
*
* Return the value (0 to 255) of the RTF escape sequence at the start of
* thestr, or -1 if there isn't one there. An escape is a backslash, a
* quote and two hex digits; only the digits 0-9 and the lower case
* letters a-f are accepted. Note that 0 is a legitimate value, so callers
* must compare the result against 0 rather than test it for truth.
*
***************************************************************************/

/* Value of one hex digit (0-9, a-f), or -1 if ch is not such a digit. */
static int hexDigitValue(char ch)
{
	unsigned char u = (unsigned char) ch;	/* ctype needs a nonnegative value */

	if (isdigit(u)) return u - '0';
	if (u >= 'a' && u <= 'f') return u - 'a' + 10;
	return -1;

} /*hexDigitValue*/

int asciiesc(char thestr[])
{
	int high, low;

	if (thestr[0] != BACKSLASH || thestr[1] != QUOTE)
		return -1;

	/* Look at the second digit only after the first has been accepted, */
	/* so that nothing beyond the end of a short string is ever read.   */

	if ((high = hexDigitValue(thestr[2])) < 0)
		return -1;
	if ((low = hexDigitValue(thestr[3])) < 0)
		return -1;

	return high * 16 + low;

} /*asciiesc*/

/***************************************************************************
*
* convert()
*
* Go through file, trying to translate the current first few characters
* into whatever is appropriate. The Symbol font definition found by
* findSymbolFont() is recorded in symbolFont first; then the scan window
* is slid over the rest of infile, Symbol sequences, ascii escapes and
* other sequences being tried in that order at every position.
*
* Returns 0 on success, in which case both files have been closed.
* Returns -1 if the font table cannot be found or if an I/O error
* occurred; the files are then left open for the caller to close.
*
***************************************************************************/

int convert(char *filename, FILE *infile, FILE *outfile)
{
	char *symFontPtr;
	char window[WINDLEN + 1];	/** scan window */
	int i;						/** loop counter */
	int slide;					/** # characters translated */


	fprintf(stderr, "Converting %s...\n", filename);

	/*** Initialize the window. The extra last cell is never filled */
	/*** and so always terminates the window as a string.           */

	for (i = 0; i < WINDLEN + 1; ++i)
		window[i] = ENDCHAR;


	/*** Find out how the Symbol font is defined in this file. */

	symFontPtr = findSymbolFont(filename, infile, outfile);
	if (symFontPtr == NULL) {
		fprintf(stderr, "Error finding file %s\'s font table.\n", filename);
		return -1;
	} /*if*/

	/* strncpy does not terminate a string that fills the buffer. */
	strncpy(symbolFont, symFontPtr, sizeof(symbolFont) - 1);
	symbolFont[sizeof(symbolFont) - 1] = ENDCHAR;


	/*** Now just chug through and translate whatever you can. */

	while (advance(window, infile, outfile) != EOF) {

		slide = translateSymEsc(window, outfile);
		if (slide == 0)
			slide = translateAsciiEsc(window, outfile);
		if (slide == 0)
			slide = translateAny(window, otherSet, NUMOTHER, outfile);

		/*** Get rid of whatever is being replaced. */

		for (i = 0; i < slide; ++i) {
			window[i] = ENDCHAR;
		} /*for i*/

	} /*while*/


	/*** Don't report success for a file that was not read or */
	/*** written completely: the caller replaces the original. */

	if (fflush(outfile) != 0 || ferror(outfile) || ferror(infile)) {
		fprintf(stderr, "%s: I/O error during conversion.\n", filename);
		return -1;
	} /*if fflush*/

	fclose(infile);
	fclose(outfile);

	return 0;

} /*convert*/


/***************************************************************************
*
* advance()
*
* Output the first character of the present window, shuffle the others
* forward by one, and fill in the remainder of the array. This array is
* used as a scan 'window' (hence the name), in that the first part is
* checked by various translate...() routines and replaced in the outfile.
*
* Cells holding ENDCHAR at the front of the window are empty (already
* written or replaced) and are squeezed out. Once infile is exhausted
* the vacated cells are filled with ENDCHAR.
*
* Returns the character now first in the window, as a nonnegative value,
* or EOF once the input is exhausted and the window is empty.
*
***************************************************************************/


int advance(char window[], FILE *infile, FILE *outfile)
{
	int i, j;			/* loop counter */
	int c;				/* character read; int, to tell EOF from a byte */


	/*** Output the first character if it hasn't been touched. */

	if (window[0] != ENDCHAR) {
		putc(window[0], outfile);
		window[0] = ENDCHAR;
	} /*if window[0]*/

	/*** Advance the array to fill emptied cells. */

	for (j = 0; j < WINDLEN && window[j] == ENDCHAR; ++j)
		;

	for (i = 0; j < WINDLEN; ++i, ++j) {
		window[i] = window[j];
	} /*for i*/

	/*** Now fill in the emptied spots. EOF is not stored: squeezed  */
	/*** into a char it would either look like the byte 0xff or, if  */
	/*** char is unsigned, never compare equal to EOF again.         */

	for (  ; i < WINDLEN; ++i) {	   /* Get characters for those slots. */
		c = getc(infile);
		window[i] = (c == EOF) ? ENDCHAR : (char) c;
	} /*for i*/

	/*** Finished only when nothing is left to read AND nothing is */
	/*** left in the window.                                       */

	if (window[0] == ENDCHAR && (feof(infile) || ferror(infile))) {
		for (j = 1; j < WINDLEN && window[j] == ENDCHAR; ++j)
			;
		if (j == WINDLEN)
			return EOF;
	} /*if window[0]*/

	return (unsigned char) window[0];

} /*advance*/


/***************************************************************************
*
* translateAsciiEsc()
*
* Use asciiesc() to find out if we're at an RTF ascii escape, and if so
* use the value returned as an index into the ascii escape table to find
* out what to replace it with. If that character should be rendered in the
* Symbol font, wrap it in a group that switches to that font. An escape
* with no table entry is copied to outfile unchanged.
*
* Returns ASCII_ESC_LEN, the number of window characters dealt with, if
* the window starts with an escape, and 0 (nothing written) if not.
*
***************************************************************************/

int translateAsciiEsc(char window[], FILE *outfile)
{
	int theEsc, i;

	theEsc = asciiesc(window);
	if (theEsc < 0)
		return 0;

	/* An unused entry holds exactly NOSTRING in its first character.  */
	/* Test for equality: a replacement may begin with any other byte, */
	/* including those above 0x7f, which are negative where char is    */
	/* signed.                                                         */

	if (asciiSet[theEsc].toStr[0] != (char) NOSTRING) {
		if (asciiSet[theEsc].symCode) fprintf(outfile, "%c%s ",
											  LBRACE, symbolFont);
		fprintf(outfile, "%s", asciiSet[theEsc].toStr);
		if (asciiSet[theEsc].symCode) fprintf(outfile, "%c", RBRACE);

	} /*if asciiSet*/
	else {
		for (i = 0; i < ASCII_ESC_LEN; ++i) {
			putc(window[i], outfile);
		} /*for i*/
	} /*else*/

	return ASCII_ESC_LEN;    /* whether or not it was translated */

} /*translateAsciiEsc*/
			
			
/***************************************************************************
*
* translateSymEsc()
*
* If we're at a single character in the Symbol font, see if it can be
* translated. The actual change is made by translateAny(), which is
* given the character alone, without the font switches around it. If
* symbolSet has no entry for it the whole sequence is copied unchanged.
*
* Returns the length of the Symbol sequence at the start of the window,
* all of which has then been written to outfile in one form or another,
* or 0 (nothing written) if the window does not start with one.
*
***************************************************************************/


int translateSymEsc(char window[], FILE *outfile)
{
	/* Cleared so that it is a terminated string whatever isSymEsc() */
	/* stores in it; translateAny() compares it as a string.         */
	char theSym[EXCHLEN] = { 0 };
	int slen, i;

	slen = isSymEsc(window, theSym);
	if (slen <= 0)
		return 0;

	if (!translateAny(theSym, symbolSet, NUMSYM, outfile)) {
		for (i = 0; i < slen; ++i) {
			putc(window[i], outfile);
		} /*for i*/
	} /*if !translateAny*/

	return slen;

} /*translateSymEsc*/


/***************************************************************************
*
* isSymEsc()
*
* Just go along until you hit something that obviously can't be a Symbol
* sequence for one character. Legal sequences are:
*
*       \f23 C\f22     or     {\f23 C}
*
* where \f23 is the definition of the Symbol font (symbolFont below), \f22
* is some other font, and C is an arbitrary character (or ascii escape!).
*
* On success C is copied into theSym as a terminated string; theSym must
* have room for ASCII_ESC_LEN + 1 characters. Returns the length of the
* whole sequence, or 0 if the window does not start with one.
*
***************************************************************************/

int isSymEsc(char window[], char theSym[])
{
	int sflen;
	int braced;
	int i, j;

	i = 0;
	sflen = strlen(symbolFont);

	braced = (window[i] == LBRACE);
	if (braced) ++i;

	if (strncmp(&window[i], symbolFont, sflen) != 0)
		return 0;
	i += sflen;

	if (isspace((unsigned char) window[i])) ++i;

	if (window[i+1] == LBRACE || window[i+1] == RBRACE ||
			 window[i+1] == BACKSLASH) {
		theSym[0] = window[i++];
		theSym[1] = '\0';
	} /*if window[i+1]*/

	/* asciiesc() says "not an escape" with -1, which is nonzero, and */
	/* 0 is a valid escape value, so compare rather than test truth.  */

	else if (asciiesc(&window[i]) >= 0) {
		strncpy(theSym, &window[i], ASCII_ESC_LEN);
		theSym[ASCII_ESC_LEN] = '\0';
		i += ASCII_ESC_LEN;
	} /*else if asciiesc*/
	else return 0;

	if (braced) {
		if (window[i] == RBRACE) return (i + 1);
		return 0;
	} /*if braced*/

	j = findFont(&window[i]);
	if (j) return (i + j);
	return 0;

} /*isSymEsc*/


/***************************************************************************
*
* findFont()
*
* Find a sequence of the form '\fd...d ' at the start of window, where
* d...d is a string of one or more consecutive decimal digits. The
* digits end at the first nondigit; if that character is whitespace it
* is considered part of the font change, anything else is not.
*
* Returns the length of the font change, or 0 if there isn't one.
*
***************************************************************************/

int findFont(char window[])
{
	int i;

	/* The window holds raw file bytes, which may be negative as plain */
	/* char; the <ctype.h> tests are only defined for unsigned values. */

	if (window[0] != BACKSLASH || window[1] != 'f' ||
		!isdigit((unsigned char) window[2]))
		return 0;

	i = 3;
	while (isdigit((unsigned char) window[i])) ++i;

	if (isspace((unsigned char) window[i])) ++i;

	return i;

} /*findFont*/

/***************************************************************************
*
* translateAny()
*
* Go through the whole array of sequences to translate, trying to match
* each against the start of the current window. If there's a match, write
* out the substitute, wrapped in a group that switches to the Symbol font
* if the entry asks for it. The first entry that matches wins.
*
* Returns the length of the sequence matched, or 0 (nothing written) if
* no entry matches.
*
***************************************************************************/

int translateAny(char window[], AnyCell theSet[], int setSize, FILE *outfile)
{
	int i, llen;

	for (i = 0; i < setSize; ++i) {

		llen = strlen(theSet[i].fromStr);

		/* An empty pattern would match everywhere and still report */
		/* that nothing was translated.                             */
		if (llen == 0)
			continue;

		if (strncmp(window, theSet[i].fromStr, llen) != 0)
			continue;

		/* The space ends the font control word, as it does in       */
		/* translateAsciiEsc(); without it a substitute beginning    */
		/* with a digit would be read as part of the font number.    */

		if (theSet[i].symCode) fprintf(outfile, "%c%s ",
									   LBRACE, symbolFont);
		fprintf(outfile, "%s", theSet[i].toStr);
		if (theSet[i].symCode) fprintf(outfile, "%c", RBRACE);

		return llen;

	} /*for*/

	return 0;

} /*translateAny*/
			
			
/***************************************************************************
*                                                                          *
* findSymbolFont()                                                         *
*                                                                          *
* Read the font table group through findFontTable() and work out the       *
* '\fN' code that selects the Symbol font. If SYMBOL occurs in the group,  *
* the code is copied from the text in front of it; otherwise a new code,   *
* one higher than the largest number in the group, is made up and an       *
* entry built from SYMBOLFONTDEF is appended to the group. The group is    *
* then written to outfile.                                                 *
*                                                                          *
* Returns the font code, or NULL on failure. The returned string lives in  *
* static storage and is overwritten by the next call.                      *
*                                                                          *
***************************************************************************/

char *findSymbolFont(char *filename, FILE *infile, FILE *outfile)
{
	char buffer[BUFSIZE];				/** text of the font table group */
	static char symFont[FONTDEFLEN];	/** result; static, so it outlives the call */
	char madeUp[32];					/** scratch space for a made-up code */
	int addEntry = 0;					/** nonzero: a Symbol entry must be appended */
	int i, j;							/** loop etc variables */
	size_t len;							/** length of the text in buffer */

	/*** Make sure there's a font table. */

	if (findFontTable(buffer, filename, infile, outfile) == NULL) {
		fprintf(stderr, "%s: font table not found.\n", filename);
		return NULL;
	} /*if findFontTable*/

	if ((i = strpos(buffer, SYMBOL)) > 0) {

		/*** The font is already defined. Step back to the second */
		/*** backslash in front of its name. The index is tested  */
		/*** before the character, so the scan can never read in  */
		/*** front of the buffer.                                 */

		while (i > 0 && buffer[i] != BACKSLASH) --i;
		if (i > 0) --i;
		while (i > 0 && buffer[i] != BACKSLASH) --i;

		/*** Copy from there up to the next backslash: at most 10 */
		/*** characters, and never more than symFont can hold.    */

		for (j = 0; j < 10 && j < (int) sizeof(symFont) - 1; ) {
			symFont[j++] = buffer[i++];
			if (buffer[i] == BACKSLASH || buffer[i] == ENDCHAR) break;
		} /*for j*/

		symFont[j] = '\0';

	} /*if i*/

	else {

		/*** No such luck. Have to make up a \f definition. */
		/*** So find the highest number in the group, and use */
		/*** one higher than that.                            */

		int fnum = 0;		/** highest number seen so far */
		int newf = 0;		/** number we're looking at */

		for (i = 0; buffer[i] != ENDCHAR; ) {
			if (!isdigit((unsigned char) buffer[i])) {
				++i;
				continue;
			} /*if !isdigit*/
			sscanf(&buffer[i], "%d", &newf);
			if (newf > fnum)
				fnum = newf;
			while (isdigit((unsigned char) buffer[i])) ++i;
		} /*for i*/

		fnum++;

		/*** Format into scratch space first; symFont's size is */
		/*** set elsewhere and may be smaller than the text.    */

		sprintf(madeUp, "\\f%d", fnum);
		if (strlen(madeUp) >= sizeof(symFont)) {
			fprintf(stderr, "%s: font number too large.\n", filename);
			return NULL;
		} /*if strlen*/
		strcpy(symFont, madeUp);

		addEntry = 1;

	} /*else*/

	/*** Find the end of the group; the bound is tested first. */

	for (len = 0; len < sizeof(buffer) && buffer[len] != ENDCHAR; ++len)
		;
	if (len == 0 || len >= sizeof(buffer))
		return NULL;		/*** Fatal error. */

	buffer[len - 1] = ENDCHAR;	/*** Strip the last '}'. */

	/*** Write out the table with the new entry, if there is one. */
	/*** The pieces go straight to the file, so a nearly full     */
	/*** buffer cannot be overrun by appending to it.             */

	fprintf(outfile, "%s", buffer);
	if (addEntry)
		fprintf(outfile, "{%s%s;}", SYMBOLFONTDEF, symFont);
	fprintf(outfile, "}");

	return symFont;

} /*findSymbolFont*/

/***************************************************************************
*                                                                          *
* findFontTable()                                                          *
*                                                                          *
* Check that infile starts with '{', copy everything up to the next '{'    *
* to outfile, then read that nested group (through its matching '}') into  *
* buffer, which must have room for BUFSIZE characters. The character       *
* read after the group is pushed back onto infile.                         *
*                                                                          *
* Returns a pointer to the text \fonttbl inside buffer, or NULL if that    *
* text is absent or the input is malformed. Format errors are reported     *
* on stderr.                                                               *
*                                                                          *
***************************************************************************/

char *findFontTable(char buffer[], char *filename, FILE *infile, FILE *outfile)
{
	int ch;				/** character just read */
	int used;			/** characters stored in buffer so far */
	int depth;			/** how many braces are currently open */

	/*** The file has to open with a brace; pass it straight through. */

	ch = getc(infile);
	if (ch != '{') {
		fprintf(stderr, "%s is obviously not an RTF file.\n", filename);
		return NULL;
	} /*if ch*/
	putc(ch, outfile);

	/*** Pass everything through until the next opening brace. */

	for (;;) {
		ch = getc(infile);
		if (ch == '{') break;
		if (ch == EOF) {
			fprintf(stderr, "%s: RTF format may be wrong.\n", filename);
			return NULL;
		} /*if EOF*/
		if (ch == '}') {
			fprintf(stderr, "%s: bad RTF format; ", filename);
			fprintf(stderr, "brace in wrong place.\n");
			return NULL;
		} /*if '}'*/
		putc(ch, outfile);
	} /*for*/

	/*** ch is the '{' that opens the nested group; collect the group. */

	used = 0;
	buffer[used++] = ch;
	depth = 1;

	for (;;) {
		ch = getc(infile);
		if (ch == EOF || depth <= 0 || used >= BUFSIZE - 1) break;

		/*** TODO (original author): \n conversions would go here. */

		buffer[used++] = ch;

		switch (ch) {
		case '{':
			++depth;
			break;
		case '}':
			--depth;
			break;
		default:
			break;
		} /*switch*/
	} /*for*/

	/*** The character that ended the loop is not part of the group. */

	ungetc(ch, infile);

	buffer[used++] = '\0';

	if (used == BUFSIZE) {
		fprintf(stderr, "Buffer overflow.\n");
		return NULL;
	} /*if used*/

	if (ch == EOF || depth > 0) {
		fprintf(stderr, "Can't understand RTF format; braces may be wrong.\n");
		return NULL;
	} /*if ch*/

	return strstr(buffer, "\\fonttbl");

} /*findFontTable*/

/***************************************************************************
*                                                                          *
* strpos()                                                                 *
*                                                                          *
* Return the zero-based offset of the first occurrence of substring        *
* within string, or -1 if substring does not occur.                        *
*                                                                          *
***************************************************************************/

int strpos(char *string, char *substring)
{
	char *hit = strstr(string, substring);	/** NULL when there is no match */

	return hit ? (int) (hit - string) : -1;

} /*strpos*/
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.