/* 2rtf_parse.c */

/* This is 2rtf_parse.c in view mode; [Download] [Up] */
/*  
 parse.c of

 2rtf: a facility to convert files in the ATK file format to files compatible with the RTF format.

 Scribetext is copyright (c) 1989, 1990 by the Massachusetts Institute of
 Technology.

 RTF is a product of the Microsoft Corporation.

 Permission to use, copy, modify, and distribute this software and
 its documentation for any purpose and without fee is hereby granted,
 provided that the above copyright notice and the name of the author(s)
 appear in all copies; that both that copyright notice, the name of
 the author(s) and this permission notice appear in supporting
 documentation; and that the name of the Massachusetts Institute of
 Technology not be used in advertising or publicity pertaining to
 distribution of the software without specific, written prior
 permission.  The Massachusetts Institute of Technology makes no
 representations about the suitability of this software for any purpose.
 It is provided "as is" without express or implied warranty.

 2rtf was written by Scott Rixner, rixner@ATHENA.MIT.EDU and Jeremy Paul Kirby, jpkirby@ATHENA.MIT.EDU

 $Header: /usr2/multimedia/RCS/MMEdit//2rtf_parse.c,v 3.1 92/12/09 15:04:08 suzuki Exp $
*/


#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#import  <defaults/defaults.h>
#include "2rtf.h"
#import "stvec.h"
#import <streams/streams.h>
#import  "data_types.h"

/* Head of the translation list; FindNode() walks it through ->next. */
extern struct TableStruct *Table;
/* Head of the stylesheet list; ParseText() searches it by ->name when a
   command is not found in Table. */
extern struct StyleStackStruct *Style;

/* Old-style (non-prototype) forward declarations. */
void CloseFiles(), AbsorbSpace();
void RStyleApply(), Newlines(), exit();
long int ParseText();
long int richParseText();
long int plainParseText();

int ReplaceText();

/* Tag counter shared by the <bigger> handling: richParseText() sets it,
   manybigger() increments it once per tag it consumes and
   eatmanybigger() decrements it once per closing tag it consumes. */
int big=0;
/* Only ever reset to 0 by the code in this file. */
int nobig=0;
/* richParseText() sets this to 1 after copying a literal character and to
   0 after handling a tag; no live code in this file reads it. */
int wrote=0;

/*
 * From here on the stdio names below are redirected to NXStream calls.
 * Two of the mappings do NOT behave like their stdio namesakes:
 *   - fputs(x, stream) becomes NXPrintf(stream, x), so x is used as a
 *     format string and any '%' in it is interpreted;
 *   - ungetc(x, y) becomes NXUngetc(y), so the character argument is
 *     discarded.
 */
#define fgetc(x) NXGetc(x)
#define fputc(x,stream) NXPutc(stream,x)
#define fputs(x,stream) NXPrintf(stream,x)
#define ungetc(x,y) NXUngetc(y)
#define fprintf NXPrintf


/*
 *  Entry point for the backslash-command parser: reset the line
 *  counter, then let ParseText() consume the input up to EOF with the
 *  NORMAL transform and the PRINTTOFILE action.  The value returned by
 *  ParseText() is not used.
 */
void ParseMain()
{
  CurrLine = 1;
  (void) ParseText(EOF, NORMAL, PRINTTOFILE);
}

/*
 *  Entry point for the <tag> style parser: reset the line counter and
 *  the two <bigger> bookkeeping counters, then let richParseText()
 *  consume the input up to EOF.  Its return value is not used.
 */
void richParseMain()
{
  CurrLine = 1;
  big = nobig = 0;

  (void) richParseText(EOF, NORMAL, PRINTTOFILE);
}

/*
 *  Entry point for plain-text input: reset the line counter and hand
 *  the whole input to plainParseText().  Its return value is not used.
 */
void plainParseMain()
{
  CurrLine = 1;
  (void) plainParseText(EOF, NORMAL, PRINTTOFILE);
}

long int plainParseText(tofind, transform, action)
     int tofind, transform, action;
{
   int c;
   

   NXSeek(fin, (long)0, NX_FROMSTART);
   while((c = fgetc(fin)) != EOF) {
	switch((char) c ){
		case '\r' : 
		   break;	
		case '\\' : 
                   fprintf(fout, "\\" );
		   break;	
		case '\377' : 
		   break;	
		case '\n' : 
                   fprintf(fout, "\r\n\\par\r\n " );
		   break;	
               default:
	           fputc(c, fout);
        }
   }
  return(0);
}

/*
 *  Return nonzero when tag is exactly one of "f0" .. "f19".
 */
static int isFontTagName(tag)
     char *tag;
{
   if (tag[0] != 'f' || !isdigit((unsigned char) tag[1]))
      return(0);
   if (tag[2] == '\0')
      return(1);                                   /* f0 .. f9   */
   return(tag[1] == '1' && isdigit((unsigned char) tag[2])
          && tag[3] == '\0');                      /* f10 .. f19 */
}

/*
 *  Parse "<tag>" style rich text from fin and write RTF to fout.
 *
 *  tofind:     character at which this (possibly recursive) call
 *              stops; EOF for the top-level call.  End of input always
 *              stops the loop as well.
 *  transform:  passed through untouched to command functions.
 *  action:     not used by this parser.
 *
 *  Tag handling, in order of precedence:
 *    <lt>                 writes '<'
 *    <f0> .. <f19>        writes "\fN" followed by a newline
 *    <nl>                 writes "\par"
 *    </f0> .. </f19>      ignored
 *    <comment>            skipped up to the matching </comment>
 *                         (nested comments are counted)
 *    tags found in Table  translated as described in the code below
 *    anything else        ignored
 *  Outside tags, CR, LF and 0xFF bytes are dropped and everything else
 *  is copied.
 *
 *  Returns CurrLine, or 0 when a closing command tag other than
 *  </bigger> ends the call early.
 */
long int richParseText(tofind, transform, action)
     int tofind, transform, action;
{
   char tmp_instruction[TMP_SIZE], *lowered, *makelower();
   struct TableStruct *tmp, *FindNode();
   int manybigger(), eatmanybigger(), checktoken();
   int i, in, commct;
   char c;
   char token[50];
   int limit = (int) sizeof token - 1;   /* room left for the '\0' */

   /* Stop on EOF as well as on tofind: otherwise an unterminated group
      would spin forever once the stream keeps returning EOF. */
   while((in = fgetc(fin)) != tofind && in != EOF) {
       c = (char) in;
       if (c == '<') {
           /* Collect the lower-cased tag name; 0xFF bytes are skipped
              and a NUL byte ends the collection. */
           for (i = 0; (i < limit && (in = fgetc(fin))
                     && in != EOF); ++i) {
               c = (char) in;
               if (c == '>') break;
               if (c == '\377') {
                   i--;
                   continue;
               }
               token[i] = isupper((unsigned char) c)
                          ? (char) tolower((unsigned char) c) : c;
           }
           /* EOF is tested on the int, never on the narrowed char, so
              a 0xFF data byte cannot be mistaken for end of input. */
           if (in == EOF) break;
           if (c != '>') {
               /* Over-long or NUL-terminated tag: drop the remainder. */
               while ((in = fgetc(fin)) != '>' && in != EOF) {;}
               if (in == EOF) break;
           }
           token[i] = '\0';

           strcpy(tmp_instruction, token);
           /* makelower() is defined elsewhere; skip the copy when it
              lowered the buffer in place, because strcpy() on
              overlapping objects is undefined. */
           lowered = makelower(tmp_instruction);
           if (lowered != tmp_instruction)
               strcpy(tmp_instruction, lowered);
           tmp = FindNode(EZCOLUMN, tmp_instruction);

           if (!strcmp(token, "lt")) {
               fputc('<', fout);
               wrote=0;
           } else if (isFontTagName(token)) {
               fprintf(fout, "\\%s\n", token);
               wrote=0;
           } else if (!strcmp(token, "nl")) {
               fputs("\\par\n", fout);
               wrote=0;
           } else if (token[0] == '/' && isFontTagName(token + 1)) {
               ;   /* closing font tags produce no output */
           } else if (!strcmp(token, "comment")) {
               commct = 1;
               while (commct > 0) {
                   while ((in = fgetc(fin)) != '<' && in != EOF) ;
                   if (in == EOF) break;
                   /* Read the whole tag but store at most `limit`
                      characters: the original wrote past token[]. */
                   for (i = 0; (in = fgetc(fin)) != '>'
                      && in != EOF; ++i) {
                       if (i < limit) {
                           c = (char) in;
                           token[i] = isupper((unsigned char) c)
                                      ? (char) tolower((unsigned char) c) : c;
                       }
                   }
                   if (in == EOF) break;
                   token[i < limit ? i : limit] = '\0';
                   if (!strcmp(token, "/comment")) --commct;
                   if (!strcmp(token, "comment")) ++commct;
               }
           } else if (tmp != NULL) {
               if (strchr(token, '/')) {
                   /* Closing tag. */
                   if (!(tmp->mode & COMMAND)) {
                       /* Close the group only if checktoken() says a
                          matching opening tag is outstanding. */
                       if (checktoken(token))
                           fputc('}', fout);
                       nobig=0;
                   } else if (!strcmp(tmp_instruction, "/bigger")) {
                       /* manybigger() opened one group for a whole run
                          of <bigger> tags, so swallow the matching run
                          of closers and emit a single '}'. */
                       eatmanybigger();
                       fputc('}', fout);
                   } else {
                       /* Closing tag of some other command: end this
                          call early. */
                       return(0);
                   }
               } else if (!(tmp->mode & COMMAND)) {
                   /* Simple opening tag mapped to an RTF control word. */
                   if (checktoken(token))
                       fprintf(fout, "{\\%s ", tmp->rtf.word);
                   wrote=0;
               } else if (!strcmp(tmp_instruction, "bigger")) {
                   big=1;
                   manybigger();
                   wrote=0;
               } else {
                   big=0;
                   tmp->rtf.fun(tmp_instruction, transform, tofind);
                   wrote=0;
               }
           }
           /* All other tokens are ignored. */
       } else if (c == '\\') {
           fprintf(fout, "\\");
           wrote=1;
       } else if (c != '\r' && c != '\n' && c != '\377') {
           fputc(c, fout);
           wrote=1;
       }
   }
   fputc('\n', fout); /* for good measure */
   return(CurrLine);
}

/*
 *  Parse backslash-command text from fin and write RTF to fout.
 *
 *  tofind:     used to tell when this recursion of ParseText should
 *              fall back.  End of input always ends the loop too.
 *  transform:  used to check if individual characters should be
 *              transformed into anything, i.e. change to all caps.
 *  action:     if set to PRINTTOFILE (0), normal parsing occurs.
 *              Any other value suppresses character output and
 *              command execution until tofind is encountered.
 *
 *  Returns CurrLine; when tofind is EOF, CurrLine is incremented after
 *  its value has been taken for the return.
 */
long int ParseText(tofind, transform, action)
     int tofind, transform, action;
{
   char ch, tmp_instruction[TMP_SIZE], *fetched, *lowered;
   char *GetInstruction(), *makelower();
   struct TableStruct *tmp, *FindNode();
   struct StyleStackStruct *stytmp;
   int Execute(), in, len;

   /* Also stop on EOF: with tofind != EOF the original loop would copy
      the EOF value to the output forever on an unterminated group. */
   while((in = fgetc(fin)) != tofind && in != EOF)
   {
      ch = (char) in;
      if(paragraph && ch != '\\')
         paragraph = 0;

      if(ch == '\\')
      {
         in = fgetc(fin);
         ch = (char) in;
         if((ch == '\\' || ch == '{' || ch == '}') && (!action))
            fprintf(fout, "\\%c", ch);
         else if(in != EOF && isspace((unsigned char) ch))
            continue;
         else
         {
            ungetc(ch, fin);

            /* GetInstruction() returns a calloc'd buffer; copy it and
               release it instead of leaking one buffer per command. */
            fetched = GetInstruction();
            strcpy(tmp_instruction, fetched);
            free(fetched);

            /* makelower() is defined elsewhere; skip the copy when it
               worked in place, since overlapping strcpy() is undefined. */
            lowered = makelower(tmp_instruction);
            if(lowered != tmp_instruction)
               strcpy(tmp_instruction, lowered);

            tmp = FindNode(EZCOLUMN, tmp_instruction);
            if(tmp == NULL && (!action))
            {
               /* Check if command is in stylesheet before
                  declaring an error. */
               for(stytmp = Style; stytmp != NULL; stytmp = stytmp->next)
                  if(!strcmp(tmp_instruction, stytmp->name))
                     break;

               if(stytmp == NULL)
               {
                  /* The text comes from the input file, so it is passed
                     as an argument and never as the format string. */
                  fprintf(ferr, "* EZ command \\%s not recognized\n",
                          tmp_instruction);
                  fprintf(fout, "\\%s", tmp_instruction);
               }
               else
               {
                  RStyleApply(stytmp);
               }
            }
            else if(tmp != NULL && !action)
            {
               if(Execute(tmp_instruction, transform) == tofind)
                  break;
            }
         }
      }
      else if(ch == '\r' || ch == '\n')
         Newlines();
      else if(ch == '\t')
         fputs("\\tab ", fout);
      else if(!action)
      {
         if(!transform)
            fputc(ch, fout);
         else if(transform == INDEX && ch == '+')
         {
            /* "++" is written as "\:"; a single '+' is copied. */
            ch = (char) fgetc(fin);
            if(ch == '+')
               fputs("\\:", fout);
            else
            {
               ungetc(ch, fin);
               fputs("+", fout);
            }
         }
         else if(transform == HEADER && ch == '$')
         {
            /* "$$" is a literal '$'; otherwise "$name" is looked up. */
            in = fgetc(fin);
            ch = (char) in;
            if(ch == '$')
               fputs("$", fout);
            else
            {
               /* Build "$" + letters with an explicit bound; the
                  original appended with an overlapping sprintf(). */
               tmp_instruction[0] = '$';
               len = 1;
               while(in != EOF && isalpha((unsigned char) ch))
               {
                  if(len < TMP_SIZE - 1)
                     tmp_instruction[len++] = ch;
                  in = fgetc(fin);
                  ch = (char) in;
               }
               tmp_instruction[len] = '\0';
               ungetc(ch, fin);

               lowered = makelower(tmp_instruction);
               if(lowered != tmp_instruction)
                  strcpy(tmp_instruction, lowered);

               tmp = FindNode(EZCOLUMN, tmp_instruction);
               if(tmp != NULL)
               {
                  if(!(tmp->mode & COMMAND))
                     fprintf(fout, "\\%s ", tmp->rtf.word);
                  else
                     tmp->rtf.fun(tmp_instruction, transform, tofind);
               }
               else
                  fprintf(fout, "%s", tmp_instruction);
            }
         }
         else if(transform == LOWERCASE && isupper((unsigned char) ch))
            fputc(tolower((unsigned char) ch), fout);
         else if(transform == CAPS && islower((unsigned char) ch))
            fputc(toupper((unsigned char) ch), fout);
         else
            fputc(ch, fout);
      }
   }

   if(tofind == EOF)
      return(CurrLine++);

   return(CurrLine);
}


/*
 *  Search the translation table (the list headed by Table) for the
 *  first node whose word equals string.
 *
 *  field:   EZCOLUMN compares against each node's ezword,
 *           RTFCOLUMN compares against each node's rtf.word.
 *  string:  the word to look for (exact, case-sensitive match).
 *
 *  Returns the matching node, or a null pointer when nothing matches
 *  or field is neither of the two columns.
 */
struct TableStruct *FindNode(field, string)
     int field;
     char *string;
{
  struct TableStruct *node;

  if(field == EZCOLUMN)
    {
      for(node = Table; node != NULL; node = node->next)
        if(strcmp(node->ezword, string) == 0)
          return(node);
      return(NULL);
    }

  if(field == RTFCOLUMN)
    {
      for(node = Table; node != NULL; node = node->next)
        if(strcmp(node->rtf.word, string) == 0)
          return(node);
      return(NULL);
    }

  return(0);
}

/*
 *  Read in the command name that follows a "\".
 *
 *  Characters are collected from fin until one of these is met:
 *    '{' or '\\'   pushed back onto the stream, not part of the name
 *    ' ' or '\t'   consumed
 *    '\n' or '\r'  consumed, and Newlines() is called
 *    end of input
 *
 *  Returns a freshly calloc'd, NUL-terminated buffer of TMP_SIZE bytes
 *  which the caller is responsible for freeing.  Names longer than
 *  TMP_SIZE - 1 characters are truncated (the excess is still read),
 *  where the original wrote past the end of the buffer.  The process
 *  is terminated if the buffer cannot be allocated.
 */
char *GetInstruction()
{
  char character, *instruction;
  int in, len = 0;

  instruction = (char *) calloc (TMP_SIZE, sizeof(char));
  if(instruction == NULL)
    {
      perror("2rtf: GetInstruction");
      exit(1);
    }

  while((in = fgetc(fin)) != EOF)
    {
      character = (char) in;
      if(character == '{' || character == '\\')
      {
        ungetc(character, fin);
        return(instruction);
      }
      else if(character == ' ' || character == '\t')
        return(instruction);
      else if(character == '\n' || character == '\r')
      {
        Newlines();
        return(instruction);
      }
      else if(len < TMP_SIZE - 1)
        /* calloc() zero-filled the buffer, so it stays terminated. */
        instruction[len++] = character;
    }

  return(instruction);
}


int Execute(instruction, transform)
     char *instruction;
     int transform;
/*
 *
 *  Handle instruction read by GetInstruction() accordingly.
 *
 */
{
  struct TableStruct *tmp;
  char ch;

  tmp = FindNode(EZCOLUMN, instruction);

  if(tmp->mode & QUOTEDCHAR)
    fputc(tmp->rtf.quote, fout);
  else if(tmp->mode & NAKED)
    {
      if(!(tmp->mode & COMMAND))
	ReplaceText(tmp->rtf.word, tmp->mode, transform, ' ');
      else
	return(tmp->rtf.fun(instruction, transform, ' '));
    }
  else
    {
      ch = ' ';
      while(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
	{
	  ch = (char) fgetc(fin);
	  if(ch=='\n' || ch=='\r')
	    Newlines();
	}
      
      if(ch == '{')
	 if(!(tmp->mode & COMMAND))
	    ReplaceText(tmp->rtf.word, tmp->mode, transform, '}');
	 else
	    return(tmp->rtf.fun(instruction, transform, '}'));
      else
	{
	  fprintf(ferr, "* Fatal error:\n* %s: Lack of parameters for command %s\n%s", me,
		  tmp->ezword, "Program terminating");
	  exit(0);
	}
    }
   return(0);
}


/*
 *  Plain substitution for table entries that map an ezword to an RTF
 *  control word rather than to a command function.
 *
 *  instruction:  the RTF control word, written after a backslash.
 *  mode:         table mode bits; only NAKED is examined here.
 *  transform:    handed on to richParseText().
 *  tofind:       terminator handed on to richParseText().
 *
 *  A NAKED word is written on a new line and nothing else happens.
 *  Otherwise the word is written, the argument text is processed by
 *  richParseText() up to tofind, and the formatting is reset with
 *  "\pard\plain".  Always returns 0.
 */
int ReplaceText(instruction, mode, transform, tofind)
     char *instruction, tofind;
     int mode, transform;
{
  if(!(mode & NAKED))
  {
     fprintf(fout, "\\%s ", instruction);
     richParseText(tofind, transform, PRINTTOFILE);
     fputs("\\pard\\plain ", fout);
     return(0);
  }

  fprintf(fout, "\n\\%s ", instruction);
  return(0);
}

/*
 *  Fetch the user's default font as "name:size" from the defaults
 *  database (owner OWNER, key FONT).
 *
 *  fontName:  out, must have room for 128 bytes.
 *  fontSize:  out.
 *
 *  Both outputs are preset to the built-in fallback, so a missing
 *  default, or one in which the name or the size cannot be parsed,
 *  leaves the corresponding fallback value in place instead of an
 *  uninitialised one.
 */
static void ReadDefaultFont(fontName, fontSize)
     char *fontName;
     float *fontSize;
{
    const char *fontNameAndSize;

    *fontSize = 14.0;
    strcpy(fontName, "FixedRyuminCourier-Light");

    fontNameAndSize = NXGetDefaultValue(OWNER, FONT);
    if (fontNameAndSize != 0) {
        /* The conversion count is deliberately not examined: fields
           that fail to convert keep the fallback set above. */
        (void) sscanf(fontNameAndSize, "%127[^:]:%f", fontName, fontSize);
    }
}

/*
 *  Write the RTF document header to stream.
 *
 *  stream:         output stream; also the return value.
 *  fonttblheader:  when null, a complete header is generated with a
 *                  built-in font table whose \f0 is the user's default
 *                  font.  Otherwise the text is written verbatim after
 *                  "{\rtfN\ansi" and followed by margin and tab-stop
 *                  settings.
 *
 *  In both cases the font size is written in half-points (twice the
 *  default size, truncated to an integer).
 */
NXStream *CreatertfHead(stream, fonttblheader)
NXStream *stream;
char *fonttblheader;
{
    char fontName[128];
    float fontSize;

    ReadDefaultFont(fontName, &fontSize);

    if (!fonttblheader) {
        fprintf(stream, "{\\rtf%d\\mac\\deff%d ", RTFVersion, NEW_YORK);
        fprintf(stream, "{\\fonttbl");
        fprintf(stream, "{\\f0\\fnil %s;}", fontName);
        /* NOTE(review): Chicago is numbered CHICAGO + 1, presumably
           because \f0 is taken by the default font above - confirm
           against the font constants. */
        fprintf(stream, "{\\f%d\\fswiss Chicago;}", CHICAGO + 1);
        fprintf(stream, "{\\f%d\\froman New York;}", NEW_YORK);
        fprintf(stream, "{\\f%d\\fmodern Monaco;}", MONACO);
        fprintf(stream, "{\\f%d\\ftech Symbol;}", SYMBOL);
        fprintf(stream, "}");

        /* color table - may be omitted */

        fputs("\\ftnbj\\sectd\n", stream);
        fputs("\\linemod0\\linex0\\cols1\\endnhere ", stream);
        fprintf(stream, "\\pard\\plain\\f0\\fs%d ", 2*(int)fontSize);
    } else {
        fprintf(stream, "{\\rtf%d\\ansi", RTFVersion);
        fprintf(stream, "%s\n", fonttblheader);
        /* NOTE(review): \margl40 is written twice; the second one may
           have been meant as \margr40.  Output kept as it was. */
        fprintf(stream, "\\margl40\n");
        fprintf(stream, "\\margl40\n");
        fprintf(stream, "\\pard\\tx1280\\tx2560\\tx3840\\tx5120\\tx6400\\tx7680\\tx8960\\tx10240\\tx11520\\tx12800\\f0\\b0\\i0\\ul0\\fs%d\\fc0", 2*(int)(fontSize));
    }
    return(stream);
}

/*
 *  Called by richParseText() after it has read a <bigger> tag.  Looks
 *  ahead for further tags whose name starts with "bi", separated only
 *  by CR/LF, and consumes them.  Fcount starts at 4 and grows by 4 for
 *  every consumed tag that is exactly "bigger"; the global `big` is
 *  incremented for every consumed tag whatever its name.  When the run
 *  ends, ONE group "{\fsN " is written to fout, N being Fcount plus
 *  the integer part of State.CurFontSize.  Always returns 0.
 *
 *  Push-back: ungetc is #defined at the top of this file as
 *  NXUngetc(stream), so the character argument is ignored.  The
 *  repeated calls below are apparently meant to back the stream up by
 *  one character each so that richParseText() re-reads the tag.
 *  NOTE(review): confirm that NXUngetc supports several consecutive
 *  push-backs.
 *
 *  NOTE(review): c is a char compared against EOF, which only works
 *  where char is signed, and then a 0xFF data byte also reads as EOF.
 */
int manybigger(){

  int Fcount=4;
  char c;
  char *makelower();
  int i;
   char tmp_instruction[TMP_SIZE],token[50];

   while((c = fgetc(fin)) != EOF) {
       if (c == '<' ) {
           /* Collect up to 49 characters of the tag name. */
           for (i=0; (i<49 && (c = fgetc(fin)) != '>'
                     && c != EOF); ++i) {
		/* NOTE(review): this condition is true for every c (no
		   character equals both '\r' and '\n'), so the else
		   branch is never taken; '&&' was probably intended. */
		if ( c !='\r' || c !='\n')
                  token[i] = isupper(c) ? tolower(c) : c;
		else{
		  i--;
		  continue;
		}
	       /* Decide from the first two characters whether this tag
	          belongs to the run. */
	       if (i == 0) {
			if (c == 'b'){
			   c = fgetc(fin);
			   if (c == 'i'){
				/* "bi...": keep the tag; the 'i' is pushed
				   back so the loop stores it next time. */
				ungetc(c, fin);
				continue;
			   }
			   else{
				/* "b" + something else: three push-backs
				   (this char, 'b', '<'), then open the
				   group and stop. */
				ungetc(c, fin);
				ungetc(c, fin);
				ungetc(c, fin);
   		fprintf(fout, "{\\fs%d ", Fcount+(int)(State.CurFontSize));
       		return(0);
			   }
			}else{
				/* Not a 'b' tag: two push-backs (this char
				   and '<'), then open the group and stop. */
				ungetc(c, fin);
				ungetc(c, fin);
   		fprintf(fout, "{\\fs%d ", Fcount+(int)(State.CurFontSize));
       		return(0);
			}
		}
           }
           if (c == EOF) break;
           /* Tag longer than the buffer: skip the rest up to '>'. */
           if (c != '>') while ((c = fgetc(fin)) != '>' && c != EOF) {;}
           if (c == EOF) break;
           token[i] = '\0';
           strcpy(tmp_instruction, token);
           strcpy(tmp_instruction, makelower(tmp_instruction));
	   /* Every consumed tag is counted; only "bigger" adds size. */
	   ++big;
	   if (!strcmp(tmp_instruction,"bigger"))
		Fcount += 4;
       }
       else if ( c == '\r' || c== '\n') {
		/* Line ends between tags do not end the run. */
		continue;
	}
	else{
		/* First ordinary character: push it back, open the group. */
   		ungetc(c,fin);
   		fprintf(fout, "{\\fs%d ", Fcount+(int)(State.CurFontSize));
       		return(0);
	}
   }
   /* Reached on end of input. */
   ungetc(c,fin);
   fprintf(fout, "{\\fs%d ", Fcount+(int)(State.CurFontSize));
   return(0);
}

/*
 *  Called by richParseText() after it has read a </bigger> tag, just
 *  before it writes the single closing '}'.  Consumes the following
 *  closing tags whose name starts with "/bi", separated only by CR/LF,
 *  decrementing the global `big` once per consumed tag and stopping as
 *  soon as `big` reaches 0.  Nothing is written to fout.  Always
 *  returns 0.
 *
 *  Push-back: ungetc is #defined at the top of this file as
 *  NXUngetc(stream), so the character argument is ignored.  The
 *  repeated calls below are apparently meant to back the stream up by
 *  one character each so that richParseText() re-reads the tag.
 *  NOTE(review): confirm that NXUngetc supports several consecutive
 *  push-backs.
 *
 *  NOTE(review): c is a char compared against EOF, which only works
 *  where char is signed, and then a 0xFF data byte also reads as EOF.
 */
int eatmanybigger(){

  char c;
  char *makelower();
  int i;
  char token[50];

   while((c = fgetc(fin)) != EOF) {
       if (c == '<' ) {
           /* Collect up to 49 characters of the tag name.  The token is
              terminated below but never compared against anything. */
           for (i=0; (i<49 && (c = fgetc(fin)) != '>'
                     && c != EOF); ++i) {
               token[i] = isupper(c) ? tolower(c) : c;
	       /* The tag must start with "/bi" to belong to the run. */
	       if (i == 0) {
			if (c == '/'){
			   if ( (c = fgetc(fin)) == 'b' ){
			       if ( (c = fgetc(fin)) == 'i' )
				/* "/bi": accepted.  The 'b' and 'i' have
				   been read but are not stored. */
				continue;
			       else{
				   /* "/b" + other: four push-backs, stop. */
				   ungetc(c, fin);
				   ungetc(c, fin);
				   ungetc(c, fin);
				   ungetc(c, fin);
			   	   return(0);
				}
			    }
			    else{
				/* "/" + other: three push-backs, stop. */
				ungetc(c, fin);
				ungetc(c, fin);
				ungetc(c, fin);
				//break;
				return(0);
			   }
			}
			else{
				/* Not a closing tag: two push-backs, stop. */
				ungetc(c, fin);
				ungetc(c, fin);
				//break;
				return(0);
			}
		}
           }
	   /* One closing tag consumed; stop when the count is used up.
	      This test comes before the skip-to-'>' below. */
	   big--;
           if (big == 0) break;
           if (c == EOF) break;
           /* Tag longer than the buffer: skip the rest up to '>'. */
           if (c != '>') while ((c = fgetc(fin)) != '>' && c != EOF) {;}
           if (c == EOF) break;
           token[i] = '\0';
//           strcpy(tmp_instruction, token);
//           strcpy(tmp_instruction, makelower(tmp_instruction));
//           tmp = FindNode(EZCOLUMN, tmp_instruction);
       }
       else if (c == '\r' || c == '\n' ) {
		/* Line ends between tags do not end the run. */
		continue;
	}
	else{
		/* First ordinary character: push it back and stop. */
   		ungetc(c,fin);
       		break;
	}
   }
   return(0);
}

/*
 *  Number of currently open tags per simple style, maintained by
 *  checktoken().  `smaller` is defined here but checktoken() does not
 *  touch it.
 */
int bold=0, italic=0, supers=0,subs=0, center=0;
int fleft=0, fright=0,smaller=0, underl=0;

/*
 *  Keep track of opening and closing style tags.
 *
 *  token:  tag name as read between '<' and '>', e.g. "bold" or
 *          "/bold".
 *
 *  For an opening tag of a tracked style the style's counter is
 *  incremented and 1 is returned.  For a closing tag, 1 is returned
 *  and the counter decremented when at least one such tag is open;
 *  otherwise the counter is forced to 0 and 0 is returned, which tells
 *  the caller that the closer is unmatched.  Any other token returns
 *  0 and changes nothing.
 */
int checktoken(token)
char *token;
{
	static struct {
		char *name;
		int  *depth;
	} styles[] = {
		{ "bold",        &bold   },
		{ "italic",      &italic },
		{ "underline",   &underl },
		{ "center",      &center },
		{ "flushleft",   &fleft  },
		{ "flushright",  &fright },
		{ "subscript",   &subs   },
		{ "superscript", &supers }
	};
	int closing, k;
	char *name;

	closing = (token[0] == '/');
	name = closing ? token + 1 : token;

	for (k = 0; k < (int)(sizeof styles / sizeof styles[0]); k++) {
		if (strcmp(name, styles[k].name) != 0)
			continue;

		if (!closing) {
			++*styles[k].depth;
			return(1);
		}
		if (*styles[k].depth > 0) {
			--*styles[k].depth;
			return(1);
		}
		/* Unmatched closer: clamp the counter and report it. */
		*styles[k].depth = 0;
		return(0);
	}
	return(0);
}
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */