/* Source: ftp.nice.ch/pub/next/unix/text/rtf.N.bsd.tar.gz#/rtf-utilities/rubric.c */

/* This is rubric.c in view mode; [Download] [Up] */

/*
 * rubric [-i] [-o] [-c styles] [-w words] [files...]
 *
 *  ru-bric \'rü-brik, -,brik\ n
 *  [ME rubrike red ocher, heading in red letters of part of a book,
 *  fr. MF rubrique, fr. L rubrica, fr. rubr-, ruber red] (14c)
 *  1: a heading of a part of a book or manuscript done or
 *     underlined in a color (as red) different from the rest...
 *
 * Highlight the specified words in RTF files; e.g.,
 *    rubric -c red -w Hawley -c bold,0,29,29 -w NeXT file.rtf
 *
 * rewrites 'file.rtf' so that occurrences of 'Hawley' are set in red,
 * and 'NeXT' in a boldface dark slate green.
 * "styles" are comma-separated strings of colors, fonts, or RTF directives.
 * A color may be a comma-separated RGB triple (on a scale of 0-255),
 * or one of the following:
 *    red,green,blue,yellow,darkred,darkgreen,darkblue,darkyellow
 * Styles may also include the keywords "bold" and "italic",
 * a font name preceded by a slash, ("/Adobe-Hirsutulous")
 * or an arbitrary RTF directive ("\\f2\\fs36").
 * "Words" are one or more comma-separated strings or regular expressions
 * (enclosed in "/.../").  In the text, simple words are delimited by
 * white space, punctuation, or start/end of line.
 * If the '-i' option is given, case is ignored in matching.
 * The '-o' option forces output to stdout.
 *
 * The effect is to append fonts and colors to the fonttbl and colortbl,
 * and to then surround occurrences of words or expressions with {\... }.
 * The latter can be done with a careful "sed" script, but additions to
 * the font and color tables, as well as the handling of the common instance
 * of simple word-highlighting, are more conveniently done here.
 *
 * If one or more files (or "rtfd" directories) are given as arguments,
 * these are rewritten in place (a copy of the old file is saved in /tmp).
 * Otherwise, rubric acts as a filter, and writes to the standard output.
 *
 * Bugs & pratfalls:
 *    - the notion of a regular expression at start-of-line ("/^.../")
 *      is not accurate in the context of an RTF file.
 *    - there should be a convenient way to retrieve colors by name
 *      from the default color lists; e.g., '-c "Cadmium Yellow"'
 *    - if the '-o' option is given with more than one rtf file,
 *      results will not be correct; the files should be 'rtfcat-ed' first.
 *    - does not "really" parse RTF; NeXT implementation of RTF incomplete.
 *
 *
 * Like the program? Then fork over the vaporware fee:
 * scan some currency and e-mail the tiff files to the address below
 * for my digital coin collection.
 *
 *
 * Michael Hawley
 * MIT Media Laboratory
 * 20 Ames Street
 * Cambridge, MA 02139
 * mike@media-lab.mit.edu
 *
 * Copyright (c) MIT Media Laboratory, January 1993.
 * This program may be used freely for non-profit pursuits.
 * Please send improvements to the author, and see the general
 * copyright notice in the README file.
 *
 */

#include <stdarg.h>
#include <stdio.h>
#include <string.h>
/*
 * Command-line parsing helpers.
 *
 * 'Case' and 'Default' expand to a 'break' followed by the next label, so
 * switch arms written with them never fall through into one another.
 */
#define Case break; case
#define Default break; default
static char *_arg, *_argp; /* scratch pointers used by 'for_each_argument' and 'argument' */
static char *av0;       /* will hold name of the command */
/*
 * 'argument' yields the value of the option being parsed: the rest of the
 * current option string if it is non-empty, otherwise the next element of
 * 'av'.  When there is no next element, 'i' is stepped back and the current
 * element itself is used.  '_argp' is then advanced to the end of its string
 * so the enclosing per-character loop stops.
 * Expects 'ac', 'av' and 'i' to be in scope at the point of use.
 */
#define argument  (_arg=(*_argp? _argp : av[++i==ac? --i : i]),_argp+=strlen(_argp),_arg)
/*
 * 'for_each_argument' records av[0] in 'av0', then visits every leading
 * element of 'av' that starts with '-' and switches on each character after
 * the dash.  It must be followed by the body of that switch, and expects
 * 'ac', 'av' and 'i' to be in scope.
 */
#define for_each_argument av0 = av[0]; for (i=1;i<ac && *av[i]=='-';i++)\
                        for (_argp = &av[i][1]; *_argp;)\
                                switch(*_argp++)
/* old-style (prototype-less) declarations of library routines used below */
char *malloc(), *index();

#include <sys/types.h>
#include <sys/stat.h>

/*
 * Return nonzero if 'f' names an existing directory, and 0 otherwise
 * (including when 'f' is null or cannot be stat'ed at all).
 */
int isDirectory(char *f){
    struct stat b;

    /* a failed stat() leaves 'b' unset, so it must not be inspected */
    if (!f || stat(f, &b) != 0) return 0;
    /* S_ISDIR compares the whole file-type field rather than testing a
       single bit that other file types may also carry */
    return S_ISDIR(b.st_mode);
}

/*
 * printf-style error reporter: format the message to stderr, append a
 * newline, and return 0 so that callers can write 'return error(...)'.
 *
 * The arguments are taken through <stdarg.h> so that pointers (the format
 * itself and any %s values) are passed at full width.
 */
int error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fputc('\n', stderr);
    return 0;
}

/*
 * "printf" a shell command and run it with system(); gripe on stderr if
 * it failed.
 *
 *   fmt, ...  printf-style format and arguments forming the command line
 *
 * Returns 1 if the command ran and system() reported status 0, and 0
 * otherwise.  A command that does not fit the internal buffer is refused
 * rather than run in truncated form.
 *
 * NOTE: the text is handed to the shell as-is; callers are responsible for
 * quoting any file names they splice in.
 */
int System(const char *fmt, ...)
{
    extern int system(const char *s);
    char t[2048];
    va_list ap;
    int n, status;

    va_start(ap, fmt);
    n = vsnprintf(t, sizeof t, fmt, ap);
    va_end(ap);
    if (n < 0 || (size_t)n >= sizeof t) {
        fprintf(stderr, "bad command: command line too long\n");
        return 0;
    }
    status = system(t);
    if (status) fprintf(stderr, "bad command: %s\n", t);
    return !status;
}

char *
save(s) char *s; {
    /*
     * Duplicate 's' on the heap and hand back the copy.
     * A null or empty 's' yields NULL, as does a failed allocation.
     */
    char *copy = NULL;

    if (s != NULL && s[0] != '\0') {
        copy = malloc(strlen(s) + 1);
        if (copy != NULL)
            strcpy(copy, s);
    }
    return copy;
}

char *
strindex(s,t) char *s, *t; {
    /*
     * Scan 's' left to right and return the first position at which 't'
     * occurs, or a null pointer if there is none (or 's' is null).
     * Only positions holding a character are tried, so an empty 's'
     * never matches, not even an empty 't'.
     */
    int len = strlen(t);
    char *cur;

    if (!s) return (char *)0;
    for (cur = s; *cur != '\0'; cur++) {
        if (strncmp(cur, t, len) == 0)
            return cur;
    }
    return (char *)0;
}


#include <ctype.h>
#include <regex.h>

#define MaxStyles 1024
#define MaxStr    2048

struct {
    char *color;
    int   colornum;
    char *font;
    int   fontnum;
    char *style;
} Style[MaxStyles];
int CurStyle = 0;
int FN = 0, CN = 0;

#define MaxWords 512    /* capacity of the Word[] table */

/*
 * One entry per compiled expression, filled in by compile().
 * NOTE(review): 'struct regex' comes from the <regex.h> this file was written
 * against; its layout is not visible here -- confirm against that header.
 */
struct {
    int style;          /* index into Style[] to apply to matches */
    int delim;          /* nonzero for a user-supplied /regex/, zero for a plain word */
    struct regex r;     /* compiled expression (copied from re_compile()'s result) */
} Word[MaxWords];
int CurWord = 0;        /* number of Word[] entries in use */

/*
 * FontOffset / ColorOffset: added to a style's fontnum / colornum when
 * emitting RTF; recomputed by filterRTF() for each input.
 * IgnoreCase: set by '-i'.  WriteToStdout: set by '-o'.
 */
int FontOffset = 0, ColorOffset = 0, IgnoreCase = 0, WriteToStdout = 0;

/*
 * 's' is a comma-separated list of styles, e.g., "red,bold,/Helvetica".
 * Parse these and add one entry to the 'Style[...]' table.
 * Each "-c ..." option calls 'addStyle(...)' to add an entry to the table.
 *
 * Items are interpreted as follows:
 *   - an item starting with a digit is the next component of an RGB triple
 *     (components not supplied default to 0);
 *   - an item starting with '/' is a font name;
 *   - "bold" / "italic" become the RTF directives \b / \i;
 *   - one of the named colors sets the whole RGB triple;
 *   - anything else is appended verbatim as a raw RTF directive.
 *
 * 's' is modified in place (commas are overwritten with NULs).
 * Returns 1 on success, 0 if the table is full or an item does not fit,
 * in which case no entry is added.
 */
int
addStyle(char *s)
{
    static const struct { const char *name; int r, g, b; } named[] = {
        { "red",        255,   0,   0 },
        { "green",        0, 255,   0 },
        { "blue",         0,   0, 255 },
        { "yellow",     255, 255,   0 },
        { "darkred",    128,   0,   0 },
        { "darkgreen",    0, 128,   0 },
        { "darkblue",     0,   0, 128 },
        { "darkyellow", 160, 160,   0 },
    };
    const int nnamed = (int)(sizeof named / sizeof named[0]);
    const int maxstyles = (int)(sizeof Style / sizeof Style[0]);
    int C[3] = { 0, 0, 0 };     /* RGB components; unset ones stay 0 */
    int cn = 3;                 /* stays 3 until some color item is seen */
    int k;
    const char *add;
    char *p = s, t[1024] = "", f[256] = "", c[256] = "";

    if (CurStyle >= maxstyles) {
        fprintf(stderr, "too many styles (limit %d)\n", maxstyles);
        return 0;
    }

    while (p) {
        s = p;
        if ((p = index(p, ',')) != NULL) *p++ = '\0';

        if (isdigit((unsigned char)*s)) {
            C[cn % 3] = atoi(s);
            cn++;
            continue;
        }
        if (*s == '/') {
            if (strlen(s + 1) >= sizeof f) {
                fprintf(stderr, "font name too long: %s\n", s + 1);
                return 0;
            }
            strcpy(f, s + 1);
            continue;
        }

        if (strcmp(s, "bold") == 0) add = "\\b";
        else if (strcmp(s, "italic") == 0) add = "\\i";
        else {
            /* this should be implemented via named colors lists */
            for (k = 0; k < nnamed; k++)
                if (strcmp(s, named[k].name) == 0) break;
            if (k < nnamed) {
                C[0] = named[k].r; C[1] = named[k].g; C[2] = named[k].b;
                cn = 6;
                continue;
            }
            add = s;            /* raw RTF directive */
        }
        if (strlen(t) + strlen(add) >= sizeof t) {
            fprintf(stderr, "style too long: %s\n", add);
            return 0;
        }
        strcat(t, add);
    }

    if (cn != 3)
        snprintf(c, sizeof c, "\\red%d\\green%d\\blue%d", C[0], C[1], C[2]);
    /* save() returns a null pointer for an empty string: "not specified" */
    Style[CurStyle].color = save(c); if (*c) Style[CurStyle].colornum = CN++;
    Style[CurStyle].font  = save(f); if (*f) Style[CurStyle].fontnum = FN++;
    Style[CurStyle].style = save(t);
    CurStyle++;
    return 1;
}

/*
 * Convert 's' in place from "abc" to "[aA][bB][cC]" to effect
 * case-insensitive regex matching (n.b., some nebbish
 * seems to have broken the "fold" option to re_compile()).
 *
 * Letters inside an existing bracket expression "[...]" are left alone;
 * a '[' or ']' preceded by a backslash is not treated as a bracket.
 *
 * Each letter grows to four characters, so the caller's buffer must have
 * room for the expanded text.  The expansion is built in a 1024-byte
 * scratch buffer; if it would not fit there, 's' is left untouched and a
 * null pointer is returned.  On success 's' itself is returned.
 */
char *
ignorecase(char *s)
{
    char t[1024], *p = t, *st = s;
    char *last = t + sizeof t - 1;      /* slot reserved for the NUL */
    int push = 0;                       /* nonzero while inside "[...]" */
    int bare;                           /* current char is not backslash-escaped */
    unsigned char ch;

    for (; *s; s++) {
        ch = (unsigned char)*s;
        if (isalpha(ch) && !push) {
            if (last - p < 4) return NULL;
            *p++ = '[';
            *p++ = (char)tolower(ch);
            *p++ = (char)toupper(ch);
            *p++ = ']';
        } else {
            if (last - p < 1) return NULL;
            /* at the very start there is no preceding character to escape it */
            bare = (p == t || p[-1] != '\\');
            if (ch == '[' && bare) push = 1;
            if (push && ch == ']' && bare) push = 0;
            *p++ = (char)ch;
        }
    }
    *p = '\0';
    strcpy(st, t);
    return st;
}

/*
 * Compile 's' as a regular expression and add it to the 'Word[...]' table.
 * 'delim' is true if 's' is delimited by "/.../" (i.e., a real regex)
 * otherwise it's zero, indicating 's' was derived from a "word."
 *
 * The new entry is tied to the most recently added style (or to style 0
 * if none has been added yet).
 *
 * Returns 1 on success, 0 if the table is full or the expression could
 * not be compiled; nothing is added in that case.
 */
int
compile(char *s, int delim)
{
    const int maxwords = (int)(sizeof Word / sizeof Word[0]);
    struct regex *compiled;

    if (CurWord >= maxwords) {
        fprintf(stderr, "too many words or expressions (limit %d)\n", maxwords);
        return 0;
    }
    /* NOTE(review): assumes re_compile() yields a null pointer on failure --
       confirm against the regex library this file is built with */
    compiled = (struct regex *)re_compile(s, 0);
    if (!compiled) {
        fprintf(stderr, "bad expression: %s\n", s);
        return 0;
    }
    Word[CurWord].style = CurStyle>0? CurStyle-1 : CurStyle;
    Word[CurWord].delim = delim;
    bcopy(compiled, &(Word[CurWord].r), sizeof(struct regex));
    CurWord++;
    return 1;
}

char *WordDelim = "[^a-zA-Z0-9]";  /* "words" are delimited by non-alphanums */

/*
 * 's' is a comma-separated list of words or regular expressions.
 * Parse these, enhance the regular expressions (i.e., derive some
 * plausible ones for "words") and compile the expressions in the table.
 *
 * An item of the form /.../ is compiled as given (a '/' preceded by a
 * backslash does not end it); anything after its closing '/' other than
 * ",next-item" is ignored.  A plain word is compiled twice: once anchored
 * at start of line and followed by a delimiter, and once with a delimiter
 * on both sides.  Empty items are skipped.
 *
 * 's' is modified in place (separators are overwritten with NULs).
 * Returns 1 on success, 0 (after printing a message) on a malformed or
 * over-long item; items before the bad one have already been compiled.
 */
int
addWords(char *s)
{
    char *p, *next = s;
    char u[1024];               /* the item, possibly case-expanded */
    char t[1024 + 64];          /* the item wrapped in delimiters */
    int isregex, n;
    size_t maxlen;

    while (next && *next){
        p = s = next;
        isregex = (*p == '/');
        if (isregex){
            /* find the closing, unescaped '/' */
            do s = index(s+1,'/'); while (s && s[-1]=='\\');
            if (!s) return error("bad format: %s",p);
            next = (s[1] == ',')? s+2 : NULL;
            p++;                /* skip the opening '/' */
        } else {
            s = index(s,',');
            next = s? s + 1 : NULL;
        }
        if (s) *s = '\0';

        /* an empty item would compile to a pattern matching only delimiters */
        if (!*p) continue;

        /* case expansion turns each letter into four characters */
        maxlen = IgnoreCase? (sizeof u - 1) / 4 : sizeof u - 1;
        if (strlen(p) > maxlen) return error("word too long: %s",p);
        strcpy(u,p);
        if (IgnoreCase) ignorecase(u);

        if (isregex){
            compile(u,1);
        } else {
            n = snprintf(t,sizeof t,"^%s%s",u,WordDelim);
            if (n < 0 || (size_t)n >= sizeof t)
                return error("word too long: %s",p);
            compile(t,0);
            n = snprintf(t,sizeof t,"%s%s%s",WordDelim,u,WordDelim);
            if (n < 0 || (size_t)n >= sizeof t)
                return error("word too long: %s",p);
            compile(t,0);
        }
    }
    return 1;
}

/*
 * 's' is a line of input.  Check the 'Word[...]' expressions
 * and perform the requisite stylistic changes.
 * This is essentially a global substitution within the line.
 *
 * Each match is replaced by "{<font><style><color> <match>}", where the
 * bracketed directives come from the Style[] entry tied to the expression.
 * For plain words (delim == 0) the delimiters that the derived pattern
 * matched around the word are trimmed off first, so only the word itself
 * is wrapped.
 *
 * The line is edited in place and grows with every substitution.  The
 * result is kept below MaxStr*2 bytes measured from 's'; the caller must
 * supply a buffer at least that large.  A substitution that would exceed
 * the limit is skipped, as are the remaining matches of that expression.
 *
 * Always returns 1.
 */
int
rewrite(char *s)
{
    char t[MaxStr*2], fnt[32], col[32], *u, *sty;
    int i, n, k;
    size_t pre, rest;
    struct regex *r;

    for (i=0;i<CurWord;i++){
        r = &(Word[i].r);
        u = s;
        while (re_match(u,r)==1){
            if (!Word[i].delim){
                /* trim the delimiters, but never past the match itself */
                while (r->start < r->end && !isalnum((unsigned char)r->start[0]))
                    r->start++;
                while (r->end > r->start && !isalnum((unsigned char)r->end[-1]))
                    r->end--;
            }
            /* an empty match would be wrapped over and over without progress */
            if (r->end <= r->start) break;

            pre = (size_t)(r->start - u);       /* text before the match */
            if (pre >= sizeof t) break;
            memcpy(t,u,pre);

            n = Word[i].style;
            fnt[0] = col[0] = '\0';
            if (Style[n].font)  sprintf(fnt,"\\f%d",Style[n].fontnum+FontOffset);
            if (Style[n].color) sprintf(col,"\\fc%d",Style[n].colornum+ColorOffset);
            sty = Style[n].style? Style[n].style : "";

            k = snprintf(t+pre,sizeof t-pre,"{%s%s%s %.*s}",
                         fnt,sty,col,(int)(r->end-r->start),r->start);
            if (k < 0 || (size_t)k >= sizeof t-pre) break;

            /* the whole line, from 's', must still fit the caller's buffer */
            rest = strlen(r->end);
            if ((size_t)(u-s) + pre + (size_t)k + rest >= sizeof t) break;

            /* r->end points into 'u', so take the tail before overwriting */
            memcpy(t+pre+k,r->end,rest+1);
            memcpy(u,t,pre+(size_t)k+rest+1);
            u += pre + (size_t)k;               /* resume after the inserted group */
        }
    }
    return 1;
}

/*
 * Append the next chunk of input (at most 1023 characters) to the
 * NUL-terminated string in 'buf', whose total capacity is 'size' bytes.
 * Returns 0 at end of input or when 'buf' is full, nonzero otherwise.
 */
static int
appendChunk(char *buf, size_t size, FILE *in)
{
    size_t used = strlen(buf);
    size_t room = size - used;

    if (room < 2) return 0;
    if (room > 1024) room = 1024;
    return fgets(buf+used,(int)room,in) != NULL;
}

/* Write the color definition of every style that has one, each followed by ';'. */
static void
putColorEntries(FILE *out)
{
    int i;

    for (i=0;i<CurStyle;i++)
        if (Style[i].color)
            fprintf(out,"%s;",Style[i].color);
}

/*
 * copy RTF from 'in' to 'out', emending the font and color tables,
 * (noting the proper FontOffset and ColorOffset), and performing the
 * regular-expression-driven substitutions to rearrange the styles
 * of given words or expressions.
 *
 * Fonts are appended to the first "{\fonttbl" group seen, colors to the
 * first "{\colortbl" group; if a "\pard" is reached before any color
 * table, a new color table is emitted just in front of it.  Which tables
 * still need writing is tracked per call, so every file processed gets
 * its own additions.
 *
 * NOTE(review): recmp() is defined outside this file; the code treats a
 * return of 0 as "pattern matches" -- confirm against its definition.
 *
 * Always returns 1.
 */
int
filterRTF(FILE *in, FILE *out)
{
    /* lines are read MaxStr*2 at a time; the rest of the buffer leaves room
       for table continuation lines and for growth during rewrite() */
    char s[MaxStr*4], *p, *q, *x;
    int i;
    int needFonts = (FN != 0), needColors = (CN != 0);

    FontOffset = 0, ColorOffset = 0;

    while (fgets(s,MaxStr*2,in)){
        p = s;
        if (needFonts && recmp("{\\\\fonttbl",p)==0){
          /* time for the font table */
            x = p;
            p = strindex(p,"fonttbl");
            q = p? index(p,'}') : NULL;
            /* pull in more input until the group closes, or input runs out */
            while (p && !q && appendChunk(s,sizeof s,in))
                q = index(p,'}');
            if (q){
                /* new fonts are numbered after the highest \fN already used */
                while ((p=strindex(p,"\\f")) && p < q){
                    i = atoi(p+=2);
                    if (i>=FontOffset) FontOffset = i+1;
                }
                p = q; *p++ = '\0';
                fputs(x,out);
                for (i=0;i<CurStyle;i++){
                    if (Style[i].font){
                        fprintf(out,"\\f%d\\fnil %s;",
                                Style[i].fontnum+FontOffset,
                                Style[i].font);
                    }
                }
                fprintf(out,"}");
                needFonts = 0;
            } else {
                error("%s: warning, couldn't write font table!",av0);
                p = x;      /* pass the text through unchanged */
            }
        }
        if (needColors && (recmp("\\\\pard",p)==0||recmp("{\\\\colortbl",p)==0)){
          /* time for the color table */
            x = p;
            p = strindex(p,"colortbl");
            ColorOffset = 1;
            if (p){
                q = index(p,'}');
                while (!q && appendChunk(s,sizeof s,in))
                    q = index(p,'}');
                if (q){
                    /* one existing entry per ';' after the first */
                    p = index(p,';');
                    while (p && (p=index(p+1,';')) && p < q)
                        ColorOffset++;
                    /* step past the '}' so the rest of the line is kept */
                    p = q; *p++ = '\0';
                    fputs(x,out);
                    putColorEntries(out);
                    fprintf(out,"}");
                } else {
                    p = x;  /* pass the text through unchanged */
                }
            } else if ((p = strindex(x,"\\pard")) != NULL){
                /* no color table so far: emit one just before \pard */
                *p++ = '\0';
                fputs(x,out);           /* whatever preceded \pard */
                fprintf(out,"{\\colortbl;");
                putColorEntries(out);
                fprintf(out,"}\\");     /* restores the '\' overwritten above */
            } else {
                p = x;
            }
            needColors = 0;
        }
        if (*p){ /* now rewrite the line for any substitutions */
            /* give rewrite() the whole buffer to grow into */
            if (p != s){
                memmove(s,p,strlen(p)+1);
                p = s;
            }
            rewrite(p);
            fputs(p,out);
        }
    }
    return 1;
}

/*
 * Return a pointer to the last '/' in 's' (the slash itself included),
 * or 's' unchanged when it contains no '/'.
 */
char *
tail(char *s){
    /* strrchr is declared by <string.h>, so the pointer is returned at full width */
    char *p = strrchr(s,'/');
    return p? p : s;
}

/*
 * Rewrite rtf or rtfd file 's' to effect the given changes.
 * A copy of the old file is saved in /tmp.
 * Fails if for some reason it can't create files there.
 *
 * For a directory, the file rewritten is 's'/TXT.rtf and the copy is kept
 * in a fresh directory under /tmp.  The copy is what gets read; the output
 * goes back to the original path, or to stdout when '-o' was given.
 *
 * Names longer than 512 characters, or containing a single quote, are
 * refused: the copy is made through the shell with the names single-quoted.
 *
 * Returns 1 on success, 0 (after printing a message) on failure.
 */
int
rewriteRTFFile(char *s)
{
    FILE *in,*out;
    char t[1024], u[1024];
    static int x = 0;           /* makes each temporary name unique */

    if (access(s,0)) return error("%s: couldn't read '%s'",av0,s);
    if (strlen(s) > 512 || strchr(s,'\''))
        return error("%s: can't handle the name '%s'",av0,s);

    if (isDirectory(s)){
        snprintf(u,sizeof u,"%s/TXT.rtf",s);
        snprintf(t,sizeof t,"/tmp/%s.%d%d.rtfd",tail(s),(int)getpid(),x++);
        System("mkdir '%s'; cp '%s' '%s'",t,u,t);
        strcat(t,"/TXT.rtf");   /* the copy inside the new directory */
        s = u;
    } else {
        snprintf(t,sizeof t,"/tmp/%s.%d%d",tail(s),(int)getpid(),x++);
        System("cat '%s' > '%s'",s,t);
    }
    in = fopen(t,"r");
    if (!in) return error("%s: couldn't backup '%s' to '%s'",av0,s,t);
    out = WriteToStdout? stdout : fopen(s,"w");
    if (!out) return fclose(in), error("%s: couldn't write '%s'",av0,s);
    filterRTF(in,out);
    fclose(in);
    /* stdout must stay open for any files that follow */
    if (out == stdout) fflush(out);
    else fclose(out);
    return 1;
}

use(){
#define E error
    E("use: %s [-i] [-o] [-c style...] [-w words...] [files or stdin]",av0);
    E("highlight words in an RTF file, or make other format changes.");
    E("  -c  comma-separated colors, fonts, keywords, or rtf directives;");
    E("      colors may be written as 'r,g,b' values as in '128,0,0'");
    E("        or one of: red green blue yellow darkred darkgreen etc...");
    E("      font names are preceded with '/', as in '/Sabon-Roman'");
    E("      keywords are: bold italic");
    E("      \"raw\" rtf directives are written with a backslash '\\b\\i'");
    E("      e.g.:  -c red,bold,\\fs36,/Sabon-Roman");
    E("  -w  comma-separated words or regular expressions enclosed in /.../");
    E("  -i  case-insensitive matching.");
    E("  -o  force output to stdout.");
    exit(1);
}

main(ac,av) char *av[]; {
    int i;

    for_each_argument {
    case 'c': addStyle(argument);
    Case 'w': addWords(argument);
    Case 'i': IgnoreCase++;
    Case 'o': WriteToStdout++;
    Default : use();
    }

    if (i==ac) filterRTF(stdin,stdout);
    else       while (i<ac) rewriteRTFFile(av[i++]);

    exit(0);        
}

/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */