This is rubric.c in view mode; [Download] [Up]
/* * rubric [-i] [-o] [-c styles] [-w words] [files...] * * ru-bric \'ruČ-brik, -,brik\ n * [ME rubrike red ocher, heading in red letters of part of a book, * fr. MF rubrique, fr. L rubrica, fr. rubr-, ruber red] (14c) * 1: a heading of a part of a book or manuscript done or * underlined in a color (as red) different from the rest... * * Hilight the specified words in RTF files; e.g., * rubric -c red -w Hawley -c bold,0,29,29 -w NeXT file.rtf * * rewrites 'file.rtf' so that occurrences of 'Hawley' are set in red, * and 'NeXT' in a dark slate boldface green. * "styles" are comma-separated strings of colors, fonts, or RTF directives. * A color may be a comma-separated RGB triple (on a scale of 0-255), * or one of the following: * red,green,blue,yellow,darkred,darkgreen,darkblue,darkyellow * Styles may also include the keywords "bold" and "italic", * a font name preceded by a slash, ("/Adobe-Hirsutulous") * or an arbitrary RTF directive ("\\f2\\fs36"). * "Words" are one or more comma-separated strings or regular expressions * (enclosed in "/.../"). In the text, simple words are delimited by * white space, punctuation, or start/end of line. * If the '-i' option is given, case is ignored in matching. * The '-o' option forces output to stdout. * * The effect is to append fonts and colors to the fonttbl and colortbl, * and to then surround occurrences of words or expressions with {\... }. * The latter can be done with a careful "sed" script, but additions to * the font and color tables, as well as the handling of the common instance * of simple word-highlighting, are more conveniently done here. * * If one or more files (or "rtfd" directories) are given as arguments, * these are rewritten in place (a copy of the old file is saved in /tmp). * Otherwise, rubric acts as a filter, and writes to the standard output. * * Bugs & pratfalls: * - the notion of a regular expression at start-of-line ("/^.../") * is not accurate in the context of an RTF file. * - there should be a convenient way to retrieve colors by name * from the default color lists; e.g., '-c "Cadmium Yellow"' * - if the '-o' option is given with more than one rtf file, * results will not be correct; the files should be 'rtfcat-ed' first. * - does not "really" parse RTF; NeXT implementation of RTF incomplete. * * * Like the program? Then fork over the vaporware fee: * scan some currency and e-mail the tiff files to the address below * for my digital coin collection. * * * Michael Hawley * MIT Media Laboratory * 20 Ames Street * Cambridge, MA 02139 * mike@media-lab.mit.edu * * Copyright (c) MIT Media Laboratory, January 1993. * This program may be used freely for non-profit pursuits. * Please send improvements to the author, and see the general * copyright notice in the README file. * */ #include <stdio.h> #define Case break; case #define Default break; default static char *_arg, *_argp; /* use by 'for_each_argument */ static char *av0; /* will hold name of the command */ #define argument (_arg=(*_argp? _argp : av[++i==ac? --i : i]),_argp+=strlen(_argp),_arg) #define for_each_argument av0 = av[0]; for (i=1;i<ac && *av[i]=='-';i++)\ for (_argp = &av[i][1]; *_argp;)\ switch(*_argp++) char *malloc(), *index(); #include <sys/types.h> #include <sys/stat.h> int isDirectory(char *f){ /* true if file 'f' is a directory */ struct stat b; stat(f, &b); return b.st_mode & S_IFDIR; } int error(a,b,c,d) int a,b,c,d; { /* printf an error msg */ fprintf(stderr,(char *)a,b,c,d); fprintf(stderr,"\n"); return 0; } int System(fmt, a,b,c,d,e,f,g) char *fmt,*a,*b,*c,*d,*e,*f,*g; /* * "printf" a system call, gripe if it failed. */ { extern int system(char *s); char t[2048]; int i; sprintf(t,fmt,a,b,c,d,e,f,g); i = system(t); if (i) error("bad command: %s",t); return !i; } char * save(s) char *s; { /* save a copy of 's' and return the pointer */ char *t; if (!s || !*s) return NULL; t = malloc(strlen(s)+1); if (t) strcpy(t,s); return t; } char * strindex(s,t) char *s, *t; { /* return ptr to first match of 't' in 's' */ int n = strlen(t); if (s) while (*s) if (!strncmp(s, t, n)) return s; else s++; return (char *)0; } #include <ctype.h> #include <regex.h> #define MaxStyles 1024 #define MaxStr 2048 struct { char *color; int colornum; char *font; int fontnum; char *style; } Style[MaxStyles]; int CurStyle = 0; int FN = 0, CN = 0; #define MaxWords 512 struct { int style; int delim; struct regex r; } Word[MaxWords]; int CurWord = 0; int FontOffset = 0, ColorOffset = 0, IgnoreCase = 0, WriteToStdout = 0; addStyle(s) char *s; /* * 's' is a comma-separated list of styles, e.g., "red,bold,/Helvetica" * Parse these and incorporate in the 'Style[...]' table. * Each "-c ..." option calls 'addStyle(...)' to add an entry to the table. */ { int C[3], cn=3; char *p = s, t[1024]="", f[256]="", c[256]=""; #define If(x) if (strcmp(s,x)==0) while (p){ s = p; if (p=index(p,',')) *p++ = '\0'; if (isdigit(*s)){ C[cn%3] = atoi(s), cn++; continue; } else if (*s == '/') { strcpy(f,s+1); continue; } else If("bold") strcat(t,"\\b"); else If("italic") strcat(t,"\\i"); /* can you think of any others? */ /* this should be implemented via named colors lists */ else If("red") { C[0]=255; C[1]=C[2] = 0; cn=6; continue; } else If("green") { C[1]=255; C[0]=C[2] = 0; cn=6; continue; } else If("blue") { C[2]=255; C[0]=C[1] = 0; cn=6; continue; } else If("yellow") { C[0]=C[1]=255; C[2] = 0; cn=6; continue; } else If("darkred") { C[0]=128; C[1]=C[2] = 0; cn=6; continue; } else If("darkgreen") { C[1]=128; C[0]=C[2] = 0; cn=6; continue; } else If("darkblue") { C[2]=128; C[0]=C[1] = 0; cn=6; continue; } else If("darkyellow") { C[0]=C[1]=160; C[2] = 0; cn=6; continue; } else strcat(t,s); } if (cn != 3) sprintf(c,"\\red%d\\green%d\\blue%d",C[0],C[1],C[2]); Style[CurStyle].color = save(c); if (*c) Style[CurStyle].colornum= CN++; Style[CurStyle].font = save(f); if (*f) Style[CurStyle].fontnum = FN++; Style[CurStyle].style = save(t); CurStyle++; } char * ignorecase(s) char *s; /* * Convert 's' from "abc" to "[aA][bB][cC]" to effect * case-insensitive regex matching (n.b., some nebbish * seems to have broken the "fold" option to re_compile()). */ { char t[1024], *p = t, *st = s; int push=0; #define lower(c) (islower(c)? c : tolower(c)) #define upper(c) (isupper(c)? c : toupper(c)) while (*s){ if (isalpha(*s) && !push){ *p++ = '['; *p++ = lower(*s); *p++ = upper(*s); *p++ = ']'; } else{ *p = *s; if (*p == '[' && p>t && p[-1] != '\\') push=1; if (push && *p == ']' && p>t && p[-1] != '\\') push=0; p++; } s++; } *p = '\0'; strcpy(st,t); } compile(s, delim) char *s; int delim; /* * Compile 's' as a regular expression and add it to the 'Word[...]' table. * 'delim' is true if 's' is delimited by "/.../" (i.e., a real regex) * otherwise it's zero, indicating 's' was derived from a "word." */ { Word[CurWord].style = CurStyle>0? CurStyle-1 : CurStyle; Word[CurWord].delim = delim; bcopy(re_compile(s,0),&(Word[CurWord].r),sizeof(struct regex)); CurWord++; } char *WordDelim = "[^a-zA-Z0-9]"; /* "words" are delimited by non-alphanums */ addWords(s) char *s; /* * 's' is a comma-separated list of words or regular expressions. * Parse these, enhance the regular expressions (i.e., derive some * plausible ones for "words") and compile the expressions in the table. */ { char *p, *next = s; while (next && *next){ p = s = next; if (*p == '/'){ Again: s = index(s+1,'/'); if (s && s[-1]=='\\') goto Again; if (!s) return error("bad format: %s",p); next = (s[1] == ',')? s+2 : NULL; } else { s = index(s,','); next = s? s + 1 : NULL; } if (s) *s = '\0'; if (*p == '/'){ char t[1024]; strcpy(t,p+1); if (IgnoreCase) ignorecase(t); compile(t,1); } else { char t[1024], u[1024]; strcpy(u,p); if (IgnoreCase) ignorecase(u); sprintf(t,"^%s%s",u,WordDelim); compile(t,0); sprintf(t,"%s%s%s",WordDelim,u,WordDelim); compile(t,0); } } } rewrite(s) char *s; /* * 's' is a line of input. Check the 'Word[...]' expressions * and perform the requisite stylistic changes. * This is essentially a global substitution within the line. */ { char t[MaxStr], *p = t, q[1024], *u; int i, n; struct regex *r; for (i=0;i<CurWord;i++){ r = &(Word[i].r); u = s; while (re_match(u,r)==1){ if (!Word[i].delim){ while (!isalnum(r->start[0])) r->start++; while (!isalnum(r->end[-1])) r->end--; } strncpy(t,u,r->start-u); p = t + (r->start - u); strncpy(q,r->start,r->end-r->start); q[r->end-r->start]='\0'; n = Word[i].style; #define str(x) ((x && x[0])? x : "") sprintf(p,"{"); if (Style[n].font) sprintf(p+strlen(p),"\\f%d", Style[n].fontnum+FontOffset); if (Style[n].style) sprintf(p+strlen(p),"%s",str(Style[n].style)); if (Style[n].color) sprintf(p+strlen(p),"\\fc%d", Style[n].colornum+ColorOffset); sprintf(p+strlen(p)," %s}",q); n = strlen(t); strcpy(p+strlen(p),r->end); strcpy(u,t); u += n; } } } filterRTF(in,out) FILE *in, *out; /* * copy RTF from 'in' to 'out', emending the font and color tables, * (noting the proper FontOffset and ColorOffset), and performing the * regular-expression-driven substitutions to rearrange the styles * of given words or expressions. */ { char s[MaxStr*2], *p, *q, *x; int i, wroteTables = (CN || FN)? 0 : 1; FontOffset = 0, ColorOffset = 0; while (fgets(s,sizeof(s),in)){ p = s; if (!wroteTables){ if (FN && recmp("{\\\\fonttbl",p)==0){ /* time for the font table */ x = p; p = strindex(p,"fonttbl"); q = index(p,'}'); while (!q){ fgets(p+strlen(p),1024,in); q = index(p,'}'); } if (q){ while ((p=strindex(p,"\\f")) && p < q){ i = atoi(p+=2); if (i>=FontOffset) FontOffset = i+1; } p = q; *p++ = '\0'; fputs(x,out); for (i=0;i<CurStyle;i++){ if (Style[i].font){ fprintf(out,"\\f%d\\fnil %s;", Style[i].fontnum+FontOffset, Style[i].font); } } fprintf(out,"}"); FN = 0; if (!CN) wroteTables = 1; } else error("%s: warning, couldn't write font table!",av0); } if (CN && (recmp("\\\\pard",p)==0||recmp("{\\\\colortbl",p)==0)){ /* time for the color table */ x = p; p = strindex(p,"colortbl"); ColorOffset = 1; if (p){ q = index(p,'}'); while (!q){ fgets(p+strlen(p),1024,in); q = index(p,'}'); } if (q){ p = index(p,';'); while ((p=index(p+1,';')) && p < q) ColorOffset++; p = q; *p = '\0'; fputs(x,out); for (i=0;i<CurStyle;i++){ if (Style[i].color){ fprintf(out,"%s;",Style[i].color); } } fprintf(out,"}"); } } else { p = strindex(x,"\\pard"); *p++ = '\0'; fprintf(out,"{\\colortbl;"); for (i=0;i<CurStyle;i++){ if (Style[i].color){ fprintf(out,"%s;",Style[i].color); } } fprintf(out,"}\\"); } CN = 0; if (!FN) wroteTables = 1; } } if (*p){ /* now rewrite the line for any substitutions */ rewrite(p); fputs(p,out); } } } char * tail(char *s){ char *p = (char *)rindex(s,'/'); if (!p) p = s; return p; } rewriteRTFFile(s) char *s; /* * Rewrite rtf or rtfd file 's' to effect the given changes. * A copy of the old file is saved in /tmp. * Fails if for some reason it can't create files there. */ { FILE *in,*out; char t[1024], u[1024]; static int x = 0; if (access(s,0)) return error("%s: couldn't read '%s'",av0,s); if (isDirectory(s)){ sprintf(u,"%s/TXT.rtf",s); sprintf(t,"/tmp/%s.%d.rtfd",tail(s),getpid(),x++); System("mkdir %s; cp %s %s",t,u,t); strcat(t,"TXT.rtf"); s = u; } else { sprintf(t,"/tmp/%s.%d%d",tail(s),getpid(),x++); System("cat %s > %s",s,t); } in = fopen(t,"r"); if (!in) return error("%s: couldn't backup '%s' to '%s'",av0,s,t); out = WriteToStdout? stdout : fopen(s,"w"); if (!out) return fclose(in), error("%s: couldn't write '%s'",av0,s); filterRTF(in,out); fclose(in); fclose(out); } use(){ #define E error E("use: %s [-i] [-o] [-c style...] [-w words...] [files or stdin]",av0); E("highlight words in an RTF file, or make other format changes."); E(" -c comma-separated colors, fonts, keywords, or rtf directives;"); E(" colors may be written as 'r,g,b' values as in '128,0,0'"); E(" or one of: red green blue yellow darkred darkgreen etc..."); E(" font names are preceded with '/', as in '/Sabon-Roman'"); E(" keywords are: bold italic"); E(" \"raw\" rtf directives are written with a backslash '\\b\\i'"); E(" e.g.: -c red,bold,\\fs36,/Sabon-Roman"); E(" -w comma-separated words or regular expressions enclosed in /.../"); E(" -i case-insensitive matching."); E(" -o force output to stdout."); exit(1); } main(ac,av) char *av[]; { int i; for_each_argument { case 'c': addStyle(argument); Case 'w': addWords(argument); Case 'i': IgnoreCase++; Case 'o': WriteToStdout++; Default : use(); } if (i==ac) filterRTF(stdin,stdout); else while (i<ac) rewriteRTFFile(av[i++]); exit(0); }
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.