/* This is rubric.c in view mode; [Download] [Up] */
/*
* rubric [-i] [-o] [-c styles] [-w words] [files...]
*
* ru-bric \'rü-brik, -,brik\ n
* [ME rubrike red ocher, heading in red letters of part of a book,
* fr. MF rubrique, fr. L rubrica, fr. rubr-, ruber red] (14c)
* 1: a heading of a part of a book or manuscript done or
* underlined in a color (as red) different from the rest...
*
* Highlight the specified words in RTF files; e.g.,
* rubric -c red -w Hawley -c bold,0,29,29 -w NeXT file.rtf
*
* rewrites 'file.rtf' so that occurrences of 'Hawley' are set in red,
* and 'NeXT' in a dark slate boldface green.
* "styles" are comma-separated strings of colors, fonts, or RTF directives.
* A color may be a comma-separated RGB triple (on a scale of 0-255),
* or one of the following:
* red,green,blue,yellow,darkred,darkgreen,darkblue,darkyellow
* Styles may also include the keywords "bold" and "italic",
* a font name preceded by a slash, ("/Adobe-Hirsutulous")
* or an arbitrary RTF directive ("\\f2\\fs36").
* "Words" are one or more comma-separated strings or regular expressions
* (enclosed in "/.../"). In the text, simple words are delimited by
* white space, punctuation, or start/end of line.
* If the '-i' option is given, case is ignored in matching.
* The '-o' option forces output to stdout.
*
* The effect is to append fonts and colors to the fonttbl and colortbl,
* and to then surround occurrences of words or expressions with {\... }.
* The latter can be done with a careful "sed" script, but additions to
* the font and color tables, as well as the handling of the common instance
* of simple word-highlighting, are more conveniently done here.
*
* If one or more files (or "rtfd" directories) are given as arguments,
* these are rewritten in place (a copy of the old file is saved in /tmp).
* Otherwise, rubric acts as a filter, and writes to the standard output.
*
* Bugs & pratfalls:
* - the notion of a regular expression at start-of-line ("/^.../")
* is not accurate in the context of an RTF file.
* - there should be a convenient way to retrieve colors by name
* from the default color lists; e.g., '-c "Cadmium Yellow"'
* - if the '-o' option is given with more than one rtf file,
* results will not be correct; the files should be 'rtfcat-ed' first.
* - does not "really" parse RTF; NeXT implementation of RTF incomplete.
*
*
* Like the program? Then fork over the vaporware fee:
* scan some currency and e-mail the tiff files to the address below
* for my digital coin collection.
*
*
* Michael Hawley
* MIT Media Laboratory
* 20 Ames Street
* Cambridge, MA 02139
* mike@media-lab.mit.edu
*
* Copyright (c) MIT Media Laboratory, January 1993.
* This program may be used freely for non-profit pursuits.
* Please send improvements to the author, and see the general
* copyright notice in the README file.
*
*/
#include <stdarg.h>
#include <stdio.h>
/*
 * 'Case' and 'Default' close the preceding switch arm with a 'break'
 * before opening the next one, so arms written with them never fall
 * through.  The first arm of a switch must still use a plain 'case'.
 */
#define Case break; case
#define Default break; default
static char *_arg, *_argp; /* scratch pointers used by 'argument' and 'for_each_argument' */
static char *av0; /* will hold name of the command */
/*
 * 'argument' yields the value belonging to the option letter just seen:
 * the remainder of the current option word if there is one ("-cred"),
 * otherwise the next entry of av[] ("-c red"), advancing 'i' past it.
 * When there is no next entry, 'i' is stepped back and av[i] itself is used.
 * '_argp' is then moved to the end of its string so that the option-letter
 * loop in 'for_each_argument' stops scanning the current word.
 * Relies on 'ac', 'av' and 'i' being in scope at the point of use.
 */
#define argument (_arg=(*_argp? _argp : av[++i==ac? --i : i]),_argp+=strlen(_argp),_arg)
/*
 * 'for_each_argument' records av[0] in 'av0', then walks every leading
 * av[] entry that starts with '-' and switches on each option letter in it.
 * It must be followed by the body of the switch: { case 'x': ... }.
 * On exit 'i' indexes the first non-option argument (or equals 'ac').
 */
#define for_each_argument av0 = av[0]; for (i=1;i<ac && *av[i]=='-';i++)\
for (_argp = &av[i][1]; *_argp;)\
switch(*_argp++)
/* old-style (unprototyped) declarations, so the results are used as char pointers */
char *malloc(), *index();
#include <sys/types.h>
#include <sys/stat.h>
/*
 * isDirectory: report whether path 'f' names a directory.
 * Returns 1 when stat() succeeds and the file type is "directory",
 * and 0 otherwise (including when 'f' cannot be stat'ed at all).
 */
int isDirectory(char *f){
    struct stat b;

    /* a failed stat() leaves 'b' unset, so it must not be inspected */
    if (stat(f, &b) != 0)
        return 0;
    /*
     * S_ISDIR compares the whole file-type field.  Masking with S_IFDIR
     * alone is also non-zero for any type whose code shares that bit
     * (block devices, on the usual encodings).
     */
    return S_ISDIR(b.st_mode) ? 1 : 0;
}
/*
 * error: printf-style diagnostic.
 * Formats 'fmt' and its arguments to stderr, appends a newline, and
 * returns 0 so that callers can write "return error(...)" to report
 * failure in one statement.
 *
 * The arguments are taken through <stdarg.h> rather than as a fixed list
 * of 'int's: every caller passes string pointers, which do not survive a
 * trip through 'int' parameters where pointers are wider than int.
 */
int error(const char *fmt, ...){
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "\n");
    return 0;
}
int System(fmt, a,b,c,d,e,f,g)
char *fmt,*a,*b,*c,*d,*e,*f,*g;
/*
 * "printf" a shell command and run it with system(); gripe if it failed.
 * Returns 1 when the command ran and exited with status 0, else 0.
 *
 * Kept as an old-style definition on purpose: callers pass fewer than
 * seven string arguments, which a prototype would reject.
 *
 * NOTE(review): the arguments are pasted into a shell command line
 * unquoted, so names containing blanks or shell metacharacters are
 * interpreted by the shell.
 */
{
    extern int system(const char *s);
    char t[2048];
    int n, status;

    /* bounded formatting: a long file name must not overrun 't' */
    n = snprintf(t, sizeof t, fmt, a,b,c,d,e,f,g);
    if (n < 0 || n >= (int)sizeof t) {
        error("command too long: %s", fmt);
        return 0;
    }
    status = system(t);
    if (status) error("bad command: %s",t);
    return !status;
}
/*
 * save: duplicate string 's' on the heap and return the copy.
 * A null or empty 's' yields NULL rather than an allocated empty string;
 * NULL is also returned when the allocation fails.
 */
char *
save(s) char *s; {
    char *copy = NULL;

    if (s != NULL && s[0] != '\0') {
        copy = malloc(strlen(s) + 1);
        if (copy != NULL)
            strcpy(copy, s);
    }
    return copy;
}
/*
 * strindex: find the first occurrence of 't' inside 's'.
 * Returns a pointer into 's' at the start of the match, or a null pointer
 * when 's' is null, 's' is empty, or 't' does not occur in it.
 * 't' itself must not be null.
 */
char *
strindex(s,t) char *s, *t; {
    int len = strlen(t);
    char *at;

    if (!s)
        return (char *)0;
    for (at = s; *at != '\0'; at++) {
        if (strncmp(at, t, len) == 0)
            return at;
    }
    return (char *)0;
}
#include <ctype.h>
#include <regex.h>
#define MaxStyles 1024 /* capacity of the Style[] table */
#define MaxStr 2048 /* unit used to size the line buffers in rewrite() and filterRTF() */
/*
 * One entry per "-c ..." option, filled in by addStyle().
 * Each string field is a heap copy made by save(), or null when that
 * part of the style was not given.
 */
struct {
char *color; /* "\redR\greenG\blueB" entry for the color table */
int colornum; /* index among the colors added by this program (before ColorOffset) */
char *font; /* font name, as written after the leading '/' */
int fontnum; /* index among the fonts added by this program (before FontOffset) */
char *style; /* concatenated RTF directives, e.g. "\b\i" */
} Style[MaxStyles];
int CurStyle = 0; /* number of Style[] entries in use */
int FN = 0, CN = 0; /* how many fonts / colors the styles have asked for */
#define MaxWords 512 /* capacity of the Word[] table */
/*
 * One entry per compiled pattern, filled in by compile().
 */
struct {
int style; /* index into Style[] to apply to matches */
int delim; /* non-zero for a user-supplied /regex/, zero for a pattern derived from a plain word */
struct regex r; /* copy of the compiled expression */
} Word[MaxWords];
int CurWord = 0; /* number of Word[] entries in use */
/*
 * FontOffset / ColorOffset: first free index in the document's own font
 * and color tables, worked out by filterRTF().
 * IgnoreCase is set by '-i', WriteToStdout by '-o'.
 */
int FontOffset = 0, ColorOffset = 0, IgnoreCase = 0, WriteToStdout = 0;
addStyle(s)
char *s;
/*
* 's' is a comma-separated list of styles, e.g., "red,bold,/Helvetica"
* Parse these and incorporate in the 'Style[...]' table.
* Each "-c ..." option calls 'addStyle(...)' to add an entry to the table.
*/
{
int C[3], cn=3;
char *p = s, t[1024]="", f[256]="", c[256]="";
#define If(x) if (strcmp(s,x)==0)
while (p){
s = p;
if (p=index(p,',')) *p++ = '\0';
if (isdigit(*s)){ C[cn%3] = atoi(s), cn++; continue; }
else if (*s == '/') { strcpy(f,s+1); continue; }
else If("bold") strcat(t,"\\b");
else If("italic") strcat(t,"\\i"); /* can you think of any others? */
/* this should be implemented via named colors lists */
else If("red") { C[0]=255; C[1]=C[2] = 0; cn=6; continue; }
else If("green") { C[1]=255; C[0]=C[2] = 0; cn=6; continue; }
else If("blue") { C[2]=255; C[0]=C[1] = 0; cn=6; continue; }
else If("yellow") { C[0]=C[1]=255; C[2] = 0; cn=6; continue; }
else If("darkred") { C[0]=128; C[1]=C[2] = 0; cn=6; continue; }
else If("darkgreen") { C[1]=128; C[0]=C[2] = 0; cn=6; continue; }
else If("darkblue") { C[2]=128; C[0]=C[1] = 0; cn=6; continue; }
else If("darkyellow") { C[0]=C[1]=160; C[2] = 0; cn=6; continue; }
else strcat(t,s);
}
if (cn != 3) sprintf(c,"\\red%d\\green%d\\blue%d",C[0],C[1],C[2]);
Style[CurStyle].color = save(c); if (*c) Style[CurStyle].colornum= CN++;
Style[CurStyle].font = save(f); if (*f) Style[CurStyle].fontnum = FN++;
Style[CurStyle].style = save(t);
CurStyle++;
}
char *
ignorecase(s)
char *s;
/*
 * Convert 's' in place from "abc" to "[aA][bB][cC]" to effect
 * case-insensitive regex matching (n.b., some nebbish
 * seems to have broken the "fold" option to re_compile()).
 *
 * Letters inside an existing [...] bracket expression are left alone,
 * as is everything that is not a letter.  A '[' or ']' preceded by a
 * backslash does not open or close a bracket expression.
 *
 * Every letter grows to four characters, so the buffer behind 's' must
 * have room for the expanded text.  The expansion is limited to 1023
 * characters: if it would be longer, 's' is left untouched and a null
 * pointer is returned.  Otherwise returns 's'.
 */
{
    char t[1024], *p = t, *st = s;
    char *limit = t + sizeof t - 1; /* slot reserved for the final NUL */
    int push = 0; /* non-zero while inside a [...] expression */
    int c;

    while (*s){
        c = (unsigned char)*s;
        if (isalpha(c) && !push){
            if (limit - p < 4) return NULL;
            *p++ = '[';
            *p++ = tolower(c);
            *p++ = toupper(c);
            *p++ = ']';
        } else {
            if (limit - p < 1) return NULL;
            *p = *s;
            /* a '[' that is the very first character opens a bracket too */
            if (*p == '[' && (p == t || p[-1] != '\\')) push = 1;
            else if (push && *p == ']' && p[-1] != '\\') push = 0;
            p++;
        }
        s++;
    }
    *p = '\0';
    strcpy(st,t);
    return st;
}
compile(s, delim)
char *s;
int delim;
/*
* Compile 's' as a regular expression and add it to the 'Word[...]' table.
* 'delim' is true if 's' is delimited by "/.../" (i.e., a real regex)
* otherwise it's zero, indicating 's' was derived from a "word."
*/
{
Word[CurWord].style = CurStyle>0? CurStyle-1 : CurStyle;
Word[CurWord].delim = delim;
bcopy(re_compile(s,0),&(Word[CurWord].r),sizeof(struct regex));
CurWord++;
}
char *WordDelim = "[^a-zA-Z0-9]"; /* "words" are delimited by non-alphanums */
addWords(s)
char *s;
/*
* 's' is a comma-separated list of words or regular expressions.
* Parse these, enhance the regular expressions (i.e., derive some
* plausible ones for "words") and compile the expressions in the table.
*/
{
char *p, *next = s;
while (next && *next){
p = s = next;
if (*p == '/'){
Again:
s = index(s+1,'/');
if (s && s[-1]=='\\') goto Again;
if (!s) return error("bad format: %s",p);
next = (s[1] == ',')? s+2 : NULL;
} else {
s = index(s,',');
next = s? s + 1 : NULL;
}
if (s) *s = '\0';
if (*p == '/'){
char t[1024];
strcpy(t,p+1);
if (IgnoreCase) ignorecase(t);
compile(t,1);
} else {
char t[1024], u[1024];
strcpy(u,p);
if (IgnoreCase) ignorecase(u);
sprintf(t,"^%s%s",u,WordDelim); compile(t,0);
sprintf(t,"%s%s%s",WordDelim,u,WordDelim); compile(t,0);
}
}
}
rewrite(s)
char *s;
/*
* 's' is a line of input. Check the 'Word[...]' expressions
* and perform the requisite stylistic changes.
* This is essentially a global substitution within the line.
*/
{
char t[MaxStr], *p = t, q[1024], *u;
int i, n;
struct regex *r;
for (i=0;i<CurWord;i++){
r = &(Word[i].r);
u = s;
while (re_match(u,r)==1){
if (!Word[i].delim){
while (!isalnum(r->start[0])) r->start++;
while (!isalnum(r->end[-1])) r->end--;
}
strncpy(t,u,r->start-u);
p = t + (r->start - u);
strncpy(q,r->start,r->end-r->start); q[r->end-r->start]='\0';
n = Word[i].style;
#define str(x) ((x && x[0])? x : "")
sprintf(p,"{");
if (Style[n].font) sprintf(p+strlen(p),"\\f%d",
Style[n].fontnum+FontOffset);
if (Style[n].style) sprintf(p+strlen(p),"%s",str(Style[n].style));
if (Style[n].color) sprintf(p+strlen(p),"\\fc%d",
Style[n].colornum+ColorOffset);
sprintf(p+strlen(p)," %s}",q);
n = strlen(t);
strcpy(p+strlen(p),r->end);
strcpy(u,t);
u += n;
}
}
}
/*
 * findClose: locate the '}' that ends the table starting at 'p'.
 * 'p' points into 'buf' (of 'size' bytes).  If the line held there has
 * no '}', further input lines are appended to it while room remains.
 * Returns a pointer to the '}', or NULL at end of input or when the
 * buffer is full.
 */
static char *
findClose(char *p, char *buf, int size, FILE *in)
{
    char *q;
    int used;

    while ((q = index(p,'}')) == NULL){
        used = (int)(p - buf) + (int)strlen(p);
        if (size - used < 2 || !fgets(buf + used, size - used, in))
            return NULL;
    }
    return q;
}

/* emitColors: write one "<color>;" color-table entry per style that has a color. */
static void
emitColors(FILE *out)
{
    int i;

    for (i=0;i<CurStyle;i++)
        if (Style[i].color)
            fprintf(out,"%s;",Style[i].color);
}

int filterRTF(in,out)
FILE *in, *out;
/*
 * copy RTF from 'in' to 'out', emending the font and color tables,
 * (noting the proper FontOffset and ColorOffset), and performing the
 * regular-expression-driven substitutions to rearrange the styles
 * of given words or expressions.
 *
 * Fonts are appended to the first {\fonttbl...}.  Colors are appended to
 * the first {\colortbl...}; if a \pard turns up before any color table,
 * a new table is written just ahead of it.
 *
 * What is still to be written is tracked in locals; the global counts
 * FN and CN are left alone so that every file named on the command line
 * gets its tables, not just the first one.
 *
 * NOTE(review): recmp() comes from the platform's regex.h; the code
 * treats a zero result as "pattern found in the line" -- confirm.
 * Always returns 1.
 */
{
    char s[MaxStr*2], *p, *q, *x;
    int i;
    int needFonts = (FN != 0), needColors = (CN != 0);

    FontOffset = 0, ColorOffset = 0;
    while (fgets(s,sizeof(s),in)){
        p = s;
        if (needFonts && recmp("{\\\\fonttbl",p)==0){
            /* time for the font table */
            x = p;
            p = strindex(p,"fonttbl");
            q = p? findClose(p,s,(int)sizeof(s),in) : NULL;
            if (q){
                /* the new fonts are numbered after the highest \fN in use */
                while ((p=strindex(p,"\\f")) && p < q){
                    i = atoi(p+=2);
                    if (i>=FontOffset) FontOffset = i+1;
                }
                p = q; *p++ = '\0';
                fputs(x,out);
                for (i=0;i<CurStyle;i++){
                    if (Style[i].font){
                        fprintf(out,"\\f%d\\fnil %s;",
                        Style[i].fontnum+FontOffset,
                        Style[i].font);
                    }
                }
                fprintf(out,"}");
                needFonts = 0;
            } else {
                error("%s: warning, couldn't write font table!",av0);
                p = x;
            }
        }
        if (needColors && (recmp("\\\\pard",p)==0||recmp("{\\\\colortbl",p)==0)){
            /* time for the color table */
            x = p;
            p = strindex(p,"colortbl");
            ColorOffset = 1;
            if (p){
                q = findClose(p,s,(int)sizeof(s),in);
                if (q){
                    /* the first ';' is the default color; count the ones after it */
                    p = index(p,';');
                    while (p && (p=index(p+1,';')) && p < q)
                        ColorOffset++;
                    /* step past the brace so the rest of the line is kept */
                    p = q; *p++ = '\0';
                    fputs(x,out);
                    emitColors(out);
                    fprintf(out,"}");
                    needColors = 0;
                } else {
                    error("%s: warning, couldn't write color table!",av0);
                    p = x;
                }
            } else if ((p = strindex(x,"\\pard")) != NULL){
                /* no color table in this file: make one in front of \pard */
                *p++ = '\0';
                fputs(x,out); /* whatever preceded \pard on the line */
                fprintf(out,"{\\colortbl;");
                emitColors(out);
                /* 'p' now points at "pard..."; put its backslash back */
                fprintf(out,"}\\");
                needColors = 0;
            } else
                p = x;
        }
        if (*p){ /* now rewrite the line for any substitutions */
            rewrite(p);
            fputs(p,out);
        }
    }
    return 1;
}
/*
 * tail: return the last component of path 's', *including* the '/' in
 * front of it ("a/b/c" gives "/c").  A name without any '/' is returned
 * unchanged.  The result points into 's'.
 */
char *
tail(char *s){
    char *last = s, *p;

    /*
     * Scan by hand instead of calling rindex(): that function has no
     * visible declaration in this file, so its result would be taken for
     * an int and could lose part of the pointer where pointers are wider
     * than int.
     */
    for (p = s; *p; p++)
        if (*p == '/') last = p;
    return last;
}
/*
 * copyFile: copy the contents of file 'from' to file 'to'.
 * Returns 1 on success, 0 if either file could not be opened or an
 * I/O error occurred.
 */
static int
copyFile(char *from, char *to)
{
    FILE *src, *dst;
    char buf[4096];
    size_t n;
    int ok = 1;

    src = fopen(from,"r");
    if (!src) return 0;
    dst = fopen(to,"w");
    if (!dst){ fclose(src); return 0; }
    while ((n = fread(buf,1,sizeof buf,src)) > 0)
        if (fwrite(buf,1,n,dst) != n){ ok = 0; break; }
    if (ferror(src)) ok = 0;
    fclose(src);
    if (fclose(dst) != 0) ok = 0;
    return ok;
}

int rewriteRTFFile(s)
char *s;
/*
 * Rewrite rtf or rtfd file 's' to effect the given changes.
 * A copy of the old file is saved in /tmp.
 * Fails if for some reason it can't create files there.
 *
 * For a directory the file rewritten is "<s>/TXT.rtf", and the copy goes
 * to "/tmp/<name>.<pid><n>.rtfd/TXT.rtf"; for a plain file the copy is
 * "/tmp/<name>.<pid><n>".  <n> counts calls, so two arguments with the
 * same base name do not overwrite each other's copy.
 * The copy is made directly rather than through a shell command, so
 * file names need no quoting.
 * With '-o' the result goes to stdout, which is flushed but left open.
 * Returns 1 on success, 0 on failure.
 */
{
    FILE *in,*out;
    char t[1024], u[1024];
    static int x = 0;
    int n, ok;

    if (access(s,0)) return error("%s: couldn't read '%s'",av0,s);
    if (isDirectory(s)){
        n = snprintf(u,sizeof u,"%s/TXT.rtf",s);
        if (n < 0 || n >= (int)sizeof u)
            return error("%s: name too long: '%s'",av0,s);
        n = snprintf(t,sizeof t,"/tmp/%s.%d%d.rtfd",tail(s),getpid(),x++);
        if (n < 0 || n + (int)sizeof "/TXT.rtf" > (int)sizeof t)
            return error("%s: name too long: '%s'",av0,s);
        /* a failure here shows up as a failed copy just below */
        (void)mkdir(t,0777);
        strcat(t,"/TXT.rtf");
        ok = copyFile(u,t);
        s = u;
    } else {
        n = snprintf(t,sizeof t,"/tmp/%s.%d%d",tail(s),getpid(),x++);
        if (n < 0 || n >= (int)sizeof t)
            return error("%s: name too long: '%s'",av0,s);
        ok = copyFile(s,t);
    }
    /* the saved copy is the input; the original is overwritten */
    in = ok? fopen(t,"r") : NULL;
    if (!in) return error("%s: couldn't backup '%s' to '%s'",av0,s,t);
    out = WriteToStdout? stdout : fopen(s,"w");
    if (!out) return fclose(in), error("%s: couldn't write '%s'",av0,s);
    filterRTF(in,out);
    fclose(in);
    /* stdout must stay usable for any further files */
    if (out == stdout) fflush(out);
    else fclose(out);
    return 1;
}
use(){
#define E error
E("use: %s [-i] [-o] [-c style...] [-w words...] [files or stdin]",av0);
E("highlight words in an RTF file, or make other format changes.");
E(" -c comma-separated colors, fonts, keywords, or rtf directives;");
E(" colors may be written as 'r,g,b' values as in '128,0,0'");
E(" or one of: red green blue yellow darkred darkgreen etc...");
E(" font names are preceded with '/', as in '/Sabon-Roman'");
E(" keywords are: bold italic");
E(" \"raw\" rtf directives are written with a backslash '\\b\\i'");
E(" e.g.: -c red,bold,\\fs36,/Sabon-Roman");
E(" -w comma-separated words or regular expressions enclosed in /.../");
E(" -i case-insensitive matching.");
E(" -o force output to stdout.");
exit(1);
}
/*
 * main: collect the options, then either rewrite each file named on the
 * command line or, when none is named, filter stdin to stdout.
 * Always finishes through exit(0) (or exit(1) inside use()).
 */
int main(ac,av) int ac; char *av[]; {
    int i;

    for_each_argument {
        case 'c':
            addStyle(argument);
            break;
        case 'w':
            addWords(argument);
            break;
        case 'i':
            IgnoreCase++;
            break;
        case 'o':
            WriteToStdout++;
            break;
        default:
            use();
    }
    /* 'i' now indexes the first non-option argument, or equals 'ac' */
    if (i < ac) {
        for (; i < ac; i++)
            rewriteRTFFile(av[i]);
    } else {
        filterRTF(stdin,stdout);
    }
    exit(0);
}
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */