ftp.nice.ch/pub/next/unix/security/pgp.2.6.2is.s.tar.gz#/pgp262is/pgp262ii/contrib/langtool/language.c

This is language.c in view mode; [Download] [Up]

/*
 *	language.c - Foreign language translation for PGP
 *	Finds foreign language "subtitles" for English phrases 
 *	in an external foreign language text file.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "usuals.h"
#ifndef LANGTOOL
#include "fileio.h"
#include "language.h"
#include "pgp.h"
#else
#define MAX_PATH	255
boolean verbose;
long fsize();
#endif

/* Name of the language text file; also used as the prefix of error messages */
char langfile[80] = "language.txt";
/* Name of the binary index file that caches message offsets */
#define LANG_INDEXFILE	"language.idx"

/* Number of bytes allocated for strbuf, the buffer readstr() fills */
#define	STRBUFSIZE		2048

char language[16] = "en";	/* The language code, defaults to English */
static char	*strbuf;	/* buffer of STRBUFSIZE bytes holding the string last read */
static char	lang[16];	/* readstr sets this to the language id of the msg it last read */
static int	subtitles_available = 0;
/* Current line number in the language file; readstr() only counts lines
   while this is non-zero, which make_indexfile() arranges during a scan */
static int line = 0;
/* Number of problems reported through error() */
static int errcount = 0;
/*	subtitles_available is used to determine if we know whether the special
	subtitles_file exists.  subtitles_available has the following values:
	0  = first time thru, we don't yet know if subtitles_file exists.
	1  = we have already determined that subtitles_file exists.
	-1 = we have already determined that subtitles_file does not exist.
*/

static void error(char *);

/* States of the readstr() parser */
#define	NEWLINE		0	/* between items: expecting '#', '"', an identifier or white space */
#define	COMMENT		1	/* inside a '#' comment, skipping to end of line */
#define	INSTRING	2	/* inside a quoted string */
#define	ESCAPE		3	/* just saw a backslash inside a string */
#define	IDENT		4	/* reading a language identifier, ended by ':' */
#define	DONE		5	/* closing quote seen, waiting for end of line */
#define	ERROR		6	/* after an error: skipping until a newline */
#define	ERR1		7	/* newline seen in ERROR; a second newline resumes parsing */

/* Look for and return a quoted string from the file.
 * If nlabort is true, return failure if we find a blank line
 * before we find the opening quote.
 */
static char	*
readstr (FILE *f, char *buf, int nlabort)
{
	int	c, d;
	char *p = buf;
	int state = NEWLINE;
	int i = 0;
	
	while ((c = getc(f)) != EOF) {
		if (c == '\r')
			continue;
		/* line numbers are only incremented when creating index file */
		if (line && c == '\n')
			++line;
		switch (state) {
		  case NEWLINE:
			switch(c) {
			  case '#': state = COMMENT; break;
			  case '"': state = INSTRING; break;
			  case '\n':
				if (nlabort) {
					*buf = '\0';
					return(buf);
				}
			  default:
				if (i == 0 && isalnum(c)) {
					state = IDENT;
					lang[i++] = c;
					break;
				}
				if (!isspace(c)) {
					error("syntax error\n");
					state = ERROR;
				}
			}
			break;
		  case COMMENT:
			if (c == '\n')
				state = NEWLINE;
			break;
		  case INSTRING:
			switch(c) {
			  case '\\': state = ESCAPE; break;
			  case '"': state = DONE; break;
			  default: *p++ = c;
			}
			break;
		case ESCAPE:
			switch (c) {
			  case 'n':	*p++ = '\n';	break;
			  case 'r':	*p++ = '\r';	break;
			  case 't':	*p++ = '\t';	break;
			  case 'e':	*p++ = '\033';	break;
			  case 'a':	*p++ = '\007';	break;
			  case '#':
			  case '"':
			  case '\\':	*p++ = c; break;
			  case '\n':	break;
			  case '0':
			  case '1':
			  case '2':
			  case '3':
			  case '4':
			  case '5':
			  case '6':
			  case '7':
				/* ANSI C rules: up to 3 octal digits */
				d = c - '0';
				if ((c = getc(f)) >= '0' && c <= '7') {
					d = (d<<3) + (c-'0');
					if ((c = getc(f)) >= '0' && c <= '7')
						d = (d<<3) + (c-'0');
					else
						ungetc(c, f);
				} else {
					ungetc(c, f);
				}
				*p++ = d;
				break;
			  default:
				error("illegal escape sequence: ");
				fprintf(stderr, "'\\%c'\n", c);
				break;
			}
			state = INSTRING;
			break;
		  case IDENT:		/* language identifier */
			if (c == ':') {
				state = NEWLINE;
				break;
			}
			if (c == '\n' && strncmp(lang, "No translation", 14) == 0)
			{
				i = 0;
				state = NEWLINE;
				break;
			}
			lang[i++] = c;
			if (i == 15 || !isalnum(c) && !isspace(c)) {
				lang[i] = '\0';
				error("bad language identifier\n");
				state = ERROR;
				i = 0;
			}
			break;
		  case DONE:
			if (c == '\n') {
				lang[i] = '\0';
				*p = '\0';
				return(buf);
			}
			if (!isspace(c)) {
				error("extra characters after '\"'\n");
				state = ERROR;
			}
			break;
		  case ERROR:
			if (c == '\n')
				state = ERR1;
			break;
		  case ERR1:
			state = (c == '\n' ? NEWLINE : ERROR);
			break;
		}
	}
	if (state != NEWLINE)
		error("unexpected EOF\n");
	return(NULL);
}


/* One index entry per translated message */
static struct indx_ent {
	word32	crc;		/* message_crc() of the original message text */
	long	offset;		/* file offset in langf where the translation entry starts */
} *indx_tbl = NULL;

static int max_msgs = 0;	/* number of entries indx_tbl has room for (grown by make_indexfile) */
static int nmsg = 0;		/* number of entries of indx_tbl in use */

static FILE *langf;		/* the open language text file */

/* Header stored at the front of the index file, ahead of the entries */
static struct {
	long lang_fsize;	/* size of language.txt */
	char lang[16];		/* language identifier */
	int nmsg;		/* number of messages */
} indx_hdr;


static int make_indexfile(char *);
/* CRC routines defined outside this file */
word32 crcupdate(byte, word32);
void init_crc();

/*
 * Checksum of the NUL-terminated message s: every character is fed
 * through crcupdate() (the 24-bit CRC function from armor.c), starting
 * from an accumulator of 0.
 */
static word32
message_crc(char *s)
{
	word32 sum;
	char *cp;

	sum = 0;
	for (cp = s; *cp != '\0'; ++cp)
		sum = crcupdate(*cp, sum);
	return(sum);
}

/*
 * Search the first nmsg entries of indx_tbl for the given crc.
 * Returns the file offset stored in the first matching entry,
 * or -1 when no entry matches.
 */
static long
lookup_offset(word32 crc)
{
	struct indx_ent *ent = indx_tbl;
	int remaining;

	for (remaining = nmsg; remaining > 0; --remaining, ++ent) {
		if (ent->crc == crc)
			return(ent->offset);
	}
	return(-1);
}



#ifndef LANGTOOL
static void init_lang();

/*
 * return foreign translation of s
 *
 * The first call initializes the language tables through init_lang().
 * s itself is returned whenever no translation can be produced: no
 * subtitles are available, the message is not in the index, the
 * language file cannot be positioned or read, or the entry is empty.
 * Otherwise the translation is returned in the shared buffer strbuf,
 * which the next call overwrites.
 */
char *
PSTR (char *s)
{
	long filepos;
	char *p;

	if (subtitles_available == 0)
		init_lang();
	if (subtitles_available < 0)
		return(s);

	filepos = lookup_offset(message_crc(s));
	if (filepos == -1)
		return(s);

	/* readstr() returns NULL without terminating strbuf when it hits
	   EOF, so its result must be checked before strbuf is looked at */
	if (fseek(langf, filepos, SEEK_SET) != 0 ||
		readstr(langf, strbuf, 1) == NULL ||
		strbuf[0] == '\0')
		return(s);

	/* pass every character through EXT_C() (presumably the conversion
	   to the external character set -- defined outside this file) */
	for (p = strbuf; *p; ++p)
		*p = EXT_C(*p);
	return(strbuf);
}

/*
 * initialize the index table: read it from language.idx or create
 * a new one and write it to the index file. A new index file is
 * created if the language set in config.pgp doesn't match the one
 * in language.idx or if the size of language.txt has changed.
 *
 * The header read from the index file is not trusted: its language
 * field is forcibly terminated and its message count must be positive
 * and small enough to allocate, otherwise the index is rebuilt.
 *
 * Sets subtitles_available to 1 on success and to -1 when translations
 * cannot be used; in the latter case the language file is closed and
 * the string buffer is released.
 */
static void
init_lang()
{
	char indexfile[MAX_PATH];
	char subtitles_file[MAX_PATH];
	FILE *indexf;

	if (strcmp(language, "en") == 0) {
		subtitles_available = -1;
		return;		/* use default messages */
	}

	buildfilename (subtitles_file, langfile);
	if ((langf = fopen(subtitles_file, "rb")) == NULL) {
		subtitles_available = -1;
		return;
	}
	init_crc();
	if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
		fprintf(stderr, "Not enough memory for foreign subtitles\n");
		fclose(langf);
		subtitles_available = -1;
		return;
	}
	buildfilename(indexfile, LANG_INDEXFILE);
	if ((indexf = fopen(indexfile, "rb")) != NULL) {
		if (fread(&indx_hdr, sizeof(indx_hdr), 1, indexf) == 1) {
			/* the bytes came from disk: make sure strcmp() stops */
			indx_hdr.lang[sizeof(indx_hdr.lang) - 1] = '\0';
			if (indx_hdr.lang_fsize == fsize(langf) &&
				strcmp(indx_hdr.lang, language) == 0 &&
				indx_hdr.nmsg > 0 &&
				(size_t) indx_hdr.nmsg <=
					((size_t) -1) / sizeof(struct indx_ent))
			{
				nmsg = indx_hdr.nmsg;
				indx_tbl = (struct indx_ent *)
					malloc((size_t) nmsg * sizeof(struct indx_ent));
				if (indx_tbl == NULL) {
					fprintf(stderr, "Not enough memory for foreign subtitles\n");
					fclose(indexf);
					fclose(langf);
					free(strbuf);
					strbuf = NULL;
					subtitles_available = -1;
					return;
				}
				if (fread(indx_tbl, sizeof(struct indx_ent),
						(size_t) nmsg, indexf) != (size_t) nmsg)
				{
					free(indx_tbl);	/* create a new one */
					indx_tbl = NULL;
				}
			}
		}
		fclose(indexf);
	}
	if (indx_tbl == NULL && make_indexfile(indexfile) < 0) {
		fclose(langf);
		free(strbuf);
		strbuf = NULL;
		subtitles_available = -1;
	} else {
		subtitles_available = 1;
	}
}
#endif /* !LANGTOOL */


/*
 * build the index table in memory, and if indexfile is not NULL,
 * write it to this file
 *
 * Scans langf from the beginning.  For every message the entries that
 * follow it are read until one carries the language in "language" or a
 * blank line ends the group; if "language" is empty it is set to the
 * language of the first entry found.  For a match the message CRC and
 * the file offset at which the matching entry starts are recorded.
 *
 * Returns 0 on success (also when the index file could not be written,
 * which is only reported on stderr) and -1 when memory runs out or no
 * translation for the language exists.  After an out-of-memory failure
 * the table is released and nmsg/max_msgs are reset, so that
 * lookup_offset() stays safe to call.
 */
static int
make_indexfile(char *indexfile)
{
	FILE *indexf;
	long filepos;
	int total_msgs = 0;
	char *res;

	rewind(langf);
	indx_hdr.lang_fsize = fsize(langf);
	strncpy(indx_hdr.lang, language, sizeof(indx_hdr.lang) - 1);
	indx_hdr.lang[sizeof(indx_hdr.lang) - 1] = '\0';
	init_crc();
	line = 1;
	nmsg = 0;
	while (readstr(langf, strbuf, 0)) {
		if (nmsg == max_msgs) {
			/* grow the table: 400 entries at first, then doubling */
			int new_max = max_msgs ? max_msgs * 2 : 400;
			struct indx_ent *new_tbl;

			/* realloc(NULL, n) behaves like malloc(n) */
			new_tbl = (struct indx_ent *) realloc(indx_tbl, new_max *
						sizeof(struct indx_ent));
			if (new_tbl == NULL) {
				fprintf(stderr, "Not enough memory for foreign subtitles\n");
				/* leave a consistent, empty table behind */
				free(indx_tbl);
				indx_tbl = NULL;
				max_msgs = 0;
				nmsg = 0;
				line = 0;
				return(-1);
			}
			indx_tbl = new_tbl;
			max_msgs = new_max;
		}
		++total_msgs;
		indx_tbl[nmsg].crc = message_crc(strbuf);
		if (lookup_offset(indx_tbl[nmsg].crc) != -1)
			error("message CRC not unique.\n");
		do {
			/* remember where this entry starts, before reading it */
			filepos = ftell(langf);
			res = readstr (langf, strbuf, 1);		/* Abort if find newline first */
			if (*language == '\0')		/* use first language found */
				strcpy(language, lang);
		} while (res && strbuf[0] != '\0' && strcmp(language, lang) != 0);

		if (res == NULL)
			break;
		if (strbuf[0] == '\0')	/* No translation */
			continue;

		indx_tbl[nmsg].offset = filepos;
		++nmsg;
		/* skip the remaining entries of this message group */
		do
			res = readstr (langf, strbuf, 1);		/* Abort if find newline first */
		while (res && strbuf[0] != '\0');
	}
	line = 0;
	indx_hdr.nmsg = nmsg;
	if (verbose)
		fprintf(stderr, "%s: %d messages, %d translations for language \"%s\"\n",
				langfile, total_msgs, nmsg, language);
	if (nmsg == 0) {
		fprintf(stderr, "No translations available for language \"%s\"\n\n",
				language);
		return(-1);
	}

	if (indexfile) {
		if ((indexf = fopen(indexfile, "wb")) == NULL)
			fprintf(stderr, "Cannot create %s\n", indexfile);
		else {
			fwrite(&indx_hdr, 1, sizeof(indx_hdr), indexf);
			fwrite(indx_tbl, sizeof(struct indx_ent), nmsg, indexf);
			if (ferror(indexf) || fclose(indexf))
				fprintf(stderr, "error writing %s\n", indexfile);
		}
	}
	return(0);
}

/*
 * Count a problem and report it on stderr as
 * "<langfile>:<line>: <message>".  The file name is left out when
 * langfile is empty and the line number when line is 0.  The message s
 * is printed as given, so it supplies its own trailing newline.
 */
static void
error(char *s)
{
	errcount += 1;
	if (langfile[0] != '\0')
		fprintf(stderr, "%s:", langfile);
	if (line != 0)
		fprintf(stderr, "%d:", line);
	fputc(' ', stderr);
	fputs(s, stderr);
}

#ifdef LANGTOOL
/*
 * language string tool for manipulating language files
 * link with CRC routines from armor.c
 */

/* Commands selected by the -x, -c and -m options in main() */
#define CMD_EXTRACT	1
#define CMD_CHECK	2
#define CMD_MERGE	3

/* Set by getopt(): argument of the current option, index of the next argv element */
extern char *optarg;
extern int optind;

/* Helpers defined further down in this file */
int usage();
int extract(char *, char *, char **);
int merge(char *, char *, char *, char *);
int checkfile(char *);

/*
 * langtool entry point.  Exactly one of -x (extract), -c (check) or
 * -m (merge) selects the command; -o names the output file for -x/-m.
 * The exit status is the result of extract()/merge(), or for -c it is
 * 1 if any checked file produced errors and 0 otherwise.
 *
 * NOTE(review): getopt() is still called without a declaration, as in
 * the original; declare it via <unistd.h> where that header exists.
 */
int
main(int argc, char **argv)
{
	int opt, cmd = 0, rc = 0;
	char *outfile = NULL;

	init_crc();
	if ((strbuf = (char *) malloc(STRBUFSIZE)) == NULL) {
		perror(argv[0]);
		exit(1);
	}
	while ((opt = getopt(argc, argv, "cxmo:")) != EOF) {
		switch (opt) {
		  case 'c': cmd = CMD_CHECK; break;
		  case 'x': cmd = CMD_EXTRACT; break;
		  case 'm': cmd = CMD_MERGE; break;
		  case 'o': outfile = optarg; break;
		  default: usage();
		}
	}
	argc -= optind; argv += optind;
	switch (cmd) {
		case CMD_EXTRACT:
			/* needs the input file and at least one language id;
			   the id list ends at argv's terminating NULL */
			if (argc < 2)
				usage();
			rc = extract(argv[0], outfile, &argv[1]);
			break;
		case CMD_MERGE:
			if (argc < 2)
				usage();
			/* the language id is optional */
			rc = merge(argv[0], argv[1], outfile,
					argc > 2 ? argv[2] : NULL);
			break;
		case CMD_CHECK:
			verbose = 1;
			if (argc == 0) {
				if (checkfile("language.txt") != 0)
					rc = 1;
			} else {
				for (; argc > 0; --argc, ++argv)
					if (checkfile(*argv) != 0)
						rc = 1;
			}
			break;
		default: usage();
	}
	exit(rc);
}

/*
 * Print the command line summary on stderr and terminate the program
 * with exit status 1.  Never returns; the int return type only keeps
 * the function compatible with callers that use it undeclared.
 */
int
usage(void)
{
	fprintf(stderr, "usage: langtool -[x|c|m] [-o outputfile] ...\n\n\
To extract one or more languages from a merged file:\n\
	langtool -x [-o outputfile] file langID...\n\n\
To check a language file for syntax errors:\n\
	langtool -c file...\n\n\
To merge language \"lang\" from lang_file with source_file:\n\
	langtool -m [-o outputfile] source_file lang_file [lang]\n");
	exit(1);
}

/* Defined further down in this file */
int copypos(FILE *, FILE *, long);

/*
 * Merge the translations of one language from lang_file into base_file.
 *
 * base_file  file whose messages and surrounding text are copied
 * lang_file  file providing the translations
 * outfile    output file name, or NULL to write to stdout
 * langID     language to merge, or NULL to use the first language
 *            found in lang_file
 *
 * For every message of base_file the text up to and including the
 * message is copied, followed by the translation entry from lang_file
 * or the line "No translation"; the entries base_file already had for
 * that message are skipped.  Returns -1 on failure, otherwise the
 * number of errors reported while reading.  All files opened here are
 * closed before returning.
 */
int
merge(char *base_file, char *lang_file, char *outfile, char *langID)
{
	FILE *fp = NULL, *outf = NULL;
	long fpos = 0, filepos;
	int newmsgs = 0;
	int rc = -1;

	/* langID comes from the command line and must fit in language[] */
	if (langID && strlen(langID) >= sizeof(language)) {
		fprintf(stderr, "%s: language identifier too long\n", langID);
		return -1;
	}
	if ((langf = fopen(lang_file, "r")) == NULL) {
		perror(lang_file);
		return -1;
	}
	/* langfile is only used in messages, so truncation is harmless */
	strncpy(langfile, lang_file, sizeof(langfile) - 1);
	langfile[sizeof(langfile) - 1] = '\0';
	if (langID)
		strcpy(language, langID);
	else
		language[0] = '\0';	/* use first language found */

	errcount = 0;
	make_indexfile(NULL);
	if (errcount)
		goto done;

	langfile[0] = '\0';	/* don't print filename in error msgs */

	if ((fp = fopen(base_file, "r")) == NULL) {
		perror(base_file);
		goto done;
	}
	if (outfile == NULL)
		outf = stdout;
	else if ((outf = fopen(outfile, "w")) == NULL) {
		perror(outfile);
		goto done;
	}

	while (readstr(fp, strbuf, 0)) {
		/* copy everything up to and including the message itself */
		copypos(fp, outf, fpos);
		fpos = ftell(fp);

		filepos = lookup_offset(message_crc(strbuf));
		if (filepos == -1) {
			fprintf(outf, "No translation\n");
			++newmsgs;
		} else {
			/* read the entry to find its end, then copy it verbatim */
			fseek(langf, filepos, SEEK_SET);
			readstr(langf, strbuf, 1);
			copypos(langf, outf, filepos);
		}

		/* skip the entries the base file had for this message */
		while (readstr(fp, strbuf, 1))
			if (*strbuf == '\0')
				break;
	}
	copypos(fp, outf, fpos);
	fflush(outf);
	if (ferror(outf)) {
		perror(outfile ? outfile : "stdout");
		goto done;
	}
	if (newmsgs)
		fprintf(stderr, "%d untranslated messages\n", newmsgs);
	rc = errcount;

done:
	if (outf != NULL && outf != stdout && fclose(outf) != 0) {
		perror(outfile);
		rc = -1;
	}
	if (fp != NULL)
		fclose(fp);
	fclose(langf);
	langf = NULL;
	return rc;
}

/* Defined further down in this file */
int copypos(FILE *, FILE *, long);

/*
 * Copy infile to the output, keeping only the translation entries
 * whose language identifier appears in langIDs.
 *
 * infile   merged language file to read
 * outfile  output file name, or NULL to write to stdout
 * langIDs  NULL-terminated array of language identifiers to keep
 *
 * Messages and the text preceding them are always copied.  Returns 0
 * on success and -1 when a file cannot be opened or written.  Both
 * files are closed before returning (stdout is left open).
 */
int
extract(char *infile, char *outfile, char **langIDs)
{
	FILE *fp, *outf = NULL;
	long fpos = 0;
	char **langID;
	int rc = -1;

	if ((fp = fopen(infile, "r")) == NULL) {
		perror(infile);
		return -1;
	}
	if (outfile == NULL) {
		outf = stdout;
	} else if ((outf = fopen(outfile, "w")) == NULL) {
		perror(outfile);
		goto done;
	}

	while (readstr(fp, strbuf, 0)) {
		/* copy everything up to and including the message itself */
		copypos(fp, outf, fpos);
		fpos = ftell(fp);
		while (readstr(fp, strbuf, 1)) {
			if (*strbuf == '\0')
				break;
			/* "lang" was set by readstr() for the entry just read */
			for (langID = langIDs; *langID; ++langID) {
				if (strcmp(lang, *langID) == 0)
					copypos(fp, outf, fpos);
			}
			fpos = ftell(fp);
		}
	}
	copypos(fp, outf, fpos);
	fflush(outf);
	if (ferror(outf)) {
		perror(outfile ? outfile : "stdout");
		goto done;
	}
	rc = 0;

done:
	if (outf != NULL && outf != stdout && fclose(outf) != 0) {
		perror(outfile);
		rc = -1;
	}
	fclose(fp);
	return rc;
}

/*
 * Check the syntax of language file "name" by building its index in
 * memory (nothing is written).  The messages are counted for the first
 * language found in the file.  Returns -1 if the file cannot be
 * opened, otherwise the number of errors reported.
 */
int
checkfile(char *name)
{
	if ((langf = fopen(name, "rb")) == NULL) {
		perror(name);
		return -1;
	}
	/* name comes from the command line and may exceed langfile[];
	   it is only used in messages, so truncation is harmless */
	strncpy(langfile, name, sizeof(langfile) - 1);
	langfile[sizeof(langfile) - 1] = '\0';
	language[0] = '\0';	/* count messages for first language */
	errcount = 0;
	make_indexfile(NULL);
	fclose(langf);
	return errcount;
}

/* Defined just below in this file */
int copyfile(FILE *, FILE *, long);

/*
 * Copy the bytes of f between offset pos and the current position of f
 * to g.  f is moved back to pos and read forward again, which leaves
 * it at the position it had on entry.  Returns 0, or -1 when f cannot
 * be repositioned (nothing is copied in that case).
 */
int
copypos(FILE *f, FILE *g, long pos)
{
	long size;

	size = ftell(f) - pos;
	if (fseek(f, pos, SEEK_SET) != 0)
		return(-1);
	copyfile(f, g, size);
	return(0);
}

/*
 * Copy at most n bytes from the current position of f to g, stopping
 * early at end of file.  Returns 0, or -1 as soon as writing to g
 * fails.
 */
int
copyfile(FILE *f, FILE *g, long n)
{
	int c;

	while (--n >= 0 && (c = getc(f)) != EOF) {
		if (putc(c, g) == EOF)
			return(-1);
	}
	return(0);
}

/*
 * Size of the open file f in bytes, found by seeking to its end.
 * The read/write position f had on entry is restored before returning.
 */
long
fsize(FILE *f)
{
	long saved = ftell(f);
	long total;

	fseek(f, 0L, SEEK_END);
	total = ftell(f);
	fseek(f, saved, SEEK_SET);
	return total;
}
#endif /* LANGTOOL */

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.