/* This is string.c in view mode; [Download] [Up] */
/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.
** All Rights Reserved.
** Kevin Hughes, kevinh@eit.com
** 3/11/94
*/
#include "swish.h"
#include "string.h"
/* Case-insensitive strstr().
**
** Returns a pointer to the first place in s where t occurs, comparing
** characters through tolower(), or NULL when t does not occur.
** An empty t matches at the start of any non-empty s; an empty s
** never matches anything.
*/
char *lstrstr(s, t)
char *s;
char *t;
{
    int i, j, k;

    for (i = 0; s[i]; i++) {
        /* Characters are converted to unsigned char first: handing a
        ** negative plain char to a <ctype.h> function is undefined.
        */
        for (j = 0, k = i; s[k] && t[j] &&
          tolower((unsigned char) s[k]) == tolower((unsigned char) t[j]);
          j++, k++)
            ;
        /* Reaching the end of t means every character matched. */
        if (t[j] == '\0')
            return s + i;
    }
    return NULL;
}
/* Gets the next word in a line. If the word's in quotes,
** include blank spaces in the word or phrase.
**
** line is scanned past leading whitespace; the word ends at the next
** whitespace character, or at the closing quote when it started with
** a double quote. On success the word is returned in a static buffer
** (overwritten by the next call) and *skiplen receives the number of
** characters consumed from line, including a closing quote.
**
** An empty string is returned, and *skiplen is left untouched, when
** the line holds no further word or when the end of the line is hit
** before the word is terminated. Words longer than MAXSTRLEN - 1
** characters are cut off at that length.
*/
char *getword(line, skiplen)
char *line;
int *skiplen;
{
    int i, inquotes;
    char *start;
    static char word[MAXSTRLEN];

    start = line;
    while (isspace((unsigned char) *line))
        line++;
    if (!(*line))
        return "\0";

    if (*line == '\"') {
        inquotes = 1;
        line++;
    }
    else
        inquotes = 0;

    /* Stop one short of the buffer size so the terminator always fits. */
    for (i = 0; *line && i < MAXSTRLEN - 1 &&
      ((inquotes) ? (*line != '\"') :
      (!isspace((unsigned char) *line))); line++)
        word[i++] = *line;
    word[i] = '\0';

    if (!(*line))
        return "\0";
    if (*line == '\"')
        line++;

    *skiplen = line - start;
    return word;
}
/* Gets the value of a variable in a line of the configuration file.
** Basically, anything in quotes or an argument to a variable.
**
** The line must begin with var (compared case-insensitively). The
** whitespace and double quotes that follow are skipped, and the text
** up to the next double quote, newline or end of string is the value.
** That value is copied into value and also returned in a static
** buffer that the next call overwrites. NULL is returned when the
** line does not start with var or nothing follows it.
**
** The value is cut off at MAXSTRLEN - 1 characters; the caller's
** value buffer has to be able to hold that much.
*/
char *getconfvalue(line, var, value)
char *line;
char *var;
char *value;
{
    int i;
    char *c;
    static char tmpvalue[MAXSTRLEN];

    c = (char *) lstrstr(line, var);
    if (c == NULL || c != line)
        return NULL;

    c += strlen(var);
    while (isspace((unsigned char) *c) || *c == '\"')
        c++;
    if (*c == '\0')
        return NULL;

    /* Stop one short of the buffer size so the terminator always fits. */
    for (i = 0; *c != '\0' && *c != '\"' && *c != '\n' &&
      i < MAXSTRLEN - 1; c++)
        tmpvalue[i++] = *c;
    tmpvalue[i] = '\0';

    strcpy(value, tmpvalue);
    return tmpvalue;
}
/* Extracts anything in <title> tags from an HTML file and returns it.
** Otherwise, only the file name without its path is returned.
**
** Reading stops once TITLETOPLINES newlines have been seen outside of
** tags, or at end of file. Text is collected after a tag containing
** "<title>" and returned when a tag containing "</title>" is read; any
** other tag in between stops the collection. The returned title has
** newlines turned into spaces, leading and trailing blanks and double
** quotes removed, and remaining double quotes turned into single
** quotes.
**
** Both possible results live in static buffers that the next call
** overwrites; text beyond MAXTITLELEN - 1 characters is dropped.
*/
char *parsetitle(filename)
char *filename;
{
    register int c, d;
    char *tag, *base;
    static char title[MAXTITLELEN], shorttitle[MAXTITLELEN];
    int i, j, lines, status, taglen, tagcap, titlelen;
    FILE *fp;

    /* Fallback result: the file name with any directory part removed. */
    base = strrchr(filename, '/');
    if (base != NULL)
        base++;
    else
        base = filename;
    strncpy(shorttitle, base, MAXTITLELEN - 1);
    shorttitle[MAXTITLELEN - 1] = '\0';

    fp = fopen(filename, "r");
    if (fp == NULL)
        return shorttitle;

    lines = status = 0;
    titlelen = 0;
    title[0] = '\0';

    while (lines < TITLETOPLINES) {
        c = getc(fp);
        /* Testing for EOF directly also ends the loop on a read error. */
        if (c == EOF)
            break;
        if (c == '\n')
            lines++;

        if (c != '<') {
            if (status == TI_FOUND) {
                if (titlelen < MAXTITLELEN - 1)
                    title[titlelen++] = c;
            }
            else if (status == TI_CLOSE)
                break;
            continue;
        }

        /* Read the whole tag, '<' through '>', into a growing buffer. */
        status = TI_OPEN;
        tagcap = MAXSTRLEN;
        tag = (char *) emalloc(tagcap);
        taglen = 0;
        tag[taglen++] = '<';
        while ((d = getc(fp)) != EOF && d != '>') {
            /* Keep room for this character, the '>' and the terminator. */
            if (taglen + 3 > tagcap) {
                tagcap += MAXSTRLEN;
                tag = (char *) erealloc(tag, tagcap);
            }
            tag[taglen++] = d;
        }
        if (d == EOF) {
            free(tag);
            break;
        }
        tag[taglen++] = '>';
        tag[taglen] = '\0';

        if (lstrstr(tag, "</title>")) {
            free(tag);
            fclose(fp);
            title[titlelen] = '\0';

            for (i = 0; title[i]; i++)
                if (title[i] == '\n')
                    title[i] = ' ';

            /* Drop leading blanks and quotes by shifting the text down;
            ** the shifted text has to be terminated again afterwards.
            */
            for (i = 0; isspace((unsigned char) title[i]) ||
              title[i] == '\"'; i++)
                ;
            for (j = 0; title[i]; j++)
                title[j] = title[i++];
            title[j] = '\0';

            /* j is now the length; trim trailing blanks and quotes. */
            while (j > 0 && (isspace((unsigned char) title[j - 1]) ||
              title[j - 1] == '\"'))
                title[--j] = '\0';

            for (j = 0; title[j]; j++)
                if (title[j] == '\"')
                    title[j] = '\'';
            return title;
        }
        if (lstrstr(tag, "<title>"))
            status = TI_FOUND;
        free(tag);
    }

    fclose(fp);
    return shorttitle;
}
/* Returns a newly allocated copy of the string s.
** The storage is obtained from emalloc() and is sized to hold the
** characters of s plus the terminating '\0'.
*/
char *mystrdup(s)
char *s;
{
    char *copy, *dst;

    copy = (char *) emalloc(strlen(s) + 1);
    /* Copy up to and including the terminator. */
    for (dst = copy; (*dst = *s) != '\0'; dst++, s++)
        ;
    return copy;
}
/* Is a character a valid word character?
**
** Returns 1 when the lower-cased form of c appears in WORDCHARS and
** 0 otherwise.
*/
int iswordchar(c)
char c;
{
    char d;
    int i;

    /* tolower() is only defined for unsigned char values (and EOF), so
    ** c is converted before the call; the result is brought back to
    ** char so it compares like the characters stored in WORDCHARS.
    */
    d = (char) tolower((unsigned char) c);
    for (i = 0; WORDCHARS[i] != '\0'; i++)
        if (d == WORDCHARS[i])
            return 1;
    return 0;
}
/* In a string, replaces all occurrences of "oldpiece" with "newpiece".
**
** When nothing needs replacing (no occurrence, or an empty oldpiece)
** the string argument itself is returned. Otherwise the result is
** returned in a static buffer that the next call overwrites.
**
** After each replacement the search starts over from the beginning,
** so an occurrence formed by a replacement is replaced as well. The
** exception is a newpiece that itself contains oldpiece: that could
** never finish, so in that case the search continues after the text
** just inserted.
**
** Everything is bounded by MAXSTRLEN: a string that does not fit is
** returned unchanged, and replacing stops early, leaving later
** occurrences in place, when the result would no longer fit.
*/
char *replace(string, oldpiece, newpiece)
char *string;
char *oldpiece;
char *newpiece;
{
    int len, oldlen, newlen, from, rescan;
    char *hit;
    char work[MAXSTRLEN], tail[MAXSTRLEN];
    static char newstring[MAXSTRLEN];

    if (oldpiece[0] == '\0')
        return string;
    if ((char *) strstr(string, oldpiece) == NULL)
        return string;
    len = strlen(string);
    if (len >= MAXSTRLEN)
        return string;

    oldlen = strlen(oldpiece);
    newlen = strlen(newpiece);
    rescan = ((char *) strstr(newpiece, oldpiece) == NULL);

    /* Work in a local buffer so that string may be the static result
    ** buffer of an earlier call.
    */
    strcpy(work, string);
    from = 0;
    while ((hit = (char *) strstr(work + from, oldpiece)) != NULL) {
        if (len - oldlen + newlen >= MAXSTRLEN)
            break;
        /* Save what follows the match, then splice in newpiece. */
        strcpy(tail, hit + oldlen);
        strcpy(hit, newpiece);
        strcpy(hit + newlen, tail);
        len += newlen - oldlen;
        from = rescan ? 0 : (int) (hit - work) + newlen;
    }

    strcpy(newstring, work);
    return newstring;
}
/* Like strcmp(), but the order of sorting the first char is
** determined by the order of the characters in the wordchars array.
**
** When the first characters are equal the result is that of strcmp().
** Otherwise -1 or 1 is returned according to which first character
** comes earlier in WORDCHARS; a character that is not in WORDCHARS
** sorts after all that are. If neither first character is in
** WORDCHARS, strcmp() decides, so that swapping the arguments always
** reverses the result.
*/
int wordcompare(s1, s2)
char *s1;
char *s2;
{
    register int i, j;

    if (s1[0] == s2[0])
        return strcmp(s1, s2);

    /* Position of each first character in WORDCHARS; a character that
    ** is not found ends up with the index of the terminator.
    */
    for (i = 0; WORDCHARS[i] != '\0'; i++)
        if (s1[0] == WORDCHARS[i])
            break;
    for (j = 0; WORDCHARS[j] != '\0'; j++)
        if (s2[0] == WORDCHARS[j])
            break;

    if (i < j)
        return -1;
    if (i > j)
        return 1;
    return strcmp(s1, s2);
}
/* This converts HTML numbered entities (such as &#169;)
** to strings (like &copy;). Much of this function is
** simply adding semicolons in the right places.
** This and the functions it calls are not very fast
** and could be made faster.
**
** s is returned untouched when it contains no '&' or is longer than
** MAXWORDLIMIT. Otherwise every entity that getent() recognizes is
** rewritten with exactly one trailing ';', the result is passed
** through converttonamed() and, when ASCIIENTITIES is set, through
** converttoascii(), and returned in a static buffer that the next
** call overwrites. Output beyond MAXWORDLEN - 1 characters is dropped.
*/
char *convertentities(s)
char *s;
{
    int len, skip;
    char *e;
    char ent[MAXENTLEN];
    static char newword[MAXWORDLEN];

    if ((char *) strchr(s, '&') == NULL)
        return s;
    if (strlen(s) > MAXWORDLIMIT)
        return s;

    /* The output is built by appending at newword[len]. */
    len = 0;
    for (; *s != '\0'; s++) {
        if (*s != '&') {
            if (len < MAXWORDLEN - 1)
                newword[len++] = *s;
            continue;
        }

        strncpy(ent, getent(s, &skip), MAXENTLEN - 1);
        ent[MAXENTLEN - 1] = '\0';
        if (ent[0] == '\0') {
            /* Not an entity: keep the ampersand as it is. */
            if (len < MAXWORDLEN - 1)
                newword[len++] = '&';
            continue;
        }

        /* Step over the entity and a semicolon that may follow it, then
        ** write the entity followed by a single semicolon.
        */
        s += skip;
        if (*s == ';')
            s++;
        for (e = ent; *e != '\0' && len < MAXWORDLEN - 1; e++)
            newword[len++] = *e;
        if (len < MAXWORDLEN - 1)
            newword[len++] = ';';
        s--;    /* the loop increment moves s forward again */
    }
    newword[len] = '\0';

    strcpy(newword, (char *) converttonamed(newword));
    if (ASCIIENTITIES)
        strcpy(newword, (char *) converttoascii(newword));
    return newword;
}
/* Returns a matching entity that matches the beginning of a string, if any.
**
** s is expected to start at the '&' of a possible entity. For a
** numbered entity ("&#" followed by digits) the text through the last
** digit is returned; for a named entity the first name in the
** entities table that s starts with is returned. In both cases *skip
** receives the length of the returned text. When nothing matches, or
** the sixth character of s is a digit, an empty string is returned
** and *skip is 0.
**
** The result lives in a static buffer that the next call overwrites.
*/
char *getent(s, skip)
char *s;
int *skip;
{
    int i, len;
    static char ent[MAXENTLEN];

    *skip = 0;
    /* strncpy() does not terminate a source that fills the buffer, so
    ** copy one character less and terminate by hand.
    */
    strncpy(ent, s, MAXENTLEN - 1);
    ent[MAXENTLEN - 1] = '\0';

    if (ent[1] == '#') {
        /* NOTE(review): ent[5] assumes MAXENTLEN is larger than 6 --
        ** confirm against swish.h.
        */
        if (isdigit((unsigned char) ent[5]))
            return "\0";
        for (i = 2; isdigit((unsigned char) ent[i]); i++)
            ;
        /* Cut the copy off right after the digits. */
        ent[i] = '\0';
        *skip = strlen(ent);
        return ent;
    }

    /* The table holds three strings per entity; the first is the name. */
    for (i = 0; entities[i] != NULL; i += 3) {
        len = strlen(entities[i]);
        if (len == 0)
            continue;
        if (!strncmp(entities[i], ent, len)) {
            /* ent already starts with the name; drop what follows it. */
            ent[len] = '\0';
            *skip = len;
            return ent;
        }
    }
    return "\0";
}
/* This is the real function called by convertentities() that
** changes numbered to named entities.
**
** For every entry of the entities table that has both a name and a
** number, each "number;" in s is replaced by "name;". The result is
** returned in a static buffer that the next call overwrites and is
** cut off at MAXWORDLEN - 1 characters.
**
** Passes over the table are repeated while hasnumbered() still
** reports something to convert, but only as long as the previous
** pass changed the text, so the function always returns.
*/
char *converttonamed(s)
char *s;
{
    int i, changed, passes;
    char *result;
    char testent[MAXENTLEN], newent[MAXENTLEN];
    static char newword[MAXWORDLEN];

    if (s != newword) {
        strncpy(newword, s, MAXWORDLEN - 1);
        newword[MAXWORDLEN - 1] = '\0';
    }

    passes = 0;
    do {
        changed = 0;
        for (i = 0; entities[i] != NULL; i += 3) {
            /* Without a number there is nothing to look for, and
            ** without a name there is nothing to put in its place.
            */
            if ((entities[i])[0] == '\0' ||
              (entities[i + 1])[0] == '\0')
                continue;
            sprintf(testent, "%s;", entities[i + 1]);
            if ((char *) strstr(newword, testent) == NULL)
                continue;
            sprintf(newent, "%s;", entities[i]);
            result = (char *) replace(newword, testent, newent);
            if (result != newword && strcmp(result, newword) != 0) {
                strncpy(newword, result, MAXWORDLEN - 1);
                newword[MAXWORDLEN - 1] = '\0';
                changed = 1;
            }
        }
    } while (changed && ++passes < MAXWORDLEN && hasnumbered(newword));

    return newword;
}
/* Reports whether s still holds a numbered entity that can be turned
** into a named one: returns 1 when, for some entry of the entities
** table with a non-empty name, the entry's number followed by ';'
** occurs in s, and 0 otherwise.
*/
int hasnumbered(s)
char *s;
{
    int idx;
    char probe[MAXENTLEN];

    idx = 0;
    while (entities[idx] != NULL) {
        /* Table entries come in threes: name, number, third string. */
        sprintf(probe, "%s;", entities[idx + 1]);
        if ((entities[idx])[0] != '\0' && strstr(s, probe) != NULL)
            return 1;
        idx += 3;
    }
    return 0;
}
/* This function converts all convertable named and numbered
** entities to their ASCII equivalents, if they exist.
**
** For every entry of the entities table whose third string is not
** empty, each "name;" and each "number;" in s is replaced by that
** third string. The result is returned in a static buffer that the
** next call overwrites and is cut off at MAXWORDLEN - 1 characters.
**
** Passes over the table are repeated while hasnonascii() still
** reports something to convert, but only as long as the previous
** pass changed the text. Leftovers that cannot be replaced (for
** instance an entity name without its ';') therefore end the
** conversion instead of keeping it going forever.
*/
char *converttoascii(s)
char *s;
{
    int i, k, changed, passes;
    char *result;
    char pattern[MAXENTLEN];
    static char newword[MAXWORDLEN];

    if (s != newword) {
        strncpy(newword, s, MAXWORDLEN - 1);
        newword[MAXWORDLEN - 1] = '\0';
    }

    passes = 0;
    do {
        changed = 0;
        for (i = 0; entities[i] != NULL; i += 3) {
            if ((entities[i + 2])[0] == '\0')
                continue;
            /* k == 0 handles the name, k == 1 the number. */
            for (k = 0; k < 2; k++) {
                if ((entities[i + k])[0] == '\0')
                    continue;
                sprintf(pattern, "%s;", entities[i + k]);
                if ((char *) strstr(newword, pattern) == NULL)
                    continue;
                result = (char *) replace(newword, pattern,
                  entities[i + 2]);
                if (result != newword &&
                  strcmp(result, newword) != 0) {
                    strncpy(newword, result, MAXWORDLEN - 1);
                    newword[MAXWORDLEN - 1] = '\0';
                    changed = 1;
                }
            }
        }
    } while (changed && ++passes < MAXWORDLEN && hasnonascii(newword));

    return newword;
}
/* Does a string still contain numbered or named entities that can
** be converted to ascii equivalents?
*/
int hasnonascii(s)
char *s;
{
int i;
char *c, *d;
for (i = 0; entities[i] != NULL; i += 3) {
c = d = NULL;
if ((entities[i])[0] != '\0')
c = (char *) strstr(s, entities[i]);
if ((entities[i + 1])[0] != '\0')
d = (char *) strstr(s, entities[i + 1]);
if ((entities[i + 2])[0] != '\0')
if (c != NULL || d != NULL)
return 1;
}
return 0;
}
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */