This is string.c in view mode; [Download] [Up]
/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.
** All Rights Reserved.
** Kevin Hughes, kevinh@eit.com
** 3/11/94
*/

#include "swish.h"
#include "string.h"

/* Copies at most size - 1 characters of src into dst and always
** terminates dst. The two buffers may overlap or be the same.
*/

static void copybounded(dst, src, size)
     char *dst;
     char *src;
     int size;
{
	int len;

	if (size <= 0)
		return;
	len = (int) strlen(src);
	if (len > size - 1)
		len = size - 1;
	memmove(dst, src, len);
	dst[len] = '\0';
}

/* My own case-insensitive strstr().
** Returns a pointer to the first place in s where t occurs, ignoring
** case, or NULL if there is none. An empty s never matches (even
** against an empty t); an empty t matches at the start of a non-empty s.
*/

char *lstrstr(s, t)
     char *s;
     char *t;
{
	int start, j;

	for (start = 0; s[start]; start++) {
		/* <ctype.h> functions need unsigned char values. */
		for (j = 0; s[start + j] && t[j] &&
		     tolower((unsigned char) s[start + j]) ==
		     tolower((unsigned char) t[j]); j++)
			;
		if (t[j] == '\0')
			return s + start;
	}
	return NULL;
}

/* Gets the next word in a line. If the word's in quotes,
** include blank spaces in the word or phrase.
**
** On success the word is returned in a static buffer (overwritten by
** the next call) and *skiplen is set to the number of characters
** consumed from the start of line. An empty string is returned, and
** *skiplen is left untouched, when the line holds no further word or
** when the end of the string is reached before the word is terminated
** by whitespace or a closing quote. Words are cut off at
** MAXSTRLEN - 1 characters.
*/

char *getword(line, skiplen)
     char *line;
     int *skiplen;
{
	int i, inquotes;
	char *start;
	static char word[MAXSTRLEN];

	start = line;

	while (isspace((unsigned char) *line))
		line++;
	if (!(*line))
		return "\0";

	inquotes = (*line == '\"');
	if (inquotes)
		line++;

	/* Leave room for the terminator: stop at MAXSTRLEN - 1. */
	for (i = 0; *line && i < MAXSTRLEN - 1; line++) {
		if (inquotes ? (*line == '\"') :
		    isspace((unsigned char) *line))
			break;
		word[i++] = *line;
	}
	word[i] = '\0';

	if (!(*line))
		return "\0";
	if (*line == '\"')
		line++;

	*skiplen = (int) (line - start);
	return word;
}

/* Gets the value of a variable in a line of the configuration file.
** Basically, anything in quotes or an argument to a variable.
**
** The line must start with var (compared without regard to case).
** The text that follows, up to a quote, a newline or the end of the
** line, is copied into value and also returned in a static buffer.
** Returns NULL if the line does not start with var or has no value.
** The value is cut off at MAXSTRLEN - 1 characters; the caller's
** value buffer must be able to hold that much.
*/

char *getconfvalue(line, var, value)
     char *line;
     char *var;
     char *value;
{
	int i;
	char *c;
	static char tmpvalue[MAXSTRLEN];

	c = lstrstr(line, var);
	if (c == NULL || c != line)
		return NULL;

	c += strlen(var);
	while (isspace((unsigned char) *c) || *c == '\"')
		c++;
	if (*c == '\0')
		return NULL;

	for (i = 0; *c != '\0' && *c != '\"' && *c != '\n' &&
	     i < MAXSTRLEN - 1; c++)
		tmpvalue[i++] = *c;
	tmpvalue[i] = '\0';

	strcpy(value, tmpvalue);
	return tmpvalue;
}

/* Tidies up raw title text in place: newlines become spaces, leading
** and trailing whitespace and double quotes are removed, and any
** remaining double quotes become single quotes.
*/

static void cleantitle(title)
     char *title;
{
	int i, j;

	for (i = 0; title[i]; i++)
		if (title[i] == '\n')
			title[i] = ' ';

	for (i = 0; isspace((unsigned char) title[i]) || title[i] == '\"';
	     i++)
		;
	for (j = 0; title[i]; i++, j++)
		title[j] = title[i];
	/* Terminate here, or the old tail would remain after the text. */
	title[j] = '\0';

	while (j > 0 && (isspace((unsigned char) title[j - 1]) ||
	       title[j - 1] == '\"'))
		title[--j] = '\0';

	for (j = 0; title[j]; j++)
		if (title[j] == '\"')
			title[j] = '\'';
}

/* Extracts anything in <title> tags from an HTML file and returns it.
** Otherwise, only the file name without its path is returned.
**
** Only the first TITLETOPLINES lines of the file are examined
** (newlines inside tags are not counted). Text is collected from a
** <title> tag until the next tag of any kind, and returned when
** </title> is seen. Both results live in static buffers that are
** overwritten by the next call and are cut off at MAXTITLELEN - 1
** characters.
*/

char *parsetitle(filename)
     char *filename;
{
	register int c, d;
	char *tag, *slash;
	static char title[MAXTITLELEN], shorttitle[MAXTITLELEN];
	int lines, status, taglen, tagcap, titlelen;
	FILE *fp;

	slash = strrchr(filename, '/');
	copybounded(shorttitle, slash ? slash + 1 : filename, MAXTITLELEN);

	fp = fopen(filename, "r");
	if (fp == NULL)
		return shorttitle;

	/* One tag buffer, reused and grown for every tag in the file. */
	tagcap = MAXSTRLEN;
	tag = (char *) emalloc(tagcap);

	titlelen = 0;
	title[0] = '\0';
	lines = status = 0;

	while (lines < TITLETOPLINES) {
		c = getc(fp);
		/* EOF covers both end of file and read errors. */
		if (c == EOF)
			break;
		if (c == '\n')
			lines++;

		if (c != '<') {
			if (status == TI_FOUND && titlelen < MAXTITLELEN - 1)
				title[titlelen++] = c;
			continue;
		}

		/* Any tag ends title collection until <title> is seen. */
		status = TI_OPEN;
		taglen = 0;
		tag[taglen++] = '<';
		do {
			d = getc(fp);
			if (d == EOF)
				goto notitle;
			/* Room for this character and the terminator. */
			if (taglen + 2 > tagcap) {
				tagcap += MAXSTRLEN;
				tag = (char *) erealloc(tag, tagcap);
			}
			tag[taglen++] = d;
		} while (d != '>');
		tag[taglen] = '\0';

		if (lstrstr(tag, "</title>")) {
			title[titlelen] = '\0';
			fclose(fp);
			free(tag);
			cleantitle(title);
			return title;
		}
		if (lstrstr(tag, "<title>"))
			status = TI_FOUND;
	}

notitle:
	fclose(fp);
	free(tag);
	return shorttitle;
}

/* Malloc()'s a string, returns it.
** The copy is allocated with emalloc(); the caller owns it.
*/

char *mystrdup(s)
     char *s;
{
	char *copy;

	copy = (char *) emalloc(strlen(s) + 1);
	strcpy(copy, s);
	return copy;
}

/* Is a character a valid word character?
** Returns 1 if the lowercased character appears in WORDCHARS, else 0.
*/

int iswordchar(c)
     char c;
{
	int d, i;

	d = tolower((unsigned char) c);
	for (i = 0; WORDCHARS[i] != '\0'; i++)
		if (d == (unsigned char) WORDCHARS[i])
			return 1;
	return 0;
}

/* In a string, replaces all occurrences of "oldpiece" with "newpiece".
**
** Returns string itself when nothing is replaced (no occurrence, an
** empty oldpiece, or a string too long for the work buffer); otherwise
** returns a static buffer that is overwritten by the next call.
** After each replacement the search restarts at the beginning, so
** occurrences formed by a replacement are replaced as well. When
** newpiece itself contains oldpiece that would never end, so the
** inserted text is skipped instead. Replacing stops once the result
** would no longer fit in MAXSTRLEN - 1 characters.
*/

char *replace(string, oldpiece, newpiece)
     char *string;
     char *oldpiece;
     char *newpiece;
{
	int len, oldlen, newlen, at, from, rescan;
	char *hit;
	char work[MAXSTRLEN];
	static char newstring[MAXSTRLEN];

	oldlen = (int) strlen(oldpiece);
	if (oldlen == 0 || strstr(string, oldpiece) == NULL)
		return string;
	len = (int) strlen(string);
	if (len >= MAXSTRLEN)
		return string;
	newlen = (int) strlen(newpiece);
	rescan = (strstr(newpiece, oldpiece) == NULL);

	/* Work on a private copy: string may be our own static buffer. */
	memcpy(work, string, len + 1);
	from = 0;
	while ((hit = strstr(work + from, oldpiece)) != NULL) {
		if (len - oldlen + newlen >= MAXSTRLEN)
			break;
		at = (int) (hit - work);
		memmove(hit + newlen, hit + oldlen, len - at - oldlen + 1);
		memcpy(hit, newpiece, newlen);
		len += newlen - oldlen;
		from = rescan ? 0 : at + newlen;
	}

	memcpy(newstring, work, len + 1);
	return newstring;
}

/* Returns the position of c in WORDCHARS, or the length of WORDCHARS
** if c does not appear in it.
*/

static int wordcharrank(c)
     int c;
{
	int i;

	for (i = 0; WORDCHARS[i] != '\0'; i++)
		if (c == WORDCHARS[i])
			break;
	return i;
}

/* Like strcmp(), but the order of sorting the first char is
** determined by the order of the characters in the wordchars array.
** If neither first character is in WORDCHARS, strcmp() decides, so
** that swapping the arguments always swaps the sign of the result.
*/

int wordcompare(s1, s2)
     char *s1;
     char *s2;
{
	int rank1, rank2;

	if (s1[0] == s2[0])
		return strcmp(s1, s2);

	rank1 = wordcharrank(s1[0]);
	rank2 = wordcharrank(s2[0]);
	if (rank1 == rank2)
		return strcmp(s1, s2);
	return (rank1 < rank2) ? -1 : 1;
}

/* Builds "<name>;" in buf, which must hold MAXENTLEN characters.
** Returns 1 on success, 0 if name is empty or the result does not fit.
*/

static int makeentity(buf, name)
     char *buf;
     char *name;
{
	int len;

	len = (int) strlen(name);
	if (len == 0 || len + 2 > MAXENTLEN)
		return 0;
	memcpy(buf, name, len);
	buf[len] = ';';
	buf[len + 1] = '\0';
	return 1;
}

/* Replaces oldpiece with newpiece throughout word, a buffer of
** MAXWORDLEN characters, by way of replace(). Returns 1 if word was
** changed, 0 if it was left alone (nothing to replace, or the result
** would not fit).
*/

static int replaceinword(word, oldpiece, newpiece)
     char *word;
     char *oldpiece;
     char *newpiece;
{
	char *result;

	if (strstr(word, oldpiece) == NULL)
		return 0;
	result = replace(word, oldpiece, newpiece);
	if (result == word || strcmp(result, word) == 0)
		return 0;
	if ((int) strlen(result) >= MAXWORDLEN)
		return 0;
	strcpy(word, result);
	return 1;
}

/* This converts HTML numbered entities (such as &#169;)
** to strings (like &copy;). Much of this function is
** simply adding semicolons in the right places.
** This and the functions it calls are not very fast
** and could be made faster.
**
** Returns s itself if it contains no '&' or is longer than
** MAXWORDLIMIT; otherwise returns a static buffer that is overwritten
** by the next call. If ASCIIENTITIES is set, entities are further
** converted to their ASCII equivalents.
*/

char *convertentities(s)
     char *s;
{
	int len, entlen, skip;
	char ent[MAXENTLEN];
	static char newword[MAXWORDLEN];

	if (strchr(s, '&') == NULL)
		return s;
	if (strlen(s) > MAXWORDLIMIT)
		return s;

	len = 0;
	while (*s != '\0') {
		if (*s != '&') {
			if (len + 1 > MAXWORDLEN - 1)
				break;
			newword[len++] = *s++;
			continue;
		}

		copybounded(ent, getent(s, &skip), MAXENTLEN);
		if (ent[0] == '\0') {
			/* Not an entity: keep the ampersand as it is. */
			if (len + 1 > MAXWORDLEN - 1)
				break;
			newword[len++] = *s++;
			continue;
		}

		/* Step over the entity and its semicolon, if it has one. */
		s += skip;
		if (*s == ';')
			s++;

		entlen = (int) strlen(ent);
		if (len + entlen + 1 > MAXWORDLEN - 1)
			break;
		memcpy(newword + len, ent, entlen);
		len += entlen;
		newword[len++] = ';';
	}
	newword[len] = '\0';

	copybounded(newword, (char *) converttonamed(newword), MAXWORDLEN);
	if (ASCIIENTITIES)
		copybounded(newword, (char *) converttoascii(newword),
			    MAXWORDLEN);

	return newword;
}

/* Returns a matching entity that matches the beginning of a string, if any.
**
** s is expected to start with '&'. For a numbered entity ("&#" then
** digits) the "&#" and the digits are returned; it is rejected if the
** sixth character of s is a digit. Otherwise the first named entity in
** the entities table that s starts with is returned. The result has no
** semicolon and lives in a static buffer; *skip is set to its length.
** If nothing matches, an empty string is returned and *skip is 0.
*/

char *getent(s, skip)
     char *s;
     int *skip;
{
	int i, len;
	static char ent[MAXENTLEN];

	*skip = 0;
	/* strncpy() zero-fills the rest of ent; it may not terminate. */
	strncpy(ent, s, MAXENTLEN);
	ent[MAXENTLEN - 1] = '\0';

	if (ent[1] == '#') {
		if (MAXENTLEN > 5 && isdigit((unsigned char) ent[5]))
			return "\0";
		for (i = 2; isdigit((unsigned char) ent[i]); i++)
			;
		ent[i] = '\0';
		*skip = i;
		return ent;
	}

	for (i = 0; entities[i] != NULL; i += 3) {
		len = (int) strlen(entities[i]);
		if (len == 0)
			continue;
		if (!strncmp(entities[i], ent, len)) {
			/* A full match means len fits inside ent. */
			ent[len] = '\0';
			*skip = len;
			return ent;
		}
	}

	return "\0";
}

/* This is the real function called by convertentities() that
** changes numbered to named entities.
**
** Every "numbered;" entity whose table entry has a name is replaced
** by "named;". Passes over the table are repeated while they change
** the word and hasnumbered() still finds something to convert.
** The result is in a static buffer, overwritten by the next call.
*/

char *converttonamed(s)
     char *s;
{
	int i, changed;
	char testent[MAXENTLEN], newent[MAXENTLEN];
	static char newword[MAXWORDLEN];

	copybounded(newword, s, MAXWORDLEN);

	do {
		changed = 0;
		for (i = 0; entities[i] != NULL; i += 3) {
			if (!makeentity(testent, entities[i + 1]))
				continue;
			if (!makeentity(newent, entities[i]))
				continue;
			if (replaceinword(newword, testent, newent))
				changed = 1;
		}
		/* Stop when a pass changes nothing, so this always ends. */
	} while (changed && hasnumbered(newword));

	return newword;
}

/* Does a string still contain numbered entities that can be converted?
** Returns 1 if s contains a "numbered;" entity whose table entry also
** has a name, else 0.
*/

int hasnumbered(s)
     char *s;
{
	int i;
	char testent[MAXENTLEN];

	for (i = 0; entities[i] != NULL; i += 3) {
		if ((entities[i])[0] == '\0')
			continue;
		if (!makeentity(testent, entities[i + 1]))
			continue;
		if (strstr(s, testent) != NULL)
			return 1;
	}

	return 0;
}

/* This function converts all convertable named and numbered
** entities to their ASCII equivalents, if they exist.
**
** Passes over the table are repeated while they change the word and
** hasnonascii() still finds something to convert. The result is in a
** static buffer, overwritten by the next call.
*/

char *converttoascii(s)
     char *s;
{
	int i, changed;
	char *ascii, piece[MAXENTLEN];
	static char newword[MAXWORDLEN];

	copybounded(newword, s, MAXWORDLEN);

	do {
		changed = 0;
		for (i = 0; entities[i] != NULL; i += 3) {
			ascii = entities[i + 2];
			if (ascii[0] == '\0')
				continue;
			if (makeentity(piece, entities[i]) &&
			    replaceinword(newword, piece, ascii))
				changed = 1;
			if (makeentity(piece, entities[i + 1]) &&
			    replaceinword(newword, piece, ascii))
				changed = 1;
		}
		/* Stop when a pass changes nothing, so this always ends. */
	} while (changed && hasnonascii(newword));

	return newword;
}

/* Does a string still contain numbered or named entities that can
** be converted to ascii equivalents?
**
** Entities are looked for with their closing semicolon, which is the
** form converttoascii() replaces; a bare prefix such as the "&#34" in
** "&#345;" does not count. Returns 1 if one is found, else 0.
*/

int hasnonascii(s)
     char *s;
{
	int i;
	char piece[MAXENTLEN];

	for (i = 0; entities[i] != NULL; i += 3) {
		if ((entities[i + 2])[0] == '\0')
			continue;
		if (makeentity(piece, entities[i]) &&
		    strstr(s, piece) != NULL)
			return 1;
		if (makeentity(piece, entities[i + 1]) &&
		    strstr(s, piece) != NULL)
			return 1;
	}

	return 0;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.