This is check.c in view mode; [Download] [Up]
/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.
** All Rights Reserved.
** Kevin Hughes, kevinh@eit.com
** 3/11/94
*/
#include "swish.h"
#include "check.h"
/* Check if a file with a particular suffix should be indexed
** according to the settings in the configuration file.
*/
int isoksuffix(filename, rulelist)
char *filename;
struct swline *rulelist;
{
int badfile;
char *c, suffix[MAXSUFFIXLEN], checksuffix[MAXSUFFIXLEN];
struct swline *tmplist;
tmplist = rulelist;
if (tmplist == NULL)
return 1;
if ((c = (char *) strrchr(filename, '.')) == NULL)
return 0;
badfile = 1;
strcpy(checksuffix, c + 1);
while (tmplist != NULL) {
if ((c = (char *) strrchr(tmplist->line, '.')) == NULL)
strcpy(suffix, tmplist->line);
else
strcpy(suffix, c + 1);
if (lstrstr(suffix, checksuffix) && strlen(suffix) ==
strlen(checksuffix))
badfile = 0;
tmplist = tmplist->next;
}
return !(badfile);
}
/* Check if a particular title should be ignored
** according to the settings in the configuration file.
*/
int isoktitle(title)
char *title;
{
int badfile;
struct swline *tmplist;
badfile = 0;
tmplist = titconlist;
while (tmplist != NULL) {
if (lstrstr(title, tmplist->line)) {
badfile = 1;
break;
}
tmplist = tmplist->next;
}
if (badfile)
return 0;
else
return 1;
}
/* Should a word be indexed? Consults the stopword hash list
** and checks if the word is of a reasonable length...
** If you have any good rules that can work with most languages,
** please let me know...
*/
int isokword(word)
char *word;
{
int i, same, hasnumber, hasvowel, hascons,
numberrow, vowelrow, consrow;
char lastchar;
if (word[0] == '\0')
return 0;
if (isstopword(word))
return 0;
if (strlen(word) < MINWORDLIMIT || strlen(word) > MAXWORDLIMIT)
return 0;
lastchar = ':';
same = 0;
hasnumber = hasvowel = hascons = 0;
numberrow = vowelrow = consrow = 0;
for (i = 0; word[i] != '\0'; i++) {
if (word[i] == lastchar) {
same++;
if (same > IGNORESAME)
return 0;
}
else
same = 0;
if (isdigit(word[i])) {
hasnumber = 1;
numberrow++;
if (numberrow > IGNOREROWN)
return 0;
vowelrow = 0;
consrow = 0;
}
else if (isvowel(word[i])) {
hasvowel = 1;
vowelrow++;
if (vowelrow > IGNOREROWV)
return 0;
numberrow = 0;
consrow = 0;
}
else if (!ispunct(word[i])) {
hascons = 1;
consrow++;
if (consrow > IGNOREROWC)
return 0;
numberrow = 0;
vowelrow = 0;
}
lastchar = word[i];
}
if (IGNOREALLV)
if (hasvowel && !hascons)
return 0;
if (IGNOREALLC)
if (hascons && !hasvowel)
return 0;
if (IGNOREALLN)
if (hasnumber && !hasvowel && !hascons)
return 0;
return 1;
}
/* Does a word have valid characters?
*/
int hasokchars(word)
char *word;
{
int i, j;
char c;
c = word[strlen(word) - 1];
for (i = j = 0; BEGINCHARS[i] != '\0'; i++)
if (word[0] == BEGINCHARS[i])
j++;
if (!j)
return 0;
for (i = j = 0; ENDCHARS[i] != '\0'; i++)
if (c == ENDCHARS[i])
j++;
if (!j)
return 0;
for (i = 0; word[i] != '\0'; i++)
for (j = 0; WORDCHARS[j] != '\0'; j++)
if (word[i] == WORDCHARS[j])
return 1;
return 0;
}
/* Is a letter a vowel?
*/
int isvowel(c)
char c;
{
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1;
return 0;
}
/* This checks is a filename has one of the following suffixes:
** "htm", "HTM", "html", "HTML", "shtml", "SHTML".
*/
int ishtml(filename)
char *filename;
{
char *c, suffix[MAXSUFFIXLEN];
c = (char *) strrchr(filename, '.');
if (c == NULL)
return 0;
strcpy(suffix, c + 1);
if (suffix[0] == '\0')
return 0;
if (!strncmp(suffix, "htm", 3))
return 1;
else if (!strncmp(suffix, "HTM", 3))
return 1;
else if (!strncmp(suffix, "shtml", 5))
return 1;
else if (!strncmp(suffix, "SHTML", 5))
return 1;
return 0;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.