ftp.nice.ch/pub/next/unix/network/www/swish.11.NIHS.bs.gnutar.gz#/swish.11/src/check.c

This is check.c in view mode; [Download] [Up]

/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.        
** All Rights Reserved.
** Kevin Hughes, kevinh@eit.com 
** 3/11/94
*/

#include <string.h>

#include "swish.h"
#include "check.h"

/* Check if a file with a particular suffix should be indexed
** according to the settings in the configuration file.
**
** filename: name whose suffix (the text after its last '.') is tested.
** rulelist: list of suffix rules; for each entry only the text after its
**           last '.' is compared, or the whole entry if it contains no '.'.
**
** Returns 1 if rulelist is NULL, or if some entry's suffix has the same
** length as the file's suffix and lstrstr() finds the file's suffix in it.
** Returns 0 otherwise, including when filename contains no '.' at all.
*/

int isoksuffix(filename, rulelist)
     char *filename;
     struct swline *rulelist;
{
	char *dot, *filesuffix, *rulesuffix;
	size_t filelen;
	struct swline *rule;

	if (rulelist == NULL)
		return 1;
	if ((dot = (char *) strrchr(filename, '.')) == NULL)
		return 0;

	/* Compare the suffixes in place instead of copying them into
	** fixed-size buffers, so an overlong suffix cannot overflow anything.
	*/
	filesuffix = dot + 1;
	filelen = strlen(filesuffix);
	for (rule = rulelist; rule != NULL; rule = rule->next) {
		dot = (char *) strrchr(rule->line, '.');
		rulesuffix = (dot == NULL) ? rule->line : dot + 1;
		/* Equal lengths plus a substring hit means the two
		** suffixes match in full, not just partially.
		*/
		if (strlen(rulesuffix) == filelen &&
		    lstrstr(rulesuffix, filesuffix))
			return 1;
	}
	return 0;
}

/* Check if a particular title should be ignored
** according to the settings in the configuration file.
**
** Walks titconlist and returns 0 as soon as lstrstr() finds an entry's
** text inside the title; returns 1 if no entry matches or the list
** is empty.
*/

int isoktitle(title)
     char *title;
{
	struct swline *entry;

	for (entry = titconlist; entry != NULL; entry = entry->next)
		if (lstrstr(title, entry->line))
			return 0;
	return 1;
}

/* Should a word be indexed? Consults the stopword hash list
** and checks if the word is of a reasonable length...
** If you have any good rules that can work with most languages,
** please let me know...
**
** Returns 0 when the word:
**   - is empty or is a stopword;
**   - is shorter than MINWORDLIMIT or longer than MAXWORDLIMIT;
**   - has more than IGNORESAME consecutive characters that each equal
**     the character before them;
**   - contains a run of more than IGNOREROWN digits, IGNOREROWV vowels
**     or IGNOREROWC other non-punctuation characters;
**   - is all vowels (if IGNOREALLV), all consonants (if IGNOREALLC) or
**     all digits (if IGNOREALLN), punctuation aside.
** Returns 1 otherwise.
*/

int isokword(word)
      char *word;
{
	int i, same, hasnumber, hasvowel, hascons,
		numberrow, vowelrow, consrow;
	size_t len;
	unsigned char ch;
	char lastchar;

	if (word[0] == '\0')
		return 0;

	if (isstopword(word))
		return 0;
	len = strlen(word);
	if (len < MINWORDLIMIT || len > MAXWORDLIMIT)
		return 0;

	lastchar = ':';
	same = 0;
	hasnumber = hasvowel = hascons = 0;
	numberrow = vowelrow = consrow = 0;
	for (i = 0; word[i] != '\0'; i++) {
		if (word[i] == lastchar) {
			same++;
			if (same > IGNORESAME)
				return 0;
		}
		else
			same = 0;

		/* The <ctype.h> classifiers are only defined for values
		** representable as unsigned char (or EOF); a plain char
		** holding an 8-bit character may be negative.
		*/
		ch = (unsigned char) word[i];
		if (isdigit(ch)) {
			hasnumber = 1;
			numberrow++;
			if (numberrow > IGNOREROWN)
				return 0;
			vowelrow = 0;
			consrow = 0;
		}
		else if (isvowel(word[i])) {
			hasvowel = 1;
			vowelrow++;
			if (vowelrow > IGNOREROWV)
				return 0;
			numberrow = 0;
			consrow = 0;
		}
		else if (!ispunct(ch)) {
			hascons = 1;
			consrow++;
			if (consrow > IGNOREROWC)
				return 0;
			numberrow = 0;
			vowelrow = 0;
		}
		/* Punctuation falls through every branch: it neither
		** extends nor resets any of the run counters.
		*/
		lastchar = word[i];
	}

	if (IGNOREALLV && hasvowel && !hascons)
		return 0;
	if (IGNOREALLC && hascons && !hasvowel)
		return 0;
	if (IGNOREALLN && hasnumber && !hasvowel && !hascons)
		return 0;

	return 1;
}

/* Does a word have valid characters?
**
** Returns 1 only if the word's first character is in BEGINCHARS, its
** last character is in ENDCHARS, and at least one of its characters is
** in WORDCHARS. Returns 0 otherwise; an empty word is always rejected.
*/

int hasokchars(word)
     char *word;
{
	size_t len;

	/* An empty word has no last character; looking one up would
	** read before the start of the string.
	*/
	len = strlen(word);
	if (len == 0)
		return 0;

	/* Both characters tested here are non-NUL, so strchr() cannot
	** match the terminator of the character set.
	*/
	if (strchr(BEGINCHARS, word[0]) == NULL)
		return 0;
	if (strchr(ENDCHARS, word[len - 1]) == NULL)
		return 0;

	return strpbrk(word, WORDCHARS) != NULL;
}

/* Is a letter a vowel?
** Returns 1 for the lowercase letters a, e, i, o and u, and 0 for
** every other character (uppercase vowels included).
*/

int isvowel(c)
     char c;
{
	switch (c) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
		return 1;
	default:
		return 0;
	}
}

/* This checks if a filename has one of the following suffixes:
** "htm", "HTM", "html", "HTML", "shtml", "SHTML".
**
** The suffix is the text after the last '.' in filename. It is matched
** by prefix, so any suffix beginning with "htm", "HTM", "shtml" or
** "SHTML" is accepted. Returns 1 on a match and 0 otherwise, including
** when filename has no '.' or nothing follows the last '.'.
*/

int ishtml(filename)
     char *filename;
{
	static char *prefixes[] = { "htm", "HTM", "shtml", "SHTML" };
	char *suffix;
	size_t i;

	suffix = (char *) strrchr(filename, '.');
	if (suffix == NULL)
		return 0;

	/* Match the suffix where it sits in filename; copying it into a
	** fixed-size buffer would overflow on an overlong extension.
	** An empty suffix simply fails every comparison below.
	*/
	suffix++;
	for (i = 0; i < sizeof prefixes / sizeof prefixes[0]; i++)
		if (!strncmp(suffix, prefixes[i], strlen(prefixes[i])))
			return 1;
	return 0;
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.