ftp.nice.ch/pub/next/connectivity/infosystems/WAIStation.1.9.6.N.b.tar.gz#/WAIS/ir/server.c

This is server.c in view mode; [Download] [Up]

/* WIDE AREA INFORMATION SERVER SOFTWARE	
   No guarantees or restrictions.  See the readme file for the full standard 
   disclaimer.
   5.29.90	Harry Morris, morris@think.com
*/

/* this file is a server process for a unix machine that takes input from 
   standard in or from a socket and searches the local search engine on the 
   unix box.
   originally written by harry morris.
   modified by brewster kahle. 7/90
   6.xx.90	Brewster - initial implementation of stdio interface
   7.xx.90	Patrick Bray - support for headers and forking processes
   90.07.31	Ephraim - support for logging 

   91.03.03     Jonathan - set searchLog to log_out.
   91.05.23	Jonathan - added fork process for indexer.
                           Fixed version display so it exits.
   91.05.25     Jonathan - added setuid.
   
   Tue Jul  9 12:11:02 1991 -- Michael Haberler mah@wu-wien.ac.at

                Added semi-intelligent INFO database indexing (only done if
		any of the .src files is newer than INFO.dct)
		
		Locking against multiple concurrent INFO rebuilds if 
		running under inetd

		Use scandir() for directory operations

		Works under inetd as well as standalone. Here are my inetd.conf
   		entries (note the missing userid in the Ultrix inetd.conf!):

   hpux 7.0/800, Interactive/386 2.2.1:
	z3950 stream tcp nowait root /usr/local/etc/waisserver waisserver -s \
		-d /usr/logins/mah/wais-sources

   Ultrix 4.1:
	z3950 stream tcp nowait /usr/local/etc/waisserver waisserver -s \
		-d /usr/logins/mah/wais-sources

   Also, add the next line to /etc/services, and tickle your YP server:
	z3950           210/tcp         # wide area information server (wais)

 * $Log:	server.c,v $
 * Revision 1.48  92/05/10  14:47:39  jonathan
 * Update for release.
 * 
 * Revision 1.47  92/05/06  17:34:01  jonathan
 * Changed auto-indexing of .src files to use filename_finish_header_function.
 * 
 * Revision 1.46  92/05/04  17:18:32  jonathan
 * Fixed use of merge_pathname in creating INFO database.
 * 
 * Revision 1.45  92/04/28  15:19:18  jonathan
 * Added decoding of IP address to DNS name in init message handler.
 * 
 * Revision 1.44  92/03/26  18:26:41  jonathan
 * Added extra arguments to index_text_file call.
 * 
 * Revision 1.43  92/03/07  19:41:47  jonathan
 * Added IBM defines, courtesy mycroft@hal.gnu.ai.mit.edu,
 * 
 * Revision 1.42  92/03/05  07:07:58  shen
 * add two more dummy arguments to call to init_search_engine
 * 
 * Revision 1.41  92/02/27  09:58:50  jonathan
 * Put back in setting of core limit to max value, when SET_LIMIT is defined.
 * 
 * Revision 1.40  92/02/24  10:07:41  jonathan
 * Removed reporting functions.
 * 
 * Revision 1.39  92/02/21  12:27:15  jonathan
 * Changed logging of segmentation violation and bus errors to mark log with
 * an error code, and close code.
 * 
 * Revision 1.38  92/02/21  11:00:45  jonathan
 * Added wais_log_level
 * 
 * Revision 1.37  92/02/19  10:16:25  jonathan
 * Added build_catalog to auto-indexer.
 * 
 * Revision 1.36  92/02/16  12:33:21  jonathan
 * Removed code refering to NOINETNTOA, since we should use inet_ntoa.
 * 
 * Revision 1.35  92/02/16  12:05:18  jonathan
 * Added code for GCC incompatibility in inet_ntoa (passing structure as
 * pointer).
 * 
 * Revision 1.34  92/02/14  09:07:34  jonathan
 * Set MAXNAPTIME to 0 so it won't sleep.  Changed the WLOG_ERROR to
 * WLOG_WARNING in the log entry.
 * 
 * Revision 1.33  92/02/12  13:44:20  jonathan
 * Added "$Log" so RCS will put the log message in the header
 * 
 * 
 */

#define SERVER_DATE "Sun May 10 1992"

#ifndef lint
static char *RCSid = "$Header: /tmp_mnt/net/quake/proj/wais/wais-8-b5/ir/RCS/server.c,v 1.48 92/05/10 14:47:39 jonathan Exp $";
#endif

#define INFO_DICT    "INFO.dct"
#define LOCKFILE    "/tmp/INFO.lock" /* while re-indexing INFO */
#define NAPTIME     1		     /* seconds */
#define MAXNAPTIME  0		/* Don't wait, just go on. */

#include "server.h"
#include "sockets.h"
#include <sys/types.h>
#include <sys/stat.h>
#ifdef ultrix
#include <sys/file.h>
#else
#ifdef _IBMR2
#include <fcntl.h>
#else /* ! _IBMR2 */
#ifdef USG
#include <sys/fcntl.h>
#else
#include <sys/file.h>
#endif
#endif /* _IBMR2 */
#endif /* else ultrix */
#ifdef SYSV			
#define SIGCHLD SIGCLD
#endif
#include <signal.h>
#include <string.h>
#include "irdirent.h"
#include "panic.h"
#include "ustubs.h"
#include "transprt.h"
#include "wmessage.h"
#include "ir.h"
#include "wprot.h"
#include "cutil.h"
#include "futil.h"
#include "irext.h"
#include "irsearch.h"

/* to create the INFO index */
#include "irtfiles.h"
#include "irfiles.h"
#include "irhash.h"
#include "version.h"

static long bufferSize = BUFSZ; /* how much we are using
                                   (we get one of these per process) */

char *log_file_name = NULL;

FILE *logfile; /* the logfile */

/*---------------------------------------------------------------------------*/

#define TIMEOUT_LENGTH 36000 /* ten hour timeout. */
#define IDLE_TIME "10 hours"

void
serve_client(in,out, index_directory)
FILE* in;
FILE* out;
char *index_directory;
{ 
  char buf[BUFSZ];		/* contains the message and header */
  char *bufPtr ;		/* points at the begining of the z3950 */
  long size;			/* bytes in the z3950 message */
  WAISMessage header;		/* for storing the header */
  long i;
  long bytesLeft;
  struct itimerval new, old;
  long nextChar;

  new.it_interval.tv_sec = 0;
  new.it_interval.tv_usec = 0;
  new.it_value.tv_sec = TIMEOUT_LENGTH;
  new.it_value.tv_usec = 0;

  getitimer(ITIMER_REAL, &old);
  while (TRUE)
    {
      /* try to read the header */
      for (i = 0; i < HEADER_LENGTH; i++)
	{ 
	  setitimer(ITIMER_REAL, &new, NULL);
	  nextChar = fgetc(in);
	  if (nextChar == EOF)	/* my connection exited, so will I */
	    { 
	      return;
	    }
	  else
	    buf[i] = (char)nextChar;
	}

      setitimer(ITIMER_REAL, &old, NULL);
      /* parse the header */
      readWAISPacketHeader(buf,&header);

      /* make sure we have the right version.  
	 If we dont, we dont know what to do. */
      if (header.hdr_vers > HEADER_VERSION)
	panic("Incompatable header versions (Current version: %c, supplied version: %c.", 
	      HEADER_VERSION, header.hdr_vers) ;

      /* determine the size of the z3950 message */
      {
	char length_array[11];
	strncpy(length_array, header.msg_len, 10);
	length_array[10] = '\0';
	size = atol(length_array);
      }

      /* set bufPtr to start the z3950 message */
      bufPtr = buf + HEADER_LENGTH ;

      /* read the z3950 message */
      for (i = 0; i < size ; i++) {
	setitimer(ITIMER_REAL, &new, NULL);
	buf[i + HEADER_LENGTH] = (char)fgetc(in) ;
      }

      rewind(in);

      /* decode the z3950 if necessary */
      transportDecode((long)header.encoding,bufPtr,&size);
     
      /* XXX handle compression options */

      /* process it the z3950 */
      bytesLeft = bufferSize;

      size = interpret_buffer(bufPtr,size,bufPtr,bytesLeft,
			      &bufferSize,(long)header.hdr_vers,
			      index_directory); 

      /* re-encode the message if necessary */
      transportCode((long)header.encoding,bufPtr,&size); 

      /* XXX handle compression options */

      /* write the new header */
      writeWAISPacketHeader(buf,size,
			    (long)header.msg_type,header.server,
			    (long)header.compression,(long)header.encoding,
			    (long)header.hdr_vers);

      /* write the whole response to the output file */
      for (i = 0; i < size + HEADER_LENGTH; i++)
	fputc(buf[i],out) ;

      fflush(out);		/* flush any file buffers */
      rewind(out);		/* reset the file for read */

    }
}

/*---------------------------------------------------------------------------*/

#ifndef ISC
static void breakKey _AP((long s1,long s2,struct sigcontext* s3,char* s4));
#endif

/* Signal handler (main() installs it for SIGINT).
 * Shuts the search engine down and then reports the interrupt through
 * panic().  None of the four handler arguments is looked at.
 */
static void
breakKey (s1,s2,s3,s4)
long s1;
long s2;
struct sigcontext *s3;
char *s4;
{
  if (finished_search_engine() != 0)
    {
      panic("unable to close search engine");
    }

  panic ("got a ^c");
}

/*---------------------------------------------------------------------------*/

/* Signal handler (main() installs it for SIGCHLD).
 * Collects the exit status of one terminated child so that it does not
 * linger as a zombie.  The handler arguments are not used.
 */
void
childhandler(sig, code, scp, addr)
long sig, code;
struct sigcontext *scp;
char *addr;
{
  /* status is of no interest, we only want the child reaped */
  wait(NULL);
}

/*---------------------------------------------------------------------------*/

/* Signal handler (main() installs it for SIGALRM).
 * Logs that the server has been idle for IDLE_TIME, shuts the search
 * engine down and terminates the process with status 0.
 * The handler arguments are not used.
 */
void
alarmhandler(sig, code, scp, addr)
long sig, code;
struct sigcontext *scp;
char *addr;
{
  waislog(WLOG_HIGH, WLOG_CLOSE,
	  "Server idle longer %s. Closing server and exiting.", IDLE_TIME);

  if (finished_search_engine() != 0)
    {
      panic("unable to close search engine");
    }

  exit(0);
}

/*---------------------------------------------------------------------------*/

/* Signal handler (main() installs it for SIGSEGV).
 * Writes an error entry and a close entry to the log, then calls abort().
 * The handler arguments are not used.
 */
void
seghandler(sig, code, scp, addr)
long sig, code;
struct sigcontext *scp;
char *addr;
{
  /* first the error itself ... */
  waislog(WLOG_HIGH, WLOG_ERROR, "Segmentation violation.");

  /* ... then the closing entry, and out */
  waislog(WLOG_HIGH, WLOG_CLOSE, "Bummer. Closing server and exiting.");

  abort();
}

/*---------------------------------------------------------------------------*/

/* Signal handler (main() installs it for SIGBUS).
 * Writes an error entry and a close entry to the log, then calls abort().
 * The handler arguments are not used.
 */
void
bushandler(sig, code, scp, addr)
long sig, code;
struct sigcontext *scp;
char *addr;
{
  /* first the error itself ... */
  waislog(WLOG_HIGH, WLOG_ERROR, "Bus error.");

  /* ... then the closing entry, and out */
  waislog(WLOG_HIGH, WLOG_CLOSE, "Bummer. Closing server and exiting.");

  abort();
}

/*---------------------------------------------------------------------------*/

#include <pwd.h>

/* Look up the numeric user id for the login name `name' in the password
 * database.  Returns the uid, or -1 when getpwnam() finds no such user.
 */
int finduid(name)
char *name;
{
  struct passwd *entry = getpwnam(name);

  if (entry != NULL)
    return(entry->pw_uid);

  return -1;
}

static  char *index_dir = NULL;
static  time_t info_change_time;
static  int indexing_needed = 0;
static  char *info_dict = INFO_DICT;

extern int alphasort();

/* selection function for scandir()
 * Accepts a directory entry when its extension is source_ext, it is not
 * the INFO source description itself ("INFO" followed by source_ext),
 * and it names a regular file inside index_dir.
 *
 * Side effect: indexing_needed is set as soon as one accepted file is
 * younger than INFO.dct (info_change_time).
 *
 * Returns 1 to keep the entry, 0 to drop it.
 *
 * The old test compared the entry name with info_dict (the path of
 * INFO.dct); that can never match a name ending in source_ext, so the
 * INFO source file was never actually excluded.
 */
static int
srcfiles(e)
	struct dirent *e;
{
	struct stat sb;
	char *lastdot = strrchr(e->d_name,'.');
	int stemlen;

	/* wrong (or no) extension */
	if (lastdot == NULL || strcmp(lastdot,source_ext) != 0)
	    return 0;

	/* the description of the INFO database is not part of it */
	stemlen = (int)(lastdot - e->d_name);
	if (stemlen == 4 && strncmp(e->d_name,"INFO",4) == 0)
	    return 0;

	/* must exist and be a regular file */
	if (stat(merge_pathnames(e->d_name,index_dir), &sb) < 0)
	    return 0;
	if ((sb.st_mode & S_IFMT) != S_IFREG)
	    return 0;

	indexing_needed |= (sb.st_mtime > info_change_time);
	return 1;
}


/*---------------------------------------------------------------------------*/

#ifdef SET_LIMIT
#include <sys/resource.h>
#endif

#define INDEX_FORK

extern char *inet_ntoa ();

void
main(argc,argv)
int argc;
char* argv[];
{ FILE *file;
  long socket;
  char *next_argument = next_arg(&argc, &argv), *command_name;
  boolean use_stdio = TRUE;		/* default is true */
  /* char *log_file_name = NULL; */	/* name of file for error output */
  int child_proc;		/* for the child process id */
  char *uid_name = "root";	/* user id so setuid if root */
  int uid = 0;		/* if not specified, leave as root. */
  int child;
  long cm_mem_percent = 0;  /* default */
  struct stat statbuf;
  struct dirent **list;
  int naptime = 0;
  extern int errno;
  extern char *sys_errlist[];
  char host_name[255], host_address[255];
  extern void filename_finish_header_function();

#ifdef SET_LIMIT
  struct rlimit rlp;

  getrlimit(RLIMIT_CORE, &rlp);
  rlp.rlim_cur = rlp.rlim_max;
  setrlimit(RLIMIT_CORE, &rlp);
#endif

  tcp_port = 210;			/* tcp_port to use */
  command_name = next_argument;
  host_name[0] = 0;
  host_address[0] = 0;

  server_name = s_malloc(255);
  gethostname(server_name, 255);

  wais_pid = getpid();

  if (!strcmp(command_name, "waisserver.d")) {
    struct sockaddr_in source;
    int sourcelen;

    sourcelen = sizeof(struct sockaddr_in);

    if (!getpeername(fileno(stdout),&source,&sourcelen)) {
      struct hostent *peer = NULL;

      peer = gethostbyaddr(&source.sin_addr, 4, AF_INET);

      if(peer != NULL)
	sprintf(host_name, "%s", peer->h_name);

      sprintf(host_address, "%s",
#if defined(sparc) && defined(__GNUC__)
	      inet_ntoa(&source.sin_addr)
#else
	      inet_ntoa(source.sin_addr)
#endif /* sparc */
	      );
    }
    else sprintf(host_address, "Error getting socket: %d, %s.", errno, sys_errlist[errno]);

    use_stdio = TRUE;
  }

  if (argc == 0){
    fprintf(stderr,"Usage: %s [-p [port_number]] [-s] [-d directory] [-u user] [-cmmem number] [-v]\n",
	   command_name);
    fprintf(stderr," -p [port] listen to the port.  If the port is supplied, then\n");
    fprintf(stderr,"    that tcp_port number is used.  If it is not supplied \n");
    fprintf(stderr,"    then the Z39.50 port (210) is used.\n");
    fprintf(stderr," -d directory: means to use the directory as the source of databases.\n");
    fprintf(stderr,"    Defaults to the current directory.\n");
    fprintf(stderr," -e [file]: set log output to file, or /dev/null if not specified.\n");
    fprintf(stderr," -l log_level: set log level.  0 means log nothing,\n");
    fprintf(stderr,"    10 [the default] means log everything.\n");
    fprintf(stderr," -s means listen to standard I/O for queries.  This is the default\n");
    fprintf(stderr," -u user: if started as root, setuid to user after startup.\n");
    fprintf(stderr," -cmmem number: percentage of CM memory to use (CM code only).\n");
    fprintf(stderr," -v prints the version.\n");
    exit(1);
  }
  if(NULL == (next_argument = next_arg(&argc, &argv))){
    fprintf(stderr,"No arguments specified\n");
    exit(0);
  }
  while((next_argument != NULL) &&
	('-' == next_argument[0])){

    /* then we have an argument to process */
    if (0 == strcmp("-p", next_argument)){
      char *peek_argument = peek_arg(&argc, &argv);
      use_stdio = FALSE;
      if ((NULL != peek_argument) && /* if we are not out of args */
	  ('-' != peek_argument[0])){ { /* and the next isn't an option... */
	    /* get the port number */
	    tcp_port = atoi(next_arg(&argc, &argv));
	  }			/* end if (explicit tcp_port) */
				    }
    }				/* end if (-p) */
    else if (0 == strcmp("-s", next_argument)){
      use_stdio = TRUE;
    }				/* end if (-s) */

    else if (0 == strcmp("-e", next_argument)) {
      char *peek_argument = peek_arg(&argc, &argv);
      log_file_name = "/dev/null"; /* default to /dev/null */
      if ((peek_argument != NULL) &&
	  ('-' != peek_argument[0])) {
	log_file_name = next_arg(&argc, &argv);
      }				/* end if (explicit log file) */
    }				/* end if (-e) */
    else if (0 == strcmp("-l", next_argument)) {
      wais_log_level = atol(next_arg(&argc, &argv));
    }				/* end if (-l) */
    else if (0 == strcmp("-d", next_argument)) {
      index_dir = next_arg(&argc, &argv);
    }
    else if (0 == strcmp("-v", next_argument)) {
      fprintf(stderr,"%s: %s, %s\n", command_name, VERSION, SERVER_DATE);
    }
    else if (0 == strcmp("-u", next_argument)) {
      uid_name = next_arg(&argc, &argv);
      if((uid = finduid(uid_name)) < 0)
	panic("Couldn't find user %s.", uid_name);
    }
    else if(0 == strcmp("-cmmem", next_argument)){
      if(NULL == (next_argument = next_arg(&argc, &argv)))
	panic("Expected a number (1-100) for percentage of memory to use");
      cm_mem_percent = atol(next_argument);
      if(cm_mem_percent < 1)
	panic("The -cmmem argument should not be less than 1 and less than 100");
      if(cm_mem_percent > 100)
	panic("Warning: The -cmmem parameter was %ld%%. It should be between 1-100.", cm_mem_percent);
    }
    else{
      panic("Don't recognize the %s option", next_argument);
    }
    next_argument = next_arg(&argc, &argv);
  }				/* end while (more arguments) */

  if (use_stdio && log_file_name == NULL) 
    log_file_name = "/dev/null";

  if (log_file_name == NULL) 
    logfile = stderr;
  else logfile = NULL;
  
  index_dir = index_dir ? index_dir : ".";  
  info_dict = s_strdup(merge_pathnames(info_dict,index_dir));

  if(0 != init_search_engine(index_dir, false, true, cm_mem_percent,0,0))
    panic("unable to initialize search engine");
  
  /* remember timestamp on INFO.dct if rebuilding needed 
   * If it doesnt exist, it's assumed to be *very* old, to force
   * re-indexing
   */
  info_change_time = (stat(info_dict,&statbuf) == -1) ? 0 : statbuf.st_mtime;
  
  /* compare with candidates */

  if (scandir(index_dir, &list, srcfiles, alphasort) < 0) {
      waislog(WLOG_HIGH, WLOG_ERROR, 
	      "Error: reading directory %s, %s", 
	      index_dir, sys_errlist[errno]);
      indexing_needed = FALSE;
  }
  
  /* ok. we know if we need indexing, 
   * and have all the filenames. 
   */
  
  if (info_change_time == 0) indexing_needed = TRUE;
  if (indexing_needed) {

    /* Time to re-index,
     * aquire the lock 
     */
    waislog(WLOG_MEDIUM, WLOG_INDEX,
	    "re-indexing needed, info_change_time=%d",info_change_time); 

    if (open(LOCKFILE, O_WRONLY|O_CREAT|O_EXCL,0666) == -1) {
	  
      /* already locked by somebody else
       * spin  till she finishes
       */
      while (!(stat(LOCKFILE,&statbuf) == -1)) {
	sleep(NAPTIME);
	naptime += NAPTIME;
	waislog(WLOG_MEDIUM, WLOG_INFO,
		"INFO locked, waiting since %d seconds", naptime);
	if (naptime  > MAXNAPTIME)  {

	  waislog(WLOG_HIGH, WLOG_WARNING,
		  "Warning - lockfile %s won't go away after %d seconds, not reindexing.", 
		  LOCKFILE, naptime);
	  break;
	}
      }
      /* if lockfile went away, assume INFO.* build finished
       * so just use it
       */
    } else {			/* we aquired the lock, so rebuild database  */
	  
#ifdef INDEX_FORK
      if (!(child = fork())) {
#endif
	database *db;
	struct dirent **s = list;
	char filename[MAX_FILENAME_LEN], *dbname;

	waislog(WLOG_MEDIUM, WLOG_INDEX,
		"Creating INFO database, pid=%d",getpid());
	dbname = s_strdup(merge_pathnames("INFO",	index_dir));
	db = openDatabase(dbname, true, /* maybe this should append XXX */
			  false);
	s_free(dbname);
	init_add_word(db, 0, 100000L);

	while (*s) {		/* index it */
	  strncpy(filename, index_dir, MAX_FILENAME_LEN);
	  if(index_dir[strlen(index_dir) -1] != '/')
	    strncat(filename, "/", MAX_FILENAME_LEN);
	  strncat(filename, (*s)->d_name, MAX_FILENAME_LEN);
	  waislog(WLOG_MEDIUM, WLOG_INDEX,
		  "Indexing %s", filename);
	  index_text_file(filename, NULL, NULL, NULL, 
			  filename_finish_header_function,
			  "WSRC", db, true, false,
			  false, true);
	  s++;
	}
	freedir(list);		/* array of filenames */
	      
	if(!probe_file(source_filename(filename, db)))
	  write_src_structure(source_filename(filename, db),
			      "INFO", "WSRC", NULL, 0L, true, tcp_port);
	finished_add_word(db);
	build_catalog(db);
	closeDatabase(db);
	if (unlink(LOCKFILE))
	  panic("Indexer: cant unlink lockfile!\n");
	waislog(WLOG_MEDIUM, WLOG_INDEX,
		"Indexer pid=%d done", getpid());
	      
#ifdef INDEX_FORK
	exit(0);		/* indexing child */
      }
      else if (child == -1) {
	waislog(WLOG_HIGH, WLOG_ERROR,
		"Unable to fork for indexer.");
	exit(1);
      }
      /* wait for child process */
      else while (wait(0) != child) ; /* do nothing */
#endif      
    }
  }


  if (use_stdio == TRUE) {
    if(host_address[0] != 0){
      waislog(WLOG_MEDIUM, WLOG_CONNECT,
	      "Accepted connection from: %s [%s], %s",
	      host_name, host_address, VERSION);
    }
    else{
      waislog(WLOG_MEDIUM, WLOG_CONNECT,
	      "Couldn't determine peer connection. %s", VERSION);
    }
  }
  else{
    waislog(WLOG_MEDIUM, WLOG_INFO, "Running server %s", VERSION);
  }

  signal(SIGINT, breakKey);

  signal(SIGCHLD, childhandler);  	/* XXX dont really need this any more */
  signal(SIGALRM, alarmhandler);

  signal(SIGSEGV, seghandler);

  signal(SIGBUS, bushandler);

  if(use_stdio == FALSE)
   { 
     if (tcp_port < 1024 && getuid() != 0) {
       waislog(WLOG_HIGH, WLOG_ERROR,
	       "Error opening port %d:  Must be superuser to use a port < 1024",
	       tcp_port);
       exit(-1);
     }

     open_server(tcp_port,&socket,BUFSZ);

#ifdef SECURE_SERVER
     /* if root, setuid to user specified id. */
     if (uid > 0 && getuid() == 0)  {
       waislog(WLOG_MEDIUM, WLOG_INFO,
	       "Setting uid to %s.", uid_name);
       if (chown(log_file_name,uid,getgid()) < 0)
	 waislog(WLOG_HIGH, WLOG_ERROR,
		 "Unable to chown log file to %s!", uid_name);
       if ( 0 > setuid(uid)) {
	 waislog(WLOG_HIGH, WLOG_ERROR,
		 "Unable to setuid to %s!  Exiting.", uid_name);
	 exit(-1);
       }
     }
#endif
     while (TRUE) { /* be a server for several connections */
       accept_client_connection(socket,&file);
	  
       if ((child_proc = fork()) == 0) {
	      
	      /* grandson handles this connection
	       * double-fork takes care of zombies 
	       */
	      if ((child_proc = fork()) == 0) { 
		wais_pid = getpid();
		log_line = 0;
		serve_client(file, file, index_dir);
		/* but leaves server up */
		close_client_connection(file);
		close_server(socket);
		/* just exits this child */
		waislog(WLOG_MEDIUM, WLOG_CLOSE,
			"Done handling client");
		exit(0);
	      } else {
		/* son: orphans the grandchild, so init picks up 
		 * the exit status
		 */
		exit(0);
	      }
          } else {
	      waislog(WLOG_MEDIUM, WLOG_INFO,
		      "Child PID = %d", child_proc);
	      close_client_connection(file);     /* parent shouldn't keep the file */
	  }
      }
   }
  else if(use_stdio == TRUE)
   { /* connections on stdio don't use child processes yet */
     serve_client(stdin, stdout, index_dir);
     waislog(WLOG_MEDIUM, WLOG_CLOSE,
	     "Done handling client");
      /* close the whole thing */
     if(0 != finished_search_engine())
       panic("unable to close search engine");
     exit(0);
   }
}

/*---------------------------------------------------------------------------*/

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.