ftp.nice.ch/pub/next/connectivity/infosystems/WAIStation.1.9.6.N.b.tar.gz#/WAIS/ir/ir.c

This is ir.c in view mode; [Download] [Up]

/* WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.	
  
*/

/* Change log:
 * $Log:	ir.c,v $
 * Revision 1.49  92/05/10  14:43:35  jonathan
 * Made a little safer on NULL docid's when parsing.
 * 
 * Revision 1.48  92/05/05  14:56:33  jonathan
 * Added definition of S_ISDIR for Mach and NeXT.  Fixed isdoctype macro to
 * check for NULLs.
 * 
 * Revision 1.47  92/05/04  11:28:26  jonathan
 * Changed logging result list to use log_level a little better.
 * 
 * Revision 1.46  92/04/30  12:24:45  jonathan
 * split =* for ULTRIX CC.
 * changed a couple of s_free's to free's for ULTRIX CC too.
 * 
 * Revision 1.45  92/04/01  17:08:14  jonathan
 * Added code to handle FTP-like searches.
 * 
 * Revision 1.44  92/03/23  13:25:22  shen
 * only print out the number of the results but not the message in the log
 * 
 * Revision 1.43  92/03/18  08:55:45  jonathan
 * Removed databaseName argument to getDocumentText and getData.
 * 
 * Revision 1.42  92/03/05  07:07:20  shen
 * add two more dummy arguments to call to init_search_engine
 * 
 * Revision 1.41  92/02/23  10:37:53  jonathan
 * enforced limit on results in handleRelevanceFeedbackSearch (particularly
 * for handline help queries).
 * 
 * Revision 1.40  92/02/23  09:57:57  jonathan
 * Prevent return of help messages if query is empty but there are relevant
 * documents.
 * 
 * Revision 1.39  92/02/21  11:07:25  jonathan
 * Added RCSIdent.
 * 
 * Revision 1.38  92/02/21  11:00:10  jonathan
 * Changed logging of init message to WLOG_MEDIUM.
 * 
 * Revision 1.37  92/02/19  16:56:48  jonathan
 * mucked a bit with the no-results case.
 * 
 * Revision 1.36  92/02/19  13:55:22  jonathan
 * Return catalog as result if no hits to search.
 * Plug some more memory leaks.
 * 
 * Revision 1.35  92/02/19  10:39:27  jonathan
 * Off by one in last fix (headerNum>=0).  Fixed it.
 * 
 * Revision 1.34  92/02/19  10:16:04  jonathan
 * Added code to handle too many headers for buffer.  Reduces the number of
 * headers until they fit, and marks the log with a warning.
 * 
 * Revision 1.33  92/02/17  12:40:04  jonathan
 * Return catalog as well as source description for help queries.
 * 
 * 
 * Revision 1.32  92/02/12  13:20:49  jonathan
 * Added "$Log" so RCS will put the log message in the header
 * 
*/

#ifndef lint
static char *RCSid = "$Header: /tmp_mnt/net/quake/proj/wais/wais-8-b5/ir/RCS/ir.c,v 1.49 92/05/10 14:43:35 jonathan Exp $";
#endif

/*----------------------------------------------------------------------*/
/* This code implements a simple Z39.50+WAIS server, which consults a 
   local database using Brewster's search engine.  The main routine is
   interpret_buffer() which reads the contents of a receive buffer, and 
   writes results back to a send buffer.

The basic structure is:

interpret_buffer gets bytes and returns bytes from whatever transport 
   mechanism. It calls either handleInit (which handles init requests)
   or handleSearch.

handleSearch calls either handleRelevanceFeedbackSearch or 
   handleElementRetrieval based on the type of question.

handleElementRetrieval calls getData or getDocumentText to answer that 
   question.

handleRelevanceFeedbackSearch calls run_search and/or help_header to answer 
   the question.


A server must supply:
  getData, getDocumentText, run_search, and help_header
then it should work.

   To do:
   - help facilities:
     on a null query pass back random documents from the pool (?)
   - Add dates to search responses
 */

/* change log:
 *  3/91 fixed db name defaulting for info server
 *  5/31/91 fixed handleRelevanceFeedbackSearch to do search if
 *          no seedwords but relevant document supplied - JG
 *  5/31/91 fixed databaseName in handleElementRetrieval - HWM
 *  7/19/91 fixed handleElementRetrieval prototype -BK
 */
/*----------------------------------------------------------------------*/

#include "server.h"
#include "ir.h"
#include "wprot.h"
#include "irsearch.h"
#include "docid.h"
#include "cutil.h"
#include "irfiles.h" /* for pathname_name */
#include "irretrvl.h"
#include "sockets.h"  /* for connect_to_server */
#include "panic.h"

#include <string.h>
#include <ctype.h>
#include <math.h>

#ifdef ANSI_LIKE
#include <stdlib.h>
#else
#include "ustubs.h"
#endif

#include <sys/stat.h>
#include "irdirent.h"

/* forward declarations */

/* Decides from the seed words alone whether the client is asking for help. */
static boolean needs_help _AP ((char *question));

/* Builds a header for the database's source-description file, or NULL
   when that file cannot be probed. */
static WAISDocumentHeader *help_header _AP((char *database_name, 
					    char *index_directory));

/* Builds a header for the database's catalog file, or NULL when that file
   cannot be probed.  `results` selects the headline and the score. */
static WAISDocumentHeader *catalog_header _AP((char *database_name, 
					       char *index_directory,
					       boolean results));

/* Answers an init APDU and negotiates the maximum buffer size. */
static void handleInit _AP((char** recBuf,char** sendBuf,
			    long* sendBufLen,
			    long* maxBufLen));
                       
/* Reads a search APDU and dispatches on its query type. */
static void handleSearch _AP((char** recBuf,char** sendBuf,
			      long* sendBufLen,
			      long waisProtocolVersion,
			      char *index_directory));

/* Answers a relevance-feedback (seed word) query. */
static void handleRelevanceFeedbackSearch _AP((SearchAPDU* search,
					       char** sendBuf,long* sendBufLen,
					       long waisProtocolVersion,
					       char *index_directory));
                                          
/* Answers a text-retrieval query for the requested documents. */
static void handleElementRetrieval _AP((SearchAPDU* search,
					char** sendBuf,
					long* sendBufLen,
					long waisProtocolVersion,
					char *index_directory));
                                   
/* Answers a relevance-feedback query whose first relevant document has
   type "TEXT-FTP" or "FTP-DIR". */
static void handleFTPSearch _AP((SearchAPDU* search,
				 char** sendBuf,long* sendBufLen,
				 long waisProtocolVersion,
				 char *index_directory));

/* Non-zero when `doc` is not NULL, has a non-NULL Type string, and that
   string is exactly equal to `doctype`.  `doc` is evaluated more than
   once, so do not pass an expression with side effects. */
#define isdoctype(doc, doctype) \
 ((doc) != NULL && (doc)->Type != NULL && strcmp((doc)->Type, doctype) == 0)

/* Fallback definitions of S_ISDIR for systems whose <sys/stat.h> lacks it.
   The file type is a multi-bit field, not a flag: the mode must be masked
   with the format mask and compared for equality.  A plain bit test
   against the directory value also matches block devices (060000) and
   sockets (0140000). */
#ifdef Mach
#include <sys/inode.h>
/* NOTE(review): assumes <sys/inode.h> defines IFMT next to IFDIR - confirm
   on the Mach headers in use. */
#define S_ISDIR(f_mode) (((f_mode) & IFMT) == IFDIR)
#endif /* Mach */

#if (defined(NeXT) && !(defined(S_ISDIR)))
#define S_ISDIR(f_mode) (((f_mode) & S_IFMT) == S_IFDIR)
#endif

/*----------------------------------------------------------------------*/
/* Utility */
  
/*----------------------------------------------------------------------*/
/* note - at present, it is not clear to me what the value of present-status
   is, and how it is to be interpreted.  Furthermore, are our text retrieval
   queries considered presents, or are they searches?
 */
 

/*----------------------------------------------------------------------*/

/* interpret_buffer()
char* receiveBuffer - buffer containing data to interpret
long receiveBufLen - how much data is there
char* sendBuffer - buffer to write results to
long sendBufLen - how much space there is to write to
long* maxBufferSize - see below
long waisProtocolVersion - what version of the WAIS protocol is in use
char *index_directory - the directory to find the indexes on a search

maxBufferSize is a pointer to a per-connection variable that contains the
maximum size of send/receive buffer to use.  Seems a lot like sendBufLen,
doesn't it?  Well, it usually is, but not always.

Here is how it works from a server's point of view.  

When the client connection is first established, the server spawns a new
process to deal with it.  The new process contains a global variable
(bufferSize in server.c) which is initialized to BUFSZ (defined in server.h).
This is the physical size of the server's internal buffers.
Clearly that is the absolute maximum size of any z3950 message to or from
this server.

So now *maxBufferSize = sendBufLen.  

Now, the first thing that a z3950 client is likely to do is send an init
APDU.  The only useful thing (and it is useful) that the init APDU
currently does is allow the client and server to negotiate the maximum size
of the messages that they will send.  This takes place somewhere down
inside of the interpret_buffer() logic where the APDUs are decoded and
response APDUs are encoded.  A pointer to bufferSize is passed to
interpret_buffer() in the maxBufferSize argument, and if the buffer happens
to contain an init message, bufferSize is changed (for the rest of the
connection).

That is the only function maxBufferSize serves.  Note that I could have
gotten rid of sendBufLen, and just used *maxBufferSize, but sendBufLen can
be and does get modified by the z3950 APDU writing code, and we don't want
the overall value being modified.

*/

long
interpret_buffer(receiveBuffer,
		    receiveBufLen,
		    sendBuffer,
		    sendBufLen,
		    maxBufferSize,
		    waisProtocolVersion,
		    index_directory)
char* receiveBuffer;
long receiveBufLen;
char* sendBuffer;
long sendBufLen;
long* maxBufferSize;
long waisProtocolVersion;
char *index_directory;
/* Decode every APDU found in receiveBuffer (receiveBufLen bytes) and
   append the matching response to sendBuffer.  Init APDUs go to
   handleInit, search APDUs to handleSearch; any other type is an error.
   Returns the number of bytes written to sendBuffer.  On any error the
   problem is logged and 0 is returned.
 */
{
  char* in = receiveBuffer;	/* next unread byte of the request */
  char* out = sendBuffer;	/* next free byte of the reply; NULL on error */

  for (;;)
    { pdu_type kind;

      /* stop when the input is used up or a handler reported an error */
      if (!(in - receiveBuffer < receiveBufLen))
	break;
      if (out == NULL)
	break;

      kind = peekPDUType(in);
      if (kind == initAPDU)
	handleInit(&in,&out,&sendBufLen,maxBufferSize);
      else if (kind == searchAPDU)
	handleSearch(&in,&out,&sendBufLen,
		     waisProtocolVersion, index_directory);
      else
	{ /* unknown APDU error */
	  out = NULL;
	  waislog(WLOG_HIGH, WLOG_ERROR,
		  "Error in interpret_message: unknown APDU type.");
	}
    }

  if (out != NULL)
    return(out - sendBuffer);

  waislog(WLOG_HIGH, WLOG_ERROR,
	  "Error in interpret_message: NULL writePos.");
  return (0);
}

/*----------------------------------------------------------------------*/

static void handleInit _AP((char** recBuf,char** sendBuf,
			    long* sendBufLen,long* maxBufferSize));

static void
handleInit(recBuf,sendBuf,sendBufLen,maxBufferSize)
char** recBuf;
char** sendBuf;
long* sendBufLen;
long* maxBufferSize;
/* Negotiate functionality and buffer sizes.  A session usually begins
   with an init APDU, but is not required to.
   *recBuf is advanced past the init APDU; *sendBuf is advanced past the
   response written, or set to NULL if the init could not be read.
   *maxBufferSize is lowered to the client's MaximumRecordSize when that
   is smaller.
   NOTE - even if the server decides not to accept the client, it does
   not shut down the connection.  It simply declines acceptance, and
   waits for the client to shut down.
 */
{
  InitAPDU* request = NULL;
  InitResponseAPDU* answer = NULL;
  WAISInitResponse* wais_part = NULL;
  boolean accepted;

  /* read the init - note there is no WAIS protocol extension here */
  *recBuf = readInitAPDU(&request,*recBuf);

  if (recBuf == NULL || *recBuf == NULL)
    { *sendBuf = NULL;		/* error in the read */
      return;
    }

  /* negotiate services: only clients that ask for neither present nor
     delete are accepted */
  accepted = (request->willPresent == false &&
	      request->willDelete == false) ? true : false;

  /* negotiate buffer sizes: never exceed what the client can take */
  if (*maxBufferSize > request->MaximumRecordSize)
    *maxBufferSize = request->MaximumRecordSize;

  if (request->IDAuthentication != NULL)
    waislog(WLOG_MEDIUM, WLOG_INFO, "Init message: %s", 
	    request->IDAuthentication);

  /* the WAIS-specific part of the response carries nothing */
  wais_part = makeWAISInitResponse(0L,0L,NULL,NULL,NULL,NULL); 

  answer = makeInitResponseAPDU(accepted,
				true,false,false,false,false,*maxBufferSize,
				*maxBufferSize,NULL,
				defaultImplementationID(),
				defaultImplementationName(),
				defaultImplementationVersion(),NULL,
				wais_part);

  /* write it */
  *sendBuf = writeInitResponseAPDU(answer,*sendBuf,sendBufLen);

  /* free everything */
  freeInitAPDU(request);
  freeInitResponseAPDU(answer);
  freeWAISInitResponse(wais_part);
}


/*----------------------------------------------------------------------*/
static boolean
isRemoteDB(db)
char * db;
/* Returns non-zero when the database name contains an '@', i.e. has the
   form "name@host[:port]" and must be forwarded to another server.
   A NULL name is never remote. */
{
  return(db != NULL && strchr(db,'@') != NULL);
}

/*----------------------------------------------------------------------*/
#include "wmessage.h"

/* Remembers the most recent server that forwardSearch connected to, so
   that consecutive searches for the same host and port reuse the
   connection instead of opening a new one. */
struct {
  char host[256];		/* host name last connected to */
  long port;			/* port last connected to */
  FILE *connection;		/* result of connect_to_server for that host/port */
  long buffer_size;		/* value returned by init_connection for it */
} last_connection;

static void
forwardSearch(aSearch,sendBuf,sendBufLen,waisProtocolVersion)
SearchAPDU* aSearch;
char** sendBuf;
long* sendBufLen;
long waisProtocolVersion;
/* Forward a search whose first database name has the form
   "name@host[:port]" (port defaults to 210) to that server, and copy the
   server's answer into *sendBuf.

   The database name is cut at the '@' in place, so the forwarded APDU
   names only the database.  The connection is cached in last_connection
   and reused when host and port match the previous call.

   On return *sendBuf points past the bytes written and *sendBufLen is
   reduced by the same amount.  If the server cannot be reached, a FAILURE
   search response with a diagnostic is written instead.  If the name is
   malformed, the host name does not fit, or the answer does not fit in
   the send buffer, *sendBuf is set to NULL.

   The database name comes from the client, so every copy is bounded.

   NOTE(review): aSearch is not freed here, unlike in the other search
   handlers - confirm who owns it on this path.
 */
{
  FILE *connection;
  char hostname[sizeof(last_connection.host)];
  char *db, *at, *colon;
  long port, len, avail;
  size_t host_len;
  char message[BUFSZ], response[BUFSZ];

  db = aSearch->DatabaseNames[0];
  at = strchr(db, '@');
  if(at == NULL) {
    /* callers check isRemoteDB first; do not crash if one does not */
    waislog(WLOG_HIGH, WLOG_ERROR,
	    "Error in forwardSearch: database name has no host part.");
    *sendBuf = NULL;
    return;
  }

  /* host runs from after the '@' to the ':' (or the end of the string) */
  colon = strchr(at+1, ':');
  host_len = (colon == NULL) ? strlen(at+1) : (size_t)(colon - (at+1));
  if(host_len >= sizeof(hostname)) {
    waislog(WLOG_HIGH, WLOG_ERROR,
	    "Error in forwardSearch: host name too long.");
    *sendBuf = NULL;
    return;
  }
  strncpy(hostname, at+1, host_len);
  hostname[host_len] = 0;
  port = (colon == NULL) ? 210 : atoi(colon+1);

  /* strip "@host[:port]" so only the database name is forwarded */
  *at = 0;

  /* the APDU is written after the header, so only BUFSZ-HEADER_LENGTH
     bytes of message[] are available to it */
  avail = BUFSZ - HEADER_LENGTH;
  len = avail;
  writeSearchAPDU(aSearch, message+HEADER_LENGTH, &len);
  len = avail - len;		/* bytes actually written */

  if(hostname[0] != 0) {
    if(strcmp(hostname, last_connection.host) == 0 &&
       port == last_connection.port)
      connection = last_connection.connection;
    else {
      if (last_connection.connection != NULL)
	close_connection(last_connection.connection);
      strcpy(last_connection.host, hostname); /* fits: same size */
      last_connection.port = port;
      last_connection.connection = (FILE*)connect_to_server(hostname, port);
      connection = last_connection.connection;
      if(connection != NULL) {
	char userInfo[500], local_host[80], init_message[1000];
#ifdef TELL_USER
	char *user = getenv("USER");
#endif

	gethostname(local_host, sizeof(local_host));
	local_host[sizeof(local_host) - 1] = 0;
	/* precisions keep the text inside userInfo */
#ifdef TELL_USER
	sprintf(userInfo,
		"server forwarding %.100s, from host: %.80s, user: %.100s",
		VERSION, local_host, (user != NULL) ? user : "unknown");
#else
	sprintf(userInfo, "server forwarding %.100s, from host: %.80s",
		VERSION, local_host);
#endif

	last_connection.buffer_size =
	  init_connection(init_message, response,
			  BUFSZ,
			  connection,
			  userInfo);
      }
    }
    if(connection != NULL)
      {
	len = interpret_message(message, len,
				response, last_connection.buffer_size,
				connection, false);
      }
    else {
      static diagnosticRecord* diags[2] = {NULL, NULL};
      SearchResponseAPDU* failure = NULL;
      WAISSearchResponse* wais_failure = NULL;
      char errmsg[600];

      /* precisions bound the text; the port is a long */
      sprintf(errmsg, "Database not available: %.200s@%.255s:%ld.",
	      db, last_connection.host, last_connection.port);
      diags[0] = makeDiag(true,D_RecordNotAuthorizedToBeSent,
			  errmsg);

      wais_failure = makeWAISSearchResponse(NULL,NULL,NULL,
					    NULL,NULL,NULL,NULL,diags);
      failure = makeSearchResponseAPDU(0L,0L,
				       1L,
				       0L,UNUSED,FAILURE,
				       aSearch->ReferenceID, wais_failure);
      *sendBuf = writeSearchResponseAPDU(failure,*sendBuf,sendBufLen);

      freeSearchResponseAPDU(failure);
      freeWAISSearchResponse(wais_failure);
      /* the text holds client data: never use it as the format */
      waislog(WLOG_HIGH, WLOG_ERROR, "%s", errmsg);
      return;
    }
  }
  else
    len = interpret_message(message, len,
			    response, last_connection.buffer_size,
			    NULL, false);

  /* refuse an answer that does not fit in the caller's buffer */
  if(len < 0 || len > *sendBufLen) {
    waislog(WLOG_HIGH, WLOG_ERROR,
	    "Error in forwardSearch: response does not fit in send buffer.");
    *sendBuf = NULL;
    return;
  }
  bcopy(response+HEADER_LENGTH, *sendBuf, len);
  *sendBuf+=len;
  *sendBufLen-=len;		/* account for the space used, as the APDU writers do */
}

/*----------------------------------------------------------------------*/

static void
handleSearch(recBuf,sendBuf,sendBufLen,waisProtocolVersion,index_directory)
char** recBuf;
char** sendBuf;
long* sendBufLen;
long waisProtocolVersion;
char *index_directory;
/* Read a search APDU from *recBuf and dispatch it:
     - first database name contains '@'  -> forwardSearch
     - QT_TextRetrievalQuery             -> handleElementRetrieval
     - QT_RelevanceFeedbackQuery         -> handleRelevanceFeedbackSearch
   *sendBuf is set to NULL if the APDU cannot be read or the query type is
   unknown.
 */
{
  SearchAPDU* aSearch = NULL;

  /* read the search data */
  *recBuf = readSearchAPDU(&aSearch,*recBuf);

  if (*recBuf == NULL)
    { *sendBuf = NULL;		/* error in the read */
      return;
    }
  else
    {				/* dispatch on the query type */
      if((aSearch->DatabaseNames != NULL) &&
	 (aSearch->DatabaseNames[0] != NULL) &&
	 isRemoteDB(aSearch->DatabaseNames[0]))
	forwardSearch(aSearch,sendBuf,sendBufLen,waisProtocolVersion);
      else {
	if (strcmp(aSearch->QueryType,QT_TextRetrievalQuery) == 0) {
	  handleElementRetrieval(aSearch,sendBuf,sendBufLen,
				 waisProtocolVersion, index_directory);
	}
	else if (strcmp(aSearch->QueryType,QT_RelevanceFeedbackQuery) == 0) {
	  char *seeds, *s;

	  /* private copy of the seed words, flattened onto one line so
	     that the log entry is not broken up */
	  seeds = s_strdup(((WAISSearch *)aSearch->Query)->SeedWords);
	  if(seeds != NULL) {	/* in case there were no seed words to copy */
	    for(s = seeds; *s != 0; s++) {
	      if(*s == '\n' || *s == '\r') *s = ' ';
	    }
	  }

	  if(aSearch->DatabaseNames != NULL &&
	     aSearch->DatabaseNames[0] != NULL)
	    waislog(WLOG_LOW, WLOG_SEARCH,
		    "Search! Database: %s, Seed Words: %s", 
		    aSearch->DatabaseNames[0], 
		    (seeds != NULL) ? seeds : "");
	  else
	    waislog(WLOG_LOW, WLOG_SEARCH, 
		    "Search! Database: None, Seed Words: %s", 
		    (seeds != NULL) ? seeds : "");

	  /* the copy was only needed for the log line */
	  if(seeds != NULL) {
	    s_free(seeds);
	  }

	  handleRelevanceFeedbackSearch(aSearch,sendBuf,sendBufLen,
					waisProtocolVersion,
					index_directory);
	}
	else {
	  waislog(WLOG_HIGH, WLOG_ERROR, "Unknown search type");
	  *sendBuf = NULL;	/* error - unknown search type */
	}
	fflush(stderr);
      }
    }
}


	 

/*----------------------------------------------------------------------*/

static boolean needs_help(question)
char *question;
/* Returns true if the user wants help: the question is empty, starts
   with '?' or '*', or is shorter than 20 characters and contains one of
   "help", "HELP", "Help" or "all" anywhere in it. */
{
  static char *keywords[] = { "help", "HELP", "Help", "all" };
  int k;

  switch (question[0])
    { case '\0':		/* null question, must need help */
      case '?':
      case '*':
	return(true);
      default:
	break;
    }

  /* keywords only count in short questions */
  if (strlen(question) >= 20)
    return(false);

  for (k = 0; k < 4; k++)
    { if (strstr(question, keywords[k]) != NULL)
	return(true);
    }

  return(false);
}

/* returns a help header to be returned or NULL if not possible */
static WAISDocumentHeader *help_header(database_name, index_directory)
     char *database_name;
     char *index_directory;
{
  /* make a help document */
  hit help;
  char local_id[MAX_FILENAME_LEN + 60];

  strncpy(help.filename,
	  merge_pathnames(database_name,index_directory), 
	  MAX_FILENAME_LEN);
  strncat(help.filename, source_ext, MAX_FILENAME_LEN);
  /* printf("help filename %s", help.filename); */

  strncpy(help.headline, "Information on database: ", MAX_FILENAME_LEN);
  strncat(help.headline, pathname_name(database_name), 
	  MAX_FILENAME_LEN);
  sprintf(local_id, "%ld %ld %s", 0L, 0L, help.filename);

  if(probe_file(help.filename))
    { 
      DocID* theDocID = NULL;
      long length;
      long lines;
      char **type = NULL;

      help.start_character = 0;
      help.end_character = 0;
	
      { FILE *stream = s_fopen(help.filename, "r");
	lines = count_lines(stream);
	length = file_length(stream);
        s_fclose(stream);
      }

      type = (char**)s_malloc((size_t)(sizeof(char*) * 2));
      type[0] = s_strdup("WSRC");
      type[1] = NULL;

      /* then there is a source structure to return */
      theDocID = makeDocID();
      theDocID->originalDatabase = stringToAny(database_name); /* XXX */
      theDocID->originalLocalID = stringToAny(local_id);

      return(makeWAISDocumentHeader(anyFromDocID(theDocID),
				    UNUSED,
				    MAX_NORMAL_SCORE,
				    UNUSED,
				    length,lines,
				    type,
				    s_strdup(database_name), /* XXX */
				    NULL, /* date */
				    s_strdup(help.headline),
				    NULL));
    }	
  else 
    return(NULL);
}

/* returns the catalog document to be returned or NULL if not possible */

static WAISDocumentHeader *catalog_header(database_name, index_directory, results)
     char *database_name;
     char *index_directory;
     boolean results;
/* Builds a document header of type "TEXT" that points at the database's
   catalog file: merge_pathnames(database_name, index_directory) with
   catalog_ext appended.
   `results` selects the headline prefix and the score: when true the
   headline starts "Catalog for database: " and the score is
   MAX_NORMAL_SCORE; when false it starts with the "Search produced no
   result" text and the score is 0.
   Returns NULL when that file cannot be probed or opened.

   String copies are bounded by the real size of the destination.
   Assumes hit.filename and hit.headline are char arrays (not pointers),
   as the copies into an uninitialized `hit` imply - confirm in the
   declaration of `hit`.
 */
{
  /* make a catalog document */
  hit catalog;
  char local_id[MAX_FILENAME_LEN + 60];

  /* strncpy does not terminate on truncation, and strncat's count is the
     room left, not the buffer size */
  strncpy(catalog.filename,
	  merge_pathnames(database_name,index_directory), 
	  sizeof(catalog.filename) - 1);
  catalog.filename[sizeof(catalog.filename) - 1] = '\0';
  strncat(catalog.filename, catalog_ext,
	  sizeof(catalog.filename) - strlen(catalog.filename) - 1);
  /* printf("catalog filename %s", catalog.filename); */

  if(results)
    strncpy(catalog.headline,
	    "Catalog for database: ",
	    sizeof(catalog.headline) - 1);
  else
    strncpy(catalog.headline,
	    "Search produced no result. Here's the Catalog for database: ",
	    sizeof(catalog.headline) - 1);
  catalog.headline[sizeof(catalog.headline) - 1] = '\0';

  strncat(catalog.headline, pathname_name(database_name), 
	  sizeof(catalog.headline) - strlen(catalog.headline) - 1);
  sprintf(local_id, "%ld %ld %s", 0L, 0L, catalog.filename);

  if(probe_file(catalog.filename))
    { 
      DocID* theDocID = NULL;
      long length;
      long lines;
      char **type = NULL;

      catalog.start_character = 0;
      catalog.end_character = 0;
	
      { FILE *stream = s_fopen(catalog.filename, "r");
	if(stream == NULL)	/* probed but could not be opened */
	  return(NULL);
	lines = count_lines(stream);
	length = file_length(stream);
        s_fclose(stream);
      }

      type = (char**)s_malloc((size_t)(sizeof(char*) * 2));
      type[0] = s_strdup("TEXT");
      type[1] = NULL;

      /* then there is a catalog structure to return */
      theDocID = makeDocID();
      theDocID->originalDatabase = stringToAny(database_name); /* XXX */
      theDocID->originalLocalID = stringToAny(local_id);

      return(makeWAISDocumentHeader(anyFromDocID(theDocID),
				    UNUSED,
				    (results ? MAX_NORMAL_SCORE:0),
				    UNUSED,
				    length,lines,
				    type,
				    s_strdup(database_name), /* XXX */
				    NULL, /* date */
				    s_strdup(catalog.headline),
				    NULL));
    }	
  else 
    return(NULL);
}

/* picks a set of random documents from the database 
static void pick_random_documents(aSearch, headers, &headerNum)
{
  
}
*/


/*----------------------------------------------------------------------*/

static void
handleRelevanceFeedbackSearch(aSearch,sendBuf,sendBufLen,
			      waisProtocolVersion,
			      index_directory)
SearchAPDU* aSearch;
char** sendBuf;
long* sendBufLen;
long waisProtocolVersion;
char *index_directory;
{ 
  DocObj *doc = NULL;
  SearchResponseAPDU* response = NULL;
  WAISSearchResponse* wais_response = NULL;

  WAISDocumentHeader** headers = NULL;
  long headerNum = 0;
  long max_headers = ((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved;
  char* seedwords_used = NULL;
  diagnosticRecord** diags = NULL;
  char *seed_words_used = s_strdup(((WAISSearch *)aSearch->Query)->SeedWords);
  boolean search_status;

  if(((WAISSearch *)aSearch->Query)->Docs!=NULL &&
     (doc=((WAISSearch *)aSearch->Query)->Docs[0]) != NULL &&
     (isdoctype(doc, "TEXT-FTP") || isdoctype(doc, "FTP-DIR"))) {
    handleFTPSearch(aSearch,sendBuf,sendBufLen, waisProtocolVersion, index_directory);
    return;
  }

  /* construct a response list */
  headers = (WAISDocumentHeader**)
    s_malloc((size_t)
	     (sizeof(WAISDocumentHeader*) * 
	      (1 + max_headers)));
  headers[0] = NULL;

  if(0 != init_search_engine(index_directory, false, true, 0, 0, 0))
    panic("unable to initialize search engine");
  
  /* handle help queries */
  if(seed_words_used[0] == '\0' &&
     ((WAISSearch *)aSearch->Query)->Docs != NULL) {
    1;
  }
  else if(needs_help(seed_words_used)) {
      WAISDocumentHeader *header;
      char *database_name = (aSearch->DatabaseNames == NULL) ?
      INFO_DATABASE_NAME : aSearch->DatabaseNames[0];

    if(headerNum < max_headers) {
      header = help_header(database_name, index_directory);
      if(NULL != header) {
	headers[headerNum++] = header;
	headers[headerNum] = NULL;	
	((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved--;
      }
    }
    if(headerNum < max_headers) {
      header = catalog_header(database_name, index_directory, true);
      if(NULL != header){
	headers[headerNum++] = header;
	headers[headerNum] = NULL;	
	((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved--;
      }
    }
  }

  if(seed_words_used[0] == '\0' &&
     ((WAISSearch *)aSearch->Query)->Docs == NULL)
   {
     /* pick_random_documents(aSearch, headers, &headerNum); */
     search_status = true;
   }
  else
   { /* run the search on the database.  If a new
	search engine were to be used, this is where it would be hooked in */
     search_status = run_search(aSearch, headers,&diags, index_directory, 
				&seed_words_used, waisProtocolVersion,
				&headerNum);
   }

#define CATALOG_FOR_NO_RESULTS
#ifdef CATALOG_FOR_NO_RESULTS
  if(headerNum == 0 && headerNum < max_headers) {
    char *database_name = (aSearch->DatabaseNames == NULL) ?
      INFO_DATABASE_NAME : aSearch->DatabaseNames[0];
    WAISDocumentHeader *header = catalog_header(database_name, index_directory, false);

    if(NULL != header){
      waislog(WLOG_MEDIUM, WLOG_INFO,
	      "Search had no hits, returning catalog");
      headers[headerNum++] = header;
      headers[headerNum] = NULL;	
    }
  }
#endif /* CATALOG_FOR_NO_RESULTS */

  wais_response = makeWAISSearchResponse(seedwords_used,headers,NULL,
                                         NULL,NULL,NULL,NULL,diags);
  response = makeSearchResponseAPDU(search_status,0L,
				    headerNum + ((diags == NULL) ? 0 : 1),
				    0L,UNUSED,SUCCESS,
                                    aSearch->ReferenceID, wais_response);
  /* write it */
  {
    char *buff;
    long len;
    boolean it_fit = true;

    while(headerNum >= 0) {
      buff = *sendBuf;
      len = *sendBufLen;
      if ((buff = writeSearchResponseAPDU(response,buff,&len)) == NULL) {

	it_fit = false; /* didn't make it in the buffer. */
	headerNum--; 
	s_free(headers[headerNum]);
	headers[headerNum] = NULL;

	s_free(wais_response);
	wais_response = makeWAISSearchResponse(seedwords_used,headers,NULL,
					       NULL,NULL,NULL,NULL,diags);
	
	freeSearchResponseAPDU(response);
	response = makeSearchResponseAPDU(search_status,0L,
					  headerNum + ((diags == NULL) ? 0:1),
					  0L,UNUSED,SUCCESS,
					  aSearch->ReferenceID, wais_response);
      }
      else {
	break;
      }
    }
    *sendBuf = buff;
    *sendBufLen = len;
    if (!it_fit) {
      waislog(WLOG_HIGH, WLOG_WARNING, 
	      "Buffer overflow, adjusted results from %ld", 
	      ((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved);
    }
  }

  { /* generate report on results. */
    char *message;
    long size, i;

    /* calculate total length needed for log report */
    for(size = 0L, i = 0; i < headerNum; i++) 
      size+=(headers[i]->DocumentID->size+2);
    if (size > 0) {
      message = s_malloc(size);
      message[0] = 0;

      for (i = 0; i < headerNum; i++) {
	char docname[MAX_FILE_NAME_LEN+50];
	DocID *docid = docIDFromAny(headers[i]->DocumentID);
	char *docidstring = anyToString(GetLocalID(docid));

	sprintf(docname, "%s", docidstring);
	s_strncat(message, docname, headers[i]->DocumentID->size, size);

	s_free(docid); s_free(docidstring);

	if ( i < headerNum-1)
	  strcat(message, ", ");
      }
      waislog(WLOG_MEDIUM, WLOG_RESULTS, "Returned %d results", headerNum);
      waislog(WLOG_LOW, WLOG_RESULTS, "Results: %s", message);
      s_free(message);
    }
    else
      waislog(WLOG_LOW, WLOG_RESULTS,
	      "Returned 0 results.  Aww.");
  }
  freeWAISSearch((WAISSearch*)aSearch->Query); 
  freeSearchAPDU(aSearch);
  freeSearchResponseAPDU(response);
  freeWAISSearchResponse(wais_response); /* free headers & seed_words_used */
}

/*----------------------------------------------------------------------*/

static void 
handleElementRetrieval(aSearch,sendBuf,sendBufLen,waisProtocolVersion, index_directory)
SearchAPDU* aSearch;
char** sendBuf;
long* sendBufLen;
long waisProtocolVersion;
char *index_directory;
/* this is a type 1 query of the restricted form specified in the 
   WAIS-protocol.  Interpret it and write out an appropriate search
   response. (note the valid element sets are Document-Text,Document-Headlines,
   and Document-Codes but we only support text for now).
 */
{ 
  SearchResponseAPDU* response = NULL;
  WAISSearchResponse* wais_response = NULL;
  DocObj** docs = NULL;
  DocObj* doc = NULL;
  char *databaseName;
  void **elementList = NULL;
  void *element = NULL;
  diagnosticRecord** diags = NULL;
  diagnosticRecord* diag = NULL;
  long numDiags = 0L;
  long numElements = 0L;
  long i;
  database* db;
  char* new_db_name;
  
  /* read the query */
  docs = readWAISTextQuery((any*)aSearch->Query);
  databaseName = (aSearch->DatabaseNames == NULL) ?
    INFO_DATABASE_NAME : aSearch->DatabaseNames[0];

  new_db_name = merge_pathnames(databaseName, index_directory);

  /* assemble the elements and construct a response */
  for (i = 0L, doc = docs[i]; doc != NULL; doc = docs[++i])
   { 
     long errorCode;
     any* bufAny;
     long size;

     if (doc->Type != NULL &&
	 strcmp(doc->Type, "WAIS_NEXT") == 0) {
       char docname[MAX_FILE_NAME_LEN+50], *buffer;

       db = openDatabase(new_db_name, false, true);
       if ((size = 
	    next_doc(docname, 
		     anyToString(GetLocalID(docIDFromAny(doc->DocumentID))),
		     db))
	   > 0) {
	 buffer = s_malloc(strlen(docname)+50);
	 sprintf(buffer, "%s, %d", docname, size);
	 bufAny = makeAny(strlen(buffer)+1,buffer);
	 element = (void*)makeWAISDocumentText(duplicateAny(doc->DocumentID),0L,bufAny);
       }
       else element = NULL;
       closeDatabase(db);
     }
     else if (doc->Type != NULL &&
	      strcmp(doc->Type, "WAIS_PREV") == 0) {
       char docname[MAX_FILE_NAME_LEN+50], *buffer;
       any* bufAny;
       long size;

       db = openDatabase(new_db_name, false, true);
       if ((size = 
	    previous_doc(docname, 
			 anyToString(GetLocalID(docIDFromAny(doc->DocumentID))),
			 db))
	   > 0) {
	 buffer = s_malloc(strlen(docname)+50);
	 sprintf(buffer, "%s, %d", docname, size);
	 bufAny = makeAny(strlen(buffer),buffer);
	 element = (void*)makeWAISDocumentText(duplicateAny(doc->DocumentID),0L,bufAny);
       }
       else element = NULL;
       closeDatabase(db);
     }
     else if (doc->ChunkCode == CT_line)
       element = (void*)getDocumentText(doc, &errorCode, index_directory);
     else if (doc->ChunkCode == CT_byte)
       element = (void*)getData(doc, &errorCode, index_directory);

     if (errorCode != GDT_NoError)
       {			/* make a diagnostic record to return */
	 switch (errorCode)
	   { case GDT_UnsupportedChunkType:
	       diag = makeDiag(true,D_PresentRequestOutOfRange,
			       "Bad ChunkType in Request");
	       break;
	     case GDT_BadDocID:
	       diag = makeDiag(true,D_PresentRequestOutOfRange,
			       "Bad DocID in request");
	       break;
	     case GDT_MissingDocID:
	       diag = makeDiag(true,D_PresentRequestOutOfRange,
			       "Missing DocID in request");
	       break;
	     case GDT_BadRange:
	       diag = makeDiag(true,D_PresentRequestOutOfRange,
			       "Request out of range");
	       break;
	     case GDT_MissingDatabase:
	       diag = makeDiag(true,D_PresentRequestOutOfRange,
			       "Database missing from request");
	       break;
	     case GDT_BadDatabase:
	       diag = makeDiag(true,D_PresentRequestOutOfRange,
			       "File not present in specified database");
	       break;
	     default:
	       /* should never get here */
	       diag = NULL;
	       break;
	     };
	 diags = (diagnosticRecord**)s_realloc(diags,(size_t)(sizeof(diagnosticRecord*) * 
							      (numDiags + 2)));
	 diags[numDiags++] = diag;
	 diags[numDiags] = NULL;
       }
     if (element != NULL)
       { if (elementList == NULL) /* create a list */
	   { elementList = (void**)s_malloc((size_t)sizeof(void*) * 2);
	   }
       else			/* grow the list */
	 { elementList = (void**)s_realloc((char*)elementList,
					   (size_t)(sizeof(void*) * 
						    (numElements + 2)));
	 }
	   elementList[numElements++] = element; /* put it in the list */
	   elementList[numElements] = NULL;
  	 }
   }

  wais_response = makeWAISSearchResponse(NULL,NULL,NULL,NULL,
                                         (WAISDocumentText**)elementList,NULL,
                                         NULL,diags);
  response = makeSearchResponseAPDU(SUCCESS,0L,numElements + numDiags,0L,UNUSED,
                                    SUCCESS,aSearch->ReferenceID,
				    wais_response);
  
  /* write it */
  *sendBuf = writeSearchResponseAPDU(response,*sendBuf,sendBufLen);
  
  /* clean up */
  freeAny((any*)aSearch->Query); /* have to explicitly free the user info */
  freeSearchAPDU(aSearch);
  freeSearchResponseAPDU(response);
  freeWAISSearchResponse(wais_response); /* frees the elements constructed */
  doList((void**)docs,freeDocObj);
  s_free(docs);  
}

/*----------------------------------------------------------------------*/

static void
handleFTPSearch(aSearch,sendBuf,sendBufLen,
		waisProtocolVersion,
		index_directory)
SearchAPDU* aSearch;
char** sendBuf;
long* sendBufLen;
long waisProtocolVersion;
char *index_directory;
{
  SearchResponseAPDU* response = NULL;
  WAISSearchResponse* wais_response = NULL;

  DocID *t;
  WAISDocumentHeader** headers = NULL;
  long headerNum = 0;
  long max_headers = ((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved;
  DocObj *doc;
  char* seedwords_used = NULL;
  diagnosticRecord** diags = NULL;
  char *seed_words_used = s_strdup(((WAISSearch *)aSearch->Query)->SeedWords);
  char *database_name = (aSearch->DatabaseNames == NULL) ?
    INFO_DATABASE_NAME : aSearch->DatabaseNames[0];
  boolean search_status;
  int i=0;
  char *local_id,*p;
  long start,end;
  char path[200];
  char mpath[200];
  int count;

  /* construct a response list */
  headers = (WAISDocumentHeader**)
    s_malloc((size_t)
	     (sizeof(WAISDocumentHeader*) *
	      (1 + max_headers)));
  headers[0] = NULL;
  for(doc=((WAISSearch *)aSearch->Query)->Docs[i]; ((WAISSearch *)aSearch->Query)->Docs[i]!=NULL;
      doc=((WAISSearch *)aSearch->Query)->Docs[++i]){

    t=docIDFromAny(doc->DocumentID);
    local_id = anyToString(GetLocalID(t));
    freeDocID(t);
    sscanf(local_id,"%ld %ld %s",&start,&end,path);
    if(strcmp(path, "/")) {
      p=strrchr(path,'/');
      if(p)
	*p='\0';
    }
    getwd(mpath,198);
    if(index_directory && !substrcmp(path, index_directory))
      strcpy(path, index_directory);

    chdir(path);

    loadFileHeaders(path,headers,&headerNum,database_name, 
		    ((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved,
		    index_directory); 
    chdir(mpath);
  }
  /**** jim jim jim */
  wais_response = makeWAISSearchResponse(seedwords_used,headers,NULL,
					 NULL,NULL,NULL,NULL,diags);
  response = makeSearchResponseAPDU(search_status,0L,
				    headerNum + ((diags == NULL) ? 0 : 1),
				    0L,UNUSED,SUCCESS,
				    aSearch->ReferenceID, wais_response);
  /* write it */
  {
    char *buff;
    long len;
    boolean it_fit = true;

    while(headerNum >= 0) {
      buff = *sendBuf;
      len = *sendBufLen;
      if (headerNum*(sizeof(WAISDocumentHeader)+100) > len || (buff = writeSearchResponseAPDU(response,buff,&len)) == NULL) {

	it_fit = false;		/* didn't make it in the buffer. */
	headerNum--;
	s_free(headers[headerNum]);
	headers[headerNum] = NULL;

	s_free(wais_response);
	wais_response = makeWAISSearchResponse(seedwords_used,headers,NULL,
					       NULL,NULL,NULL,NULL,diags);

	freeSearchResponseAPDU(response);
	response = makeSearchResponseAPDU(search_status,0L,
					  headerNum + ((diags == NULL) ? 0:1),
					  0L,UNUSED,SUCCESS,
					  aSearch->ReferenceID, wais_response);
      }
      else {
	break;
      }
    }
    *sendBuf = buff;
    *sendBufLen = len;
    if (!it_fit) {
      waislog(WLOG_HIGH, WLOG_WARNING,

	      "Buffer overflow, adjusted results from %ld",
	      ((WAISSearch *)aSearch->Query)->MaxDocumentsRetrieved);
    }
  }
  {				/* generate report on results. */
    char *message;
    long size, i;

    /* calculate total length needed for log report */
    for(size = 0L, i = 0; i < headerNum; i++)
      size+=(headers[i]->DocumentID->size+2);
    if (size > 0) {
      message = s_malloc(size);
      message[0] = 0;

      for (i = 0; i < headerNum; i++) {
	char docname[MAX_FILE_NAME_LEN+50];
	DocID *docid = docIDFromAny(headers[i]->DocumentID);
	char *docidstring = anyToString(GetLocalID(docid));

	sprintf(docname, "%s", docidstring);
	s_strncat(message, docname, headers[i]->DocumentID->size, size);

	s_free(docid); 
	s_free(docidstring);

	if ( i < headerNum-1)
	  strcat(message, ", ");
      }
      waislog(WLOG_LOW, WLOG_RESULTS,
	      "Returned %d results: %s", headerNum, message);
      s_free(message);
    }
    else
      waislog(WLOG_LOW, WLOG_RESULTS,
	      "Returned 0 results.  Aww.");
  }
  freeWAISSearch((WAISSearch*)aSearch->Query);
  freeSearchAPDU(aSearch);
  freeSearchResponseAPDU(response);
  freeWAISSearchResponse(wais_response); /* free headers & seed_words_used */
}

/*
 * Ordering callback handed to scandir() by loadFileHeaders(): compares two
 * directory entries by name with strcmp(), so the result is negative, zero
 * or positive as the first name sorts before, equal to or after the second.
 */
static int
alphasort(d1, d2)
struct dirent **d1;
struct dirent **d2;
{
  char *first_name = (*d1)->d_name;
  char *second_name = (*d2)->d_name;

  return strcmp(first_name, second_name);
}

/*
 * Selection callback handed to scandir() by loadFileHeaders().
 *
 * Returns 1 for regular files and for directories, 0 for everything else
 * and for entries that cannot be stat()ed.  Directories whose name ends in
 * '.' (which covers "." and "..") are rejected.  An accepted directory gets
 * a '/' appended to its name in place, so callers see e.g. "pub/".
 *
 * NOTE(review): the strcat() assumes e->d_name has room for one more
 * character; that depends on how the platform lays out struct dirent.
 */
static int
filesonly(e)
struct dirent *e;
{
  struct stat sb;
  size_t len;

  /* If stat() fails, sb holds no valid data and must not be inspected. */
  if (stat(e->d_name, &sb) < 0)
    return(0);

  if ((sb.st_mode & S_IFMT) == S_IFDIR) {
    len = strlen(e->d_name);
    if (len > 0 && e->d_name[len - 1] == '.')
      return(0);
    strcat(e->d_name,"/");
    return(1);
  }

  return((sb.st_mode & S_IFMT) == S_IFREG);
}

/*
 * Guess whether a file is text by sampling it: the bytes at offsets
 * length/2, length/3, ... length/99 must all be printable or whitespace.
 * Returns 1 for text, 0 otherwise (including unreadable or empty files).
 */
static int
ftpFileLooksLikeText(filename)
char *filename;
{
  FILE *fp;
  long flen;
  int divisor;
  int ch;
  int text = 1;

  fp = fopen(filename,"r");
  if (fp == NULL)
    return 0;

  fseek(fp,0L,2);		/* whence 2: seek to end of file */
  flen = ftell(fp);
  if (flen < 0)
    text = 0;

  /* one failed sample settles it, so stop at the first one */
  for (divisor = 2; text && divisor < 100; divisor++) {
    fseek(fp,flen/divisor,0);	/* whence 0: offset from start of file */
    ch = fgetc(fp);
    if (ch == EOF || (!isprint(ch) && !isspace(ch)))
      text = 0;
  }
  fclose(fp);
  return text;
}

/*
 * Build one document header for an FTP listing entry.  The local ID is
 * "0 <length> <filename>", the headline is the file name, and the single
 * document type is type_name.  The name is cut to MAX_FILENAME_LEN
 * characters so that neither local buffer can overflow.
 *
 * NOTE(review): theDocID is not released after anyFromDocID(), as in the
 * original code; whether it may be freed depends on anyFromDocID(), which
 * is defined elsewhere.
 */
static WAISDocumentHeader *
makeFTPFileHeader(filename, length, type_name, database_name)
char *filename;
long length;
char *type_name;
char *database_name;
{
  char headline[MAX_FILENAME_LEN + 1];
  char local_id[MAX_FILENAME_LEN + 60];
  char **type;
  DocID *theDocID;

  strncpy(headline, filename, MAX_FILENAME_LEN);
  headline[MAX_FILENAME_LEN] = '\0';	/* strncpy may not terminate */
  sprintf(local_id, "%ld %ld %s", 0L, length, headline);

  theDocID = makeDocID();
  theDocID->originalDatabase = stringToAny(database_name); /* XXX */
  theDocID->originalLocalID = stringToAny(local_id);

  type = (char**)s_malloc((size_t)(sizeof(char*) * 2));
  type[0] = s_strdup(type_name);
  type[1] = NULL;

  /* the byte length is reported as the line count too */
  return makeWAISDocumentHeader(anyFromDocID(theDocID),
				UNUSED, MAX_NORMAL_SCORE,
				UNUSED, length,length, type,
				s_strdup(database_name), /* XXX */
				NULL, /* date */
				s_strdup(headline),
				NULL);
}

/*
 * loadFileHeaders - append one header per entry of the current directory.
 *
 * The caller has already changed into the directory named by path; the
 * entries come from scandir(".") filtered by filesonly() and ordered by
 * alphasort().  Unless path is "/" or equals index_directory, a header of
 * type "FTP-DIR" for pathname_directory(path) is added first.  Each entry
 * is typed "FTP-DIR" (directory), "FTP" (.tar/.shar, unreadable or
 * binary-looking) or "TEXT".
 *
 * path            directory being listed; a path of "/" is rewritten to ""
 *                 in place so entry names come out as "/name".
 * headers         array with room for maxf headers plus a NULL terminator.
 * headerNum       in/out: number of headers already in the array.
 * database_name   stored in every header and document ID.
 * maxf            maximum number of headers the array may hold.
 * index_directory may be NULL; see above.
 *
 * No caller uses the (implicit int) return value.
 */
loadFileHeaders(path,headers, headerNum,database_name,maxf,index_directory)
char *path;
WAISDocumentHeader **headers;
long *headerNum;
char *database_name;
long maxf;
char *index_directory;
{
  struct dirent **list = NULL;
  struct stat sbuf;
  hit help;
  char *name;
  char *ext;
  char *type_name;
  long length;
  long k;
  int nfiles;
  int j;

  k = *headerNum;
  nfiles = scandir(".", &list, filesonly, alphasort);
  if (nfiles < 0)
    return 0;

  /* header for the enclosing directory -- only if there is room for it */
  if (k < maxf &&
      strcmp(path, "/") != 0 &&
      (index_directory == NULL ||
       strcmp(path, index_directory) != 0)) {
    pathname_directory(path, help.filename);
    /* an unreadable directory is still listed, with size 0 */
    length = (stat(help.filename,&sbuf) == 0) ? (long)sbuf.st_size : 0L;
    headers[k++] = makeFTPFileHeader(help.filename, length,
				     "FTP-DIR", database_name);
  }

  if (strcmp(path, "/") == 0) {
    *path = '\0';		/* so that "%s/%s" yields "/name" */
  }

  for (j = 0; j < nfiles && k < maxf; j++) {
    name = list[j]->d_name;

    /* skip names that would not fit into help.filename */
    if (strlen(path) + 1 + strlen(name) >= sizeof(help.filename))
      continue;
    sprintf(help.filename,"%s/%s",path,name);

    /* entry vanished since scandir(): there is nothing valid to report */
    if (stat(name,&sbuf) != 0)
      continue;
    length = (long)sbuf.st_size;

    /* The type is decided afresh for every entry; no verdict carries over
       from the previous file. */
    if (S_ISDIR(sbuf.st_mode)) {
      type_name = "FTP-DIR";
    }
    else {
      ext = strrchr(name,'.');
      if (ext != NULL &&
	  (!strcasecmp(ext + 1,"tar") || !strcasecmp(ext + 1,"shar")))
	type_name = "FTP";
      else if (ftpFileLooksLikeText(help.filename))
	type_name = "TEXT";
      else
	type_name = "FTP";
    }

    headers[k++] = makeFTPFileHeader(help.filename, length,
				     type_name, database_name);
  }

  /* keep the list NULL-terminated; slot maxf is reserved for this */
  if (k <= maxf)
    headers[k] = NULL;

  if (list != NULL) {
    for (j = 0; j < nfiles; j++)
      if (list[j] != NULL) free((char *)list[j]);
    free((char *)list);
  }
  *headerNum = k;
  return 0;
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.