ftp.nice.ch/pub/next/unix/network/www/apache.1.2.5.NIHS.bs.tar.gz#/apache.1.2.5.NIHS.bs/src/support/logresolve.c

This is logresolve.c in view mode; [Download] [Up]

/***                                                                      ***\
    logresolve 1.1

    Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
    UUNET Canada, April 16, 1995

    Rewritten by David Robinson. (drtr@ast.cam.ac.uk)

    Usage: logresolve [-s filename] [-c] < access_log > new_log

    Arguments:
       -s filename     name of a file to record statistics
       -c              check the DNS for a matching A record for the host.

    Notes:

    To generate meaningful statistics from an HTTPD log file, it's good
    to have the domain name of each machine that accessed your site, but
    doing this on the fly can slow HTTPD down.

    Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
    resolution off. Before running your stats program, just run your log
    file through this program (logresolve) and all of your IP numbers will
    be resolved into hostnames (where possible).

    logresolve takes an HTTPD access log (in the COMMON log file format,
    or any other format that has the IP number/domain name as the first
    field for that matter), and outputs the same file with all of the
    domain names looked up. Where no domain name can be found, the IP
    number is left in.

    To minimize impact on your nameserver, logresolve has its very own
    internal hash-table cache. This means that each IP number will only
    be looked up the first time it is found in the log file.

    The -c option causes logresolve to apply the same check as httpd
    compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
    address, it looks up the IP addresses for the hostname and checks
    that one of these matches the original address.

\***                                                                      ***/

#include <sys/types.h>

#include <ctype.h>
#include <netdb.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include <sys/socket.h>

#include <netinet/in.h>
#ifndef MPE
#include <arpa/inet.h>
#endif

static void cgethost(struct in_addr ipnum, char *string, int check);
static int getline(char *s, int n);
static void stats(FILE *output);


/* maximum line length */
#define MAXLINE 1024

/* maximum length of a domain name */
#ifndef MAXDNAME
#define MAXDNAME 256
#endif

/* number of buckets in cache hash table */
#define BUCKETS 256

#ifdef MPE
char *strdup (const char *str)
{
  char *dup; 
             
  if(!(dup = (char *)malloc (strlen (str) + 1)))
      return NULL;
  dup = strcpy (dup, str);
     
  return dup; 
}
#endif

/*
 * struct nsrec - record of nameservice for cache linked list
 * 
 * ipnum - IP number hostname - hostname noname - nonzero if IP number has no
 * hostname, i.e. hostname=IP number
 */

struct nsrec {
    struct in_addr ipnum;
    char *hostname;
    int	noname;
    struct nsrec *next;
} *nscache[BUCKETS];

/*
 * statistics - obvious
 */

/* largeste value for h_errno */
#define MAX_ERR (NO_ADDRESS)
#define UNKNOWN_ERR (MAX_ERR+1)
#define NO_REVERSE  (MAX_ERR+2)

static int cachehits = 0;
static int cachesize = 0;
static int entries = 0;
static int resolves = 0;
static int withname = 0;
static int errors[MAX_ERR+3];

/*
 * cgethost - gets hostname by IP address, caching, and adding unresolvable
 * IP numbers with their IP number as hostname, setting noname flag
 */

static void
cgethost(ipnum, string, check)
struct in_addr ipnum;
char *string;
int check;
{
    struct nsrec **current, *new;
    struct hostent *hostdata;
    char *name;
    extern int h_errno;  /* some machines don't have this in their headers */

    current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
	       	(ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];

    while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
	current = & (*current)->next;

    if (*current == NULL)
    {
	cachesize++;
	new = (struct nsrec *) malloc(sizeof(struct nsrec));
	if (new == NULL)
	{
	    perror("malloc");
	    fprintf(stderr, "Insufficient memory\n");
	    exit(1);
	}
	*current = new;
	new->next = NULL;

	new->ipnum = ipnum;
	
	hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
				 AF_INET);
	if (hostdata == NULL)
	{
	    if (h_errno > MAX_ERR) errors[UNKNOWN_ERR]++;
	    else errors[h_errno]++;
	    new->noname = h_errno;
	    name = strdup(inet_ntoa(ipnum));
	} else
	{
	    new->noname = 0;
	    name = strdup(hostdata->h_name);
	    if (check)
	    {
		if (name == NULL)
		{
		    perror("strdup");
		    fprintf(stderr, "Insufficient memory\n");
		    exit(1);
		}
		hostdata = gethostbyname(name);
		if (hostdata != NULL)
		{
		    char **hptr;
		    
		    for (hptr=hostdata->h_addr_list; *hptr != NULL; hptr++)
			if(((struct in_addr *)(*hptr))->s_addr == ipnum.s_addr)
			    break;
		    if (*hptr == NULL) hostdata = NULL;
		}
		if (hostdata == NULL)
		{
		    fprintf(stderr, "Bad host: %s != %s\n", name,
			    inet_ntoa(ipnum));
		    new->noname = NO_REVERSE;
		    free(name);
		    name = strdup(inet_ntoa(ipnum));
		    errors[NO_REVERSE]++;
		}
	    }
	}
	new->hostname = name;
	if (new->hostname == NULL)
	{
	    perror("strdup");
	    fprintf(stderr, "Insufficient memory\n");
	    exit(1);
	}
    } else
	cachehits++;

    /* size of string == MAXDNAME +1 */
    strncpy(string, (*current)->hostname, MAXDNAME);
    string[MAXDNAME] = '\0';
}

/*
 * prints various statistics to output
 */

static void
stats(output)
FILE *output;
{
    int i;
    char *ipstring;
    struct nsrec *current;
    char *errstring[MAX_ERR+3];

    for (i=0; i < MAX_ERR+3; i++) errstring[i] = "Unknown error";
    errstring[HOST_NOT_FOUND] = "Host not found";
    errstring[TRY_AGAIN] = "Try again";
    errstring[NO_RECOVERY] = "Non recoverable error";
    errstring[NO_DATA] = "No data record";
    errstring[NO_ADDRESS] = "No address";
    errstring[NO_REVERSE] = "No reverse entry";
    
    fprintf(output, "logresolve Statistics:\n");

    fprintf(output, "Entries: %d\n", entries);
    fprintf(output, "    With name   : %d\n", withname);
    fprintf(output, "    Resolves    : %d\n", resolves);
    if (errors[HOST_NOT_FOUND])
	fprintf(output, "    - Not found : %d\n", errors[HOST_NOT_FOUND]);
    if (errors[TRY_AGAIN])
	fprintf(output, "    - Try again : %d\n", errors[TRY_AGAIN]);
    if (errors[NO_DATA])
	fprintf(output, "    - No data   : %d\n", errors[NO_DATA]);
    if (errors[NO_ADDRESS])
	fprintf(output, "    - No address: %d\n", errors[NO_ADDRESS]);
    if (errors[NO_REVERSE])
	fprintf(output, "    - No reverse: %d\n", errors[NO_REVERSE]);
    fprintf(output, "Cache hits      : %d\n", cachehits);
    fprintf(output, "Cache size      : %d\n", cachesize);
    fprintf(output, "Cache buckets   :     IP number * hostname\n");

    for (i = 0; i < BUCKETS; i++)
	for (current = nscache[i]; current != NULL; current = current->next)
	{
	    ipstring = inet_ntoa(current->ipnum);
	    if (current->noname == 0)
		fprintf(output, "  %3d  %15s - %s\n", i, ipstring,
			current->hostname);
	    else
	    {
		if (current->noname > MAX_ERR+2)
		    fprintf(output, "  %3d  %15s : Unknown error\n", i,
			    ipstring);
		else
		    fprintf(output, "  %3d  %15s : %s\n", i, ipstring,
			    errstring[current->noname]);
	    }
	}
}


/*
 * gets a line from stdin
 */

static int
getline(s, n)
char *s;
int n;
{
    char *cp;
    
    if (!fgets(s, n, stdin))
	return (0);
    cp = strchr(s, '\n');
    if (cp)
	*cp = '\0';
    return (1);
}

int
main(argc, argv)
int argc;
char *argv[];
{
    struct in_addr ipnum;
    char *bar, hoststring[MAXDNAME+1], line[MAXLINE], *statfile;
    int i, check;

    check = 0;
    statfile = NULL;
    for (i=1; i < argc; i++)
    {
	if (strcmp(argv[i], "-c") == 0) check = 1;
	else if (strcmp(argv[i], "-s") == 0)
	{
	    if (i == argc-1)
	    {
		fprintf(stderr, "logresolve: missing filename to -s\n");
		exit(1);
	    }
	    i++;
	    statfile = argv[i];
	}
	else
	{
	    fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output");
	    exit(0);
	}
    }
    

    for (i = 0; i < BUCKETS; i++) nscache[i] = NULL;
    for (i=0; i < MAX_ERR+2; i++) errors[i] = 0;

    while (getline(line, MAXLINE))
    {
	if (line[0] == '\0') continue;
	entries++;
	if (!isdigit(line[0]))
	{ /* short cut */
	    puts(line);
	    withname++;
	    continue;
	}
	bar = strchr(line, ' ');
	if (bar != NULL) *bar = '\0';
	ipnum.s_addr = inet_addr(line);
	if (ipnum.s_addr == 0xffffffffu)
	{
	    if (bar != NULL) *bar = ' ';
	    puts(line);
	    withname++;
	    continue;
	}

	resolves++;

	cgethost(ipnum, hoststring, check);
	if (bar != NULL)
	    printf("%s %s\n", hoststring, bar+1);
	else
	    puts(hoststring);
    }
    
    if (statfile != NULL)
    {
	FILE *fp;
	fp = fopen(statfile, "w");
	if (fp == NULL)
	{
	    fprintf(stderr, "logresolve: could not open statistics file '%s'\n"
		    , statfile);
	    exit(1);
	}
	stats(fp);
	fclose(fp);
    }
    
    return (0);
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.