ftp.nice.ch/pub/next/unix/network/news/nn.6.4.16.s.tar.gz#/nn/collect.c

This is collect.c in view mode; [Download] [Up]

/*
 *	(c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
 *
 *	Collect and save article information in database.
 */

#include "config.h"
#include "db.h"
#include "news.h"

/*
 * When defined, build_hdr adds the number of '<' characters found in
 * news.ng_ref to the reply count stored in db_hdr.dh_replies.
 */
#define COUNT_RE_REFERENCES	/* no of >>> depends on Reference: line */

/*
 * Non-zero: collect_article logs and skips articles that are empty or
 * for which news.ng_groups is NULL instead of entering them in the database.
 */
export int ignore_bad_articles = 1;	/* no Newsgroups: line */
/* Non-zero: collect_article also unlinks the articles it rejects. */
export int remove_bad_articles = 0;
/*
 * When non-zero, collect_article skips articles whose m_time() value is
 * below this limit, unless the group has M_INCLUDE_OLD set.
 */
export time_t max_article_age = 0;

import int trace, debug_mode;

extern time_stamp pack_date();

/* Number of articles rejected as bad or old; reset by collect_group. */
static long bad_count;

/*
 * Index ('x') and data ('d') files of the group being collected;
 * opened and closed by collect_group, data is written by build_hdr.
 */
static FILE *ix, *data;

static do_auto_archive(gh, f, num)
group_header *gh;
register FILE *f;
article_number num;
{
    char line[200];
    article_number last;
    register FILE *arc;
    register int c;
    off_t start;
    static char *arc_header = "Archived-Last: ";
    /* Header format: Archived-Last: 88888888 group.name */
    /* Fixed constants length == 15 and offset == 24 are used below */

    arc = open_file(gh->archive_file, OPEN_READ);
    last = 0;
    start = 0;
    if (arc != NULL) {
	while (fgets(line, 200, arc) != NULL) {
	    if (strncmp(line, arc_header, 15)) {
		log_entry('E', "%s not archive for %s\n",
			  gh->archive_file, gh->group_name);
		gh->master_flag &= ~M_AUTO_ARCHIVE;
		fclose(arc);
		return;
	    }
	    if (strncmp(line + 24, gh->group_name, gh->group_name_length)) {
		start = ftell(arc);
		continue;
	    }
	    last = atol(line + 15);
	    break;
	}
	fclose(arc);
	arc = NULL;
    }

    if (last >= num) return;

    arc = open_file(gh->archive_file, last > 0 ? OPEN_UPDATE : OPEN_CREATE);
    if (arc == NULL) {
	log_entry('E', "Cannot create archive file: %s\n", gh->archive_file);
	gh->master_flag &= ~M_AUTO_ARCHIVE;
	return;
    }

    fseek(arc, start, 0);
    fprintf(arc, "%s%8ld %s\n", arc_header, (long)num, gh->group_name);
    fseek(arc, (off_t)0, 2);

    fseek(f, (off_t)0, 0);
    while ((c = getc(f)) != EOF) putc(c, arc);
    putc(NL, arc);
    fclose(arc);
}

/*
 *	Fill in db_hdr and db_data for one database entry of the given
 *	type and write it to the data file with db_write_art.
 *
 *	A DH_SUB_DIGEST entry takes its sender, subject, line count,
 *	positions and date from `digest' (falling back on news.ng_date
 *	when the sub-article has no date); every other type takes them
 *	from `news'.  The subject of a DH_DIGEST_HEADER entry is
 *	prefixed with '@'.
 */
static build_hdr(type)
int type;
{
    register char *sender, *subject;
    int re;

    db_data.dh_type = type;

    if (type != DH_SUB_DIGEST) {
	/* no ng_from: use ng_reply as the sender instead */
	if (news.ng_from == NULL)
	    news.ng_from = news.ng_reply;

	sender = news.ng_from;
	subject = news.ng_subj;

	db_hdr.dh_lines = news.ng_lines;

	db_hdr.dh_hpos = 0;
	db_hdr.dh_fpos = (int16)(news.ng_fpos);
	db_hdr.dh_lpos = news.ng_lpos;

	db_hdr.dh_date = pack_date(news.ng_date);
    } else {
	sender = digest.dg_from;
	subject = digest.dg_subj;

	db_hdr.dh_lines = digest.dg_lines;

	/* dh_fpos is stored relative to the start of the sub-article */
	db_hdr.dh_hpos = digest.dg_hpos;
	db_hdr.dh_fpos = (int16)(digest.dg_fpos - db_hdr.dh_hpos);
	db_hdr.dh_lpos = digest.dg_lpos;

	if (digest.dg_date)
	    db_hdr.dh_date = pack_date(digest.dg_date);
	else
	    db_hdr.dh_date = pack_date(news.ng_date);
    }

    /* sender name */
    if (sender == NULL)
	db_hdr.dh_sender_length = 0;
    else
	db_hdr.dh_sender_length =
	    pack_name(db_data.dh_sender, sender, NAME_LENGTH);

    /* subject, with a leading '@' marking a digest header */
    db_hdr.dh_subject_length = 0;
    if (type == DH_DIGEST_HEADER) {
	db_data.dh_subject[0] = '@';
	db_hdr.dh_subject_length = 1;
    }

    db_hdr.dh_subject_length +=
	pack_subject(db_data.dh_subject + db_hdr.dh_subject_length, subject,
		     &re, DBUF_SIZE);

#ifdef COUNT_RE_REFERENCES
    /*
     * Bit 0x80 records that pack_subject reported a non-zero `re';
     * the low seven bits count the '<' characters in ng_ref and
     * stop at 0x7f.
     */
    if (re != 0)
	re = 0x80;

    if (news.ng_ref != NULL) {
	register char *ref;

	for (ref = news.ng_ref; *ref && (re & 0x7f) != 0x7f; ref++)
	    if (*ref == '<')
		re++;
    }
#endif
    db_hdr.dh_replies = re;

    if (db_write_art(data) < 0)
	write_error();
}


/*
 *	Collect a single article.
 *
 *	Opens article `art_num' of group `gh' with open_news_article and
 *	writes its database entries through build_hdr: one DH_NORMAL
 *	entry for an ordinary article, or one DH_DIGEST_HEADER entry
 *	followed by one DH_SUB_DIGEST entry per sub-article found by
 *	get_digest_article when the article is split as a digest.
 *	If the group has M_AUTO_ARCHIVE set, the article is also passed
 *	to do_auto_archive.
 *
 *	Returns the number of entries written.  Returns 0 when nothing
 *	was written (the article could not be opened, or was rejected as
 *	empty, bad, or old).  With NNTP defined, returns -1 when the
 *	open failed and nntp_failed is set.
 */
static collect_article(gh, art_num)
register group_header *gh;
article_number art_num;
{
    FILE *art_file;
    news_header_buffer nhbuf, dgbuf;
    article_header art_hdr;
    int mode, count;
    cross_post_number *cp_ptr;
    long age;

    count = 0;	/* number of entries written via build_hdr */

    db_hdr.dh_number = art_num;

    /* get article header */

    art_hdr.a_number = art_num;
    art_hdr.hpos = (off_t)0;
    art_hdr.lpos = (off_t)0;
    art_hdr.flag = 0;

    mode = FILL_NEWS_HEADER | FILL_OFFSETS | SKIP_HEADER;
    /* DIGEST_CHECK is requested only when no group flag settles the matter */
    if ((gh->master_flag & (M_CONTROL | M_NEVER_DIGEST | M_ALWAYS_DIGEST)) == 0)
	mode |= DIGEST_CHECK;
#ifdef NNTP
    if ((gh->master_flag & M_ALWAYS_DIGEST) == 0)
	mode |= LAZY_BODY;
#endif
    if ((art_file = open_news_article(&art_hdr, mode, nhbuf, (char *)NULL)) == NULL) {

#ifdef NNTP
	import nntp_failed;

	if (nntp_failed) {
	    /*
	     * connection to nntp_server is broken
	     * stop collection of articles immediately
	     */
	    return -1;
	}
#endif
	/*
	 * it is not really necessary to save anything in the data file
	 * we simply use the index file to get the *first* available article
	 */
	return 0;
    }

    /* a return value of (FILE *)1 is not a stream; nothing to fclose */
    if (art_file == (FILE *)1) {	/* empty file */
	if (!ignore_bad_articles) return 0;
	/* force the "bad article" handling below */
	news.ng_groups = NULL;
	art_file = NULL;
    } else
	if ( max_article_age &&	/* == 0 if use_nntp */
	    (gh->master_flag & M_INCLUDE_OLD) == 0 &&
	    (age = m_time(art_file)) < max_article_age) {

	    /* article is older than the limit: log it and skip it */
	    if (remove_bad_articles) unlink(group_path_name);

	    log_entry('O', "%sold article (%ld days): %s/%ld",
		      remove_bad_articles ? "removed " : "",
		      (cur_time() - age) / (24 * 60 * 60),
		      current_group->group_name, (long)art_num);
	    bad_count++;
	    fclose(art_file);
	    return 0;
	}

    /* reject articles without a Newsgroups: line (and empty ones, see above) */
    if (ignore_bad_articles && news.ng_groups == NULL) {
	char *rem = "";

	if (!use_nntp && remove_bad_articles) {
	    unlink(group_path_name);
	    rem = "removed ";
	}

	log_entry('B', "%sbad article: %s/%ld", rem,
		  current_group->group_name, (long)art_num);
	if (art_file != NULL) fclose(art_file);
	bad_count++;
	return 0;
    }

    /* map cross-postings into a list of group numbers */

    db_hdr.dh_cross_postings = 0;

    if (gh->master_flag & M_CONTROL) {
	/* we cannot trust the Newsgroups: line in the control group */
	/* so we simply ignore it (i.e. use "Newsgroups: control") */
	goto dont_digest;
    }

    if (news.ng_groups) {
	char *curg, *nextg;
	group_header *gh1;

	/*
	 * Walk the comma separated list in place: each ',' is
	 * overwritten with NUL so curg is a proper string.
	 * Names for which lookup() returns NULL are skipped.
	 */
	for (nextg = news.ng_groups, cp_ptr = db_data.dh_cross; *nextg; ) {
	    curg = nextg;

	    if (nextg = strchr(curg, ','))
		*nextg++ = NUL;
	    else
		nextg = "";	/* last name: terminates the loop */

	    if (strcmp(gh->group_name, curg) == 0)
		gh1 = gh;
	    else
	    if ((gh1 = lookup(curg)) == NULL) continue;

	    *cp_ptr++ = NETW_CROSS_EXT(gh1->group_num);
	    /* stop after DBUF_SIZE entries have been stored */
	    if (++db_hdr.dh_cross_postings == DBUF_SIZE) break;
	}
    }

    if (db_hdr.dh_cross_postings == 1)
	db_hdr.dh_cross_postings = 0;	/* only current group */

    if (gh->master_flag & M_NEVER_DIGEST)
	goto dont_digest;

    /* split digest */

    if ((gh->master_flag & M_ALWAYS_DIGEST) || (news.ng_flag & N_DIGEST)) {
	int any = 0, cont = 1;

	skip_digest_body(art_file);

	/*
	 * The loop ends when get_digest_article returns a negative
	 * value, or after the entry for which it returned 0 has been
	 * written.
	 */
	while (cont && (cont = get_digest_article(art_file, dgbuf)) >= 0) {

	    /* the digest header entry is written once, before the first sub-article */
	    if (any == 0) {
		build_hdr(DH_DIGEST_HEADER);	/* write DIGEST_HEADER */
		count++;
		db_hdr.dh_cross_postings = 0;	/* no cross post in sub */
		any++;
	    }
	    build_hdr(DH_SUB_DIGEST);	/* write SUB_DIGEST */
	    count++;
	}

	/* no sub-article found: fall through and treat as a normal article */
	if (any) goto finish;
    }

    /* not a digest */

 dont_digest:

    build_hdr(DH_NORMAL);	/* normal article */
    count++;

finish:

    if (gh->master_flag & M_AUTO_ARCHIVE)
	do_auto_archive(gh, art_file, art_num);

    fclose(art_file);

    return count;
}


/*
 *	Collect the articles of group `gh' that are not yet in the database
 *
 *	On entry, init_group has been called to set up the proper environment
 *
 *	Articles last_db_article+1 .. last_a_article are handed to
 *	collect_article one at a time.  For every article, the data file
 *	offset recorded just before it was collected is appended to the
 *	index file.  Unless RENUMBER_DANGER is defined, articles that
 *	produce no entry while the data file is still empty get no index
 *	entry; first_db_article is moved past them instead.
 *
 *	The loop stops early when s_hangup is set.
 *
 *	Returns the number of entries written to the data file, or
 *	(NNTP only) -1 if collect_article reported a broken connection;
 *	in that case last_db_article is left at the last article that
 *	was actually collected.
 */

static long collect_group(gh)
register group_header *gh;
{
    long article_count, temp, obad;
    article_number start_collect;

    if (gh->last_db_article == 0) {
	/* nothing collected so far: start with the first available article */
	gh->first_db_article = gh->first_a_article;
	gh->last_db_article = gh->first_db_article - 1;
    }

    if (gh->last_db_article >= gh->last_a_article) return 0;

    /* continue writing at the saved offsets, or create fresh files */

    if (gh->index_write_offset) {
	ix = open_data_file(gh, 'x', OPEN_UPDATE|MUST_EXIST);
	fseek(ix, gh->index_write_offset, 0);
    } else
    	ix = open_data_file(gh, 'x', OPEN_CREATE|MUST_EXIST);

    if (gh->data_write_offset) {
	data = open_data_file(gh, 'd', OPEN_UPDATE|MUST_EXIST);
	fseek(data, gh->data_write_offset, 0);
    } else
	data = open_data_file(gh, 'd', OPEN_CREATE|MUST_EXIST);

    article_count = 0;
    start_collect = gh->last_db_article+1;

    if (debug_mode) {
	printf("\t\t%s (%ld..%ld)\r",
	       gh->group_name, (long)start_collect, (long)gh->last_a_article);
	fl;
    }
    bad_count = obad = 0;

    while (gh->last_db_article < gh->last_a_article) {
	if (s_hangup) break;
	gh->last_db_article++;
	if (debug_mode) {
	    printf("\r%ld", (long)gh->last_db_article);
	    if (obad != bad_count) printf("\t%ld", bad_count);
	    obad = bad_count;
	    fl;
	}

	/* remember where the entries of this article will start */
	gh->data_write_offset = ftell(data);
#ifdef NNTP
	gh->index_write_offset = ftell(ix);
#endif
	temp = collect_article(gh, gh->last_db_article);
#ifdef NNTP
	if (temp < 0) {
	    /* connection failed, current article is not collected */
	    gh->last_db_article--;
	    article_count = -1;
	    goto out;
	}
#endif
#ifndef RENUMBER_DANGER
	if (temp == 0 && gh->data_write_offset == (off_t)0) {
	    /* nothing in the data file yet: skip leading missing articles */
	    gh->first_db_article = gh->last_db_article + 1;
	    continue;
	}
#endif
	if (!db_write_offset(ix, &(gh->data_write_offset)))
	    write_error();
	article_count += temp;
    }

    if (start_collect < gh->first_db_article)
	start_collect = gh->first_db_article;

    /*
     * article numbers and the count are (or may be) long:
     * print them with %ld and explicit casts
     */
    if (trace && start_collect <= gh->last_db_article)
	log_entry('T', "Col %s (%ld to %ld) %ld",
		  gh->group_name,
		  (long)start_collect, (long)gh->last_db_article,
		  article_count);

    if (debug_mode)
	printf("\nCol %s (%ld to %ld) %ld",
	       gh->group_name,
	       (long)start_collect, (long)gh->last_db_article,
	       article_count);

    /* save the positions at which the next collection will continue */
    gh->data_write_offset = ftell(data);
    gh->index_write_offset = ftell(ix);

 out:
    fclose(data);
    fclose(ix);

    if (debug_mode) putchar(NL);

    return article_count;
}


/*
 *	Collect new articles in all groups.
 *
 *	Groups with M_IGNORE_GROUP set are skipped, groups with
 *	M_MUST_CLEAN set are cleaned first, and groups whose
 *	last_db_article already equals last_a_article are only
 *	unblocked (if M_BLOCKED is set).  For a group where init_group
 *	fails, the database range is set to last_a_article and
 *	"no directory" is logged once (tracked through M_NO_DIRECTORY).
 *
 *	After a group has been collected, M_EXPIRE and M_BLOCKED are
 *	cleared and the group entry is written with db_write_group.
 *
 *	Returns non-zero on normal completion, and zero when the loop was
 *	stopped by s_hangup or (NNTP only) by a negative result from
 *	collect_group.
 */
do_collect()
{
    register group_header *gh;
    long col_article_count, temp;
    int col_group_count;
    time_t start_time;

    start_time = cur_time();
    col_article_count = col_group_count = 0;
    current_group = NULL; /* for init_group */
    temp = 0;

    Loop_Groups_Header(gh) {
	if (s_hangup) {
	    temp = -1;
	    break;
	}

	if (gh->master_flag & M_IGNORE_GROUP) continue;

	if (gh->master_flag & M_MUST_CLEAN)
	    clean_group(gh);

	if (gh->last_db_article == gh->last_a_article) {
	    /* nothing new; a blocked group must still be released */
	    if (gh->master_flag & M_BLOCKED) goto unblock_group;
	    continue;
	}

	if (!init_group(gh)) {
	    if ((gh->master_flag & M_NO_DIRECTORY) == 0) {
		log_entry('R', "%s: no directory", gh->group_name);
		gh->master_flag |= M_NO_DIRECTORY;
	    }
	    gh->last_db_article = gh->last_a_article;
	    gh->first_db_article = gh->last_a_article;	/* OBS: not first */
	    gh->master_flag &= ~(M_EXPIRE | M_BLOCKED);
	    db_write_group(gh);
	    continue;
	}

	if (gh->master_flag & M_NO_DIRECTORY) {
	    /* The directory has been created now */
	    gh->master_flag &= ~M_NO_DIRECTORY;
	    clean_group(gh);
	}

	temp = collect_group(gh);
#ifdef NNTP
	if (temp < 0) {
	    /* connection broken */
	    gh->master_flag &= ~M_EXPIRE;	/* remains blocked */
	    db_write_group(gh);
	    break;
	}
#endif
	if (temp > 0) {
	    col_article_count += temp;
	    col_group_count++;
	}

     unblock_group:
	gh->master_flag &= ~(M_EXPIRE | M_BLOCKED);
	db_write_group(gh);
    }

    /* the elapsed time is cast so the argument matches %ld */
    if (col_article_count > 0)
	log_entry('C', "Collect: %ld art, %d gr, %ld s",
		  col_article_count, col_group_count,
		  (long)(cur_time() - start_time));

    return temp >= 0;
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.