This is collect.c in view mode; [Download] [Up]
/*
 *      (c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
 *
 *      Collect and save article information in database.
 */

#include "config.h"
#include "db.h"
#include "news.h"

#define COUNT_RE_REFERENCES     /* no of >>> depends on Reference: line */

export int ignore_bad_articles = 1;     /* no Newsgroups: line */
export int remove_bad_articles = 0;
export time_t max_article_age = 0;

import int trace, debug_mode;

extern time_stamp pack_date();

/* number of old/bad articles skipped; reset by collect_group per group */
static long bad_count;

/* index ('x') and data ('d') files of the group being collected */
static FILE *ix, *data;


/*
 * Append the article open on f to the archive file of group gh.
 *
 * The archive file is first scanned for an "Archived-Last:" header line
 * belonging to gh; if the number recorded there is >= num, nothing is
 * done.  Otherwise the header line is (re)written with num and the
 * complete contents of f, followed by a newline, are appended at the
 * end of the archive file.
 *
 * If the archive file contains a line which is not such a header before
 * the group's own header is found, or if the file cannot be opened for
 * writing, the problem is logged and M_AUTO_ARCHIVE is cleared in
 * gh->master_flag.
 *
 * f is rewound to offset 0 and read to EOF; it is not closed here.
 */
static do_auto_archive(gh, f, num)
group_header *gh;
register FILE *f;
article_number num;
{
    char line[200];
    article_number last;
    register FILE *arc;
    register int c;
    off_t start;
    static char *arc_header = "Archived-Last: ";
    /* Header format: Archived-Last: 88888888 group.name */
    /* Fixed constants length == 15 and offset == 24 are used below */

    arc = open_file(gh->archive_file, OPEN_READ);
    last = 0;
    start = 0;
    if (arc != NULL) {
        while (fgets(line, 200, arc) != NULL) {
            if (strncmp(line, arc_header, 15)) {
                log_entry('E', "%s not archive for %s\n",
                          gh->archive_file, gh->group_name);
                gh->master_flag &= ~M_AUTO_ARCHIVE;
                fclose(arc);
                return;
            }
            /*
             * A header line shorter than 24 characters has no group name
             * at the fixed offset; treat it as "not this group" instead
             * of comparing against whatever bytes follow the terminator
             * in the buffer.
             */
            if (strlen(line) < 24 ||
                strncmp(line + 24, gh->group_name, gh->group_name_length)) {
                /* header of another group: remember where the next starts */
                start = ftell(arc);
                continue;
            }
            last = atol(line + 15);
            break;
        }
        fclose(arc);
        arc = NULL;
    }

    if (last >= num)
        return;         /* this article is already in the archive */

    arc = open_file(gh->archive_file, last > 0 ? OPEN_UPDATE : OPEN_CREATE);
    if (arc == NULL) {
        log_entry('E', "Cannot create archive file: %s\n", gh->archive_file);
        gh->master_flag &= ~M_AUTO_ARCHIVE;
        return;
    }

    /* write the group's header line at the offset found above */
    fseek(arc, start, 0);               /* whence 0: from start of file */
    fprintf(arc, "%s%8ld %s\n", arc_header, (long)num, gh->group_name);
    fseek(arc, (off_t)0, 2);            /* whence 2: to end of file */

    /* append a verbatim copy of the article */
    fseek(f, (off_t)0, 0);
    while ((c = getc(f)) != EOF)
        putc(c, arc);
    putc(NL, arc);
    fclose(arc);
}


/*
 * Fill in the remaining fields of db_hdr/db_data for one database entry
 * of the given type (DH_NORMAL, DH_DIGEST_HEADER or DH_SUB_DIGEST) and
 * write the entry to the data file with db_write_art.
 *
 * For DH_SUB_DIGEST the sender, subject, line count, positions and date
 * are taken from `digest' (the date falls back to news.ng_date when
 * digest.dg_date is not set); for the other types they are taken from
 * `news'.  The subject of a DH_DIGEST_HEADER entry is prefixed with '@'.
 *
 * db_hdr.dh_number and db_hdr.dh_cross_postings must have been set by
 * the caller.  write_error() is called if db_write_art fails.
 */
static build_hdr(type)
int type;
{
    register char *name, *subj;
    int re;

    db_data.dh_type = type;

    if (type == DH_SUB_DIGEST) {
        name = digest.dg_from;
        subj = digest.dg_subj;

        db_hdr.dh_lines = digest.dg_lines;

        db_hdr.dh_hpos = digest.dg_hpos;
        /* dh_fpos is stored relative to dh_hpos for digest sub-articles */
        db_hdr.dh_fpos = (int16)(digest.dg_fpos - db_hdr.dh_hpos);
        db_hdr.dh_lpos = digest.dg_lpos;

        db_hdr.dh_date = pack_date(digest.dg_date ? digest.dg_date
                                                  : news.ng_date);
    } else {
        /* no From: field; use the reply address instead */
        if (!news.ng_from)
            news.ng_from = news.ng_reply;

        name = news.ng_from;
        subj = news.ng_subj;

        db_hdr.dh_lines = news.ng_lines;

        db_hdr.dh_hpos = 0;
        db_hdr.dh_fpos = (int16)(news.ng_fpos);
        db_hdr.dh_lpos = news.ng_lpos;

        db_hdr.dh_date = pack_date(news.ng_date);
    }

    if (name) {
        db_hdr.dh_sender_length =
            pack_name(db_data.dh_sender, name, NAME_LENGTH);
    } else
        db_hdr.dh_sender_length = 0;

    if (type == DH_DIGEST_HEADER) {
        db_hdr.dh_subject_length = 1;
        db_data.dh_subject[0] = '@';
    } else
        db_hdr.dh_subject_length = 0;

    /* the packed subject is stored after the optional '@' prefix */
    db_hdr.dh_subject_length +=
        pack_subject(db_data.dh_subject + db_hdr.dh_subject_length,
                     subj, &re, DBUF_SIZE);

#ifdef COUNT_RE_REFERENCES
    /*
     * Replace the value returned by pack_subject: bit 0x80 records that
     * it was non-zero, and the low seven bits count the '<' characters
     * in the References: line (stopping at 0x7f).
     */
    if (re)
        re = 0x80;
    if (news.ng_ref) {
        for (name = news.ng_ref; *name; name++) {
            if ((re & 0x7f) == 0x7f)
                break;
            if (*name == '<')
                re++;
        }
    }
#endif

    db_hdr.dh_replies = re;

    if (db_write_art(data) < 0)
        write_error();
}


/*
 * Collect article art_num of group gh into the data file.
 *
 * Returns the number of database entries written for the article
 * (more than one when the article is split as a digest), 0 if nothing
 * was written (article missing, empty, too old, or bad), and - only
 * when compiled with NNTP - -1 if nntp_failed is set after the article
 * could not be opened.
 *
 * Old and bad articles are logged and counted in bad_count; they are
 * unlinked when remove_bad_articles is set (bad articles only when
 * use_nntp is not set).
 */
static collect_article(gh, art_num)
register group_header *gh;
article_number art_num;
{
    FILE *art_file;
    news_header_buffer nhbuf, dgbuf;
    article_header art_hdr;
    int mode, count;
    cross_post_number *cp_ptr;
    long age;

    count = 0;

    db_hdr.dh_number = art_num;

    /* get article header */

    art_hdr.a_number = art_num;
    art_hdr.hpos = (off_t)0;
    art_hdr.lpos = (off_t)0;
    art_hdr.flag = 0;

    mode = FILL_NEWS_HEADER | FILL_OFFSETS | SKIP_HEADER;
    if ((gh->master_flag & (M_CONTROL | M_NEVER_DIGEST | M_ALWAYS_DIGEST)) == 0)
        mode |= DIGEST_CHECK;

#ifdef NNTP
    if ((gh->master_flag & M_ALWAYS_DIGEST) == 0)
        mode |= LAZY_BODY;
#endif

    if ((art_file = open_news_article(&art_hdr, mode, nhbuf, (char *)NULL)) == NULL) {
#ifdef NNTP
        import nntp_failed;
        if (nntp_failed) {
            /*
             * connection to nntp_server is broken
             * stop collection of articles immediately
             */
            return -1;
        }
#endif
        /*
         * it is not really necessary to save anything in the data file
         * we simply use the index file to get the *first* available article
         */
        return 0;
    }

    if (art_file == (FILE *)1) {        /* empty file */
        if (!ignore_bad_articles)
            return 0;
        /* force the "bad article" handling below */
        news.ng_groups = NULL;
        art_file = NULL;
    } else if (max_article_age &&       /* == 0 if use_nntp */
               (gh->master_flag & M_INCLUDE_OLD) == 0 &&
               (age = m_time(art_file)) < max_article_age) {

        if (remove_bad_articles)
            unlink(group_path_name);

        /* the day count is cast explicitly to match the %ld conversion */
        log_entry('O', "%sold article (%ld days): %s/%ld",
                  remove_bad_articles ? "removed " : "",
                  (long)((cur_time() - age) / (24 * 60 * 60)),
                  current_group->group_name, (long)art_num);
        bad_count++;
        fclose(art_file);
        return 0;
    }

    if (ignore_bad_articles && news.ng_groups == NULL) {
        char *rem = "";

        if (!use_nntp && remove_bad_articles) {
            unlink(group_path_name);
            rem = "removed ";
        }

        log_entry('B', "%sbad article: %s/%ld", rem,
                  current_group->group_name, (long)art_num);
        if (art_file != NULL)
            fclose(art_file);
        bad_count++;
        return 0;
    }

    /* map cross-postings into a list of group numbers */

    db_hdr.dh_cross_postings = 0;

    if (gh->master_flag & M_CONTROL) {
        /* we cannot trust the Newsgroups: line in the control group */
        /* so we simply ignore it (i.e. use "Newsgroups: control") */
        goto dont_digest;
    }

    if (news.ng_groups) {
        char *curg, *nextg;
        group_header *gh1;

        /* news.ng_groups is split in place at each ',' */
        for (nextg = news.ng_groups, cp_ptr = db_data.dh_cross; *nextg; ) {
            curg = nextg;

            if ((nextg = strchr(curg, ',')) != NULL)
                *nextg++ = NUL;
            else
                nextg = "";             /* last name: terminates the loop */

            if (strcmp(gh->group_name, curg) == 0)
                gh1 = gh;
            else if ((gh1 = lookup(curg)) == NULL)
                continue;               /* group not found by lookup */

            *cp_ptr++ = NETW_CROSS_EXT(gh1->group_num);
            if (++db_hdr.dh_cross_postings == DBUF_SIZE)
                break;
        }
    }

    if (db_hdr.dh_cross_postings == 1)
        db_hdr.dh_cross_postings = 0;   /* only current group */

    if (gh->master_flag & M_NEVER_DIGEST)
        goto dont_digest;

    /* split digest */

    if ((gh->master_flag & M_ALWAYS_DIGEST) || (news.ng_flag & N_DIGEST)) {
        int any = 0, cont = 1;

        skip_digest_body(art_file);

        while (cont && (cont = get_digest_article(art_file, dgbuf)) >= 0) {

            if (any == 0) {
                build_hdr(DH_DIGEST_HEADER);    /* write DIGEST_HEADER */
                count++;
                db_hdr.dh_cross_postings = 0;   /* no cross post in sub */
                any++;
            }

            build_hdr(DH_SUB_DIGEST);   /* write SUB_DIGEST */
            count++;
        }

        if (any)
            goto finish;
    }

    /* not a digest */

 dont_digest:

    build_hdr(DH_NORMAL);       /* normal article */
    count++;

 finish:

    if (gh->master_flag & M_AUTO_ARCHIVE)
        do_auto_archive(gh, art_file, art_num);

    fclose(art_file);

    return count;
}


/*
 * Collect unread articles in current group
 *
 * On entry, init_group has been called to setup the proper environment
 *
 * Articles last_db_article+1 .. last_a_article are collected one by one;
 * the loop stops early if s_hangup becomes set.  Returns the number of
 * database entries written, or - only when compiled with NNTP - -1 if
 * collect_article reported a failed connection.
 */
static long collect_group(gh)
register group_header *gh;
{
    long article_count, temp, obad;
    article_number start_collect;

    if (gh->last_db_article == 0) {
        /* nothing collected so far: start at the first available article */
        gh->first_db_article = gh->first_a_article;
        gh->last_db_article = gh->first_db_article - 1;
    }

    if (gh->last_db_article >= gh->last_a_article)
        return 0;

    /* continue at the saved write offsets, or create the files afresh */
    if (gh->index_write_offset) {
        ix = open_data_file(gh, 'x', OPEN_UPDATE|MUST_EXIST);
        fseek(ix, gh->index_write_offset, 0);
    } else
        ix = open_data_file(gh, 'x', OPEN_CREATE|MUST_EXIST);

    if (gh->data_write_offset) {
        data = open_data_file(gh, 'd', OPEN_UPDATE|MUST_EXIST);
        fseek(data, gh->data_write_offset, 0);
    } else
        data = open_data_file(gh, 'd', OPEN_CREATE|MUST_EXIST);

    article_count = 0;
    start_collect = gh->last_db_article + 1;

    if (debug_mode) {
        printf("\t\t%s (%ld..%ld)\r",
               gh->group_name, (long)start_collect, (long)gh->last_a_article);
        fl;
    }
    bad_count = obad = 0;

    while (gh->last_db_article < gh->last_a_article) {
        if (s_hangup)
            break;
        gh->last_db_article++;
        if (debug_mode) {
            printf("\r%ld", (long)gh->last_db_article);
            if (obad != bad_count)
                printf("\t%ld", bad_count);
            obad = bad_count;
            fl;
        }

        /* remember where the entries of this article start */
        gh->data_write_offset = ftell(data);
#ifdef NNTP
        gh->index_write_offset = ftell(ix);
#endif
        temp = collect_article(gh, gh->last_db_article);
#ifdef NNTP
        if (temp < 0) {
            /* connection failed, current article is not collected */
            gh->last_db_article--;
            article_count = -1;
            goto out;
        }
#endif
#ifndef RENUMBER_DANGER
        /* nothing written to the data file yet: advance the first article */
        if (temp == 0 && gh->data_write_offset == (off_t)0) {
            gh->first_db_article = gh->last_db_article + 1;
            continue;
        }
#endif
        if (!db_write_offset(ix, &(gh->data_write_offset)))
            write_error();
        article_count += temp;
    }

    if (start_collect < gh->first_db_article)
        start_collect = gh->first_db_article;

    /*
     * The article numbers and the count are cast to long and printed
     * with %ld: article_count is a long, so %d did not match it.
     */
    if (trace && start_collect <= gh->last_db_article)
        log_entry('T', "Col %s (%ld to %ld) %ld",
                  gh->group_name,
                  (long)start_collect, (long)gh->last_db_article,
                  article_count);

    if (debug_mode)
        printf("\nCol %s (%ld to %ld) %ld",
               gh->group_name,
               (long)start_collect, (long)gh->last_db_article,
               article_count);

    gh->data_write_offset = ftell(data);
    gh->index_write_offset = ftell(ix);

 out:
    fclose(data);
    fclose(ix);

    if (debug_mode)
        putchar(NL);

    return article_count;
}


/*
 * Collect new articles in all groups visited by Loop_Groups_Header.
 *
 * Groups flagged M_IGNORE_GROUP are skipped, groups flagged M_MUST_CLEAN
 * are passed to clean_group first, and groups whose last_db_article
 * already equals last_a_article are only unblocked (if M_BLOCKED).
 * A group for which init_group fails is logged once as having no
 * directory and marked as fully collected.
 *
 * Returns non-zero unless the run was cut short: 0 is returned if
 * s_hangup was seen at the top of the loop or (NNTP only) if
 * collect_group reported a broken connection.
 */
do_collect()
{
    register group_header *gh;
    long col_article_count, temp;
    int col_group_count;
    time_t start_time;

    start_time = cur_time();
    col_article_count = col_group_count = 0;
    current_group = NULL;       /* for init_group */
    temp = 0;

    Loop_Groups_Header(gh) {
        if (s_hangup) {
            temp = -1;
            break;
        }

        if (gh->master_flag & M_IGNORE_GROUP)
            continue;

        if (gh->master_flag & M_MUST_CLEAN)
            clean_group(gh);

        if (gh->last_db_article == gh->last_a_article) {
            if (gh->master_flag & M_BLOCKED)
                goto unblock_group;
            continue;
        }

        if (!init_group(gh)) {
            if ((gh->master_flag & M_NO_DIRECTORY) == 0) {
                log_entry('R', "%s: no directory", gh->group_name);
                gh->master_flag |= M_NO_DIRECTORY;
            }
            gh->last_db_article = gh->last_a_article;
            gh->first_db_article = gh->last_a_article;  /* OBS: not first */
            gh->master_flag &= ~(M_EXPIRE | M_BLOCKED);
            db_write_group(gh);
            continue;
        }

        if (gh->master_flag & M_NO_DIRECTORY) {
            /* The directory has been created now */
            gh->master_flag &= ~M_NO_DIRECTORY;
            clean_group(gh);
        }

        temp = collect_group(gh);
#ifdef NNTP
        if (temp < 0) {
            /* connection broken */
            gh->master_flag &= ~M_EXPIRE;       /* remains blocked */
            db_write_group(gh);
            break;
        }
#endif

        if (temp > 0) {
            col_article_count += temp;
            col_group_count++;
        }

    unblock_group:
        gh->master_flag &= ~(M_EXPIRE | M_BLOCKED);
        db_write_group(gh);
    }

    /* the elapsed time is cast explicitly to match the %ld conversion */
    if (col_article_count > 0)
        log_entry('C', "Collect: %ld art, %d gr, %ld s",
                  col_article_count, col_group_count,
                  (long)(cur_time() - start_time));

    return temp >= 0;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.