This is irfiles.h in view mode; [Download] [Up]
/* WIDE AREA INFORMATION SERVER SOFTWARE: No guarantees or restrictions. See the readme file for the full standard disclaimer. Brewster@think.com * * $Log: irfiles.h,v $ * Revision 1.19 92/04/16 20:04:44 morris * small fix to dictionary_blockword_occurances, lenght read was * NEXT_INDEX_BLOCK_SIZE, now its NUMBR_OF_OCCURANCES_SIZE. * * Revision 1.18 92/03/19 09:34:08 morris * fixed the dictionary header to accurately indicate the number of blocks * * Revision 1.17 92/02/17 12:38:00 jonathan * Added defines for catalog. * */ /* include file for irfiles.c */ #ifndef IRFILES_H #define IRFILES_H #include "cdialect.h" #include "cutil.h" #include "hash.h" #include "ustubs.h" /* for time_t */ /* filename extensions for various components */ #define dictionary_ext ".dct" #define filename_table_ext ".fn" #define headline_table_ext ".hl" #define document_table_ext ".doc" #define index_ext ".inv" #define source_ext ".src" #define catalog_ext ".cat" /* these dictionary definitions are used in irhash,irverify, and irfiles */ #define DICTIONARY_HEADER_SIZE 4 #define DICTIONARY_BLOCK_SIZE 1000L /* in entries, not bytes */ #define DICTIONARY_ENTRY_HASH_CODE_SIZE 2 /* #define DICTIONARY_ENTRY_COUNT_SIZE 3 moved to inverted file */ /* #define DICTIONARY_ENTRY_INDEX_BLOCK_SIZE 4 not used and too long a symbol*/ /* #define DICTIONARY_ELEMENT_SIZE 6 was 9 */ #define DICTIONARY_SIZE 524288L #define DICTIONARY_TOTAL_SIZE_WORD "{}" /* the word that holds the total number of words in the whole dictionary */ #define INDEX_HEADER_SIZE 4 #define INDEX_BLOCK_SIZE_SIZE 2 #define NEXT_INDEX_BLOCK_SIZE 4 #define INDEX_BLOCK_FLAG_SIZE 1 #define INDEX_BLOCK_HEADER_SIZE 7 #define NUMBER_OF_OCCURANCES_SIZE 4 #define INDEX_BLOCK_NOT_FULL_FLAG 101 #define INDEX_BLOCK_FULL_FLAG 69 #define INDEX_BLOCK_DICTIONARY_FLAG 123 #define DOCUMENT_ID_SIZE 4 #define WORD_POSITION_SIZE 0 #define CHARACTER_POSITION_SIZE 3 #define WEIGHT_SIZE 1 #define INDEX_ELEMENT_SIZE 8 #define WORD_ID_SIZE 4 /* for posting arrays */ typedef struct database { char* database_file; FILE* dictionary_stream; FILE* filename_table_stream; FILE* headline_table_stream; FILE* document_table_stream; FILE* index_stream; long doc_table_allocated_entries; hashtable* the_word_memory_hashtable; long number_of_words_in_hashtable; /* for building. checked on every add_word. set at start of building, and on every flush.*/ long flush_after_n_words; /* set at the start of building used to compare with number_of_words_in_hashtable. */ long number_of_words; /* for building. number of different words. Set from the headers of .inv files as they are merged. It is used to set the header when a .inv file is first created (not by merging). */ long index_file_number; /* for building. */ long total_word_count; /* Total number of word occurances. set during indexing, saved in dictionary under 'ALL' entry */ void* ext_database; } database; typedef struct document_table_entry { long filename_id; long headline_id; long source_id; /* for signature system */ long start_character; long end_character; long document_length; /* in characters */ long number_of_lines; /* in lines */ time_t date; /* 0 if unknown */ } document_table_entry; #ifdef __cplusplus /* declare these as C style functions */ extern "C" { #endif /* def __cplusplus */ database* openDatabase _AP((char* name, boolean initialize,boolean for_search)); void closeDatabase _AP((database* the_db)); void disposeDatabase _AP((database* the_db)); void initialize_index_files _AP((database* db)); char *read_filename_table_entry _AP((long position, char* filename, char* type, time_t* file_write_date, database* db)); long write_filename_table_entry _AP((char* filename, char *type, database* db)); boolean filename_in_database _AP((char *filename, char *type, time_t *write_file_date, database *db)); boolean filename_in_filename_file _AP ((char *filename, char*type, time_t *file_write_date, char* filename_file)); char *read_headline_table_entry _AP((long position,database* db)); long write_headline_table_entry _AP((char* headline, database* db)); boolean read_document_table_entry _AP((document_table_entry* doc_entry,long number,database* db)); long write_document_table_entry _AP((document_table_entry* doc_table_entry, database* db)); boolean writeUserValToDocIDTable _AP((unsigned long userVal,long doc, database* db)); long next_document_id _AP((database* db)); void close_dictionary_file _AP((database *db)); long add_word_to_dictionary _AP((char *word, long index_file_block_number, long number_of_occurances, database* db)); long look_up_word_in_dictionary _AP((char *word, long *word_id, database* db)); long init_dict_file_for_writing _AP((database *db)); void init_dict_file_detailed _AP((FILE* dictionary_stream, long number_of_blocks)); void record_num_blocks_in_dict _AP((FILE* dictionary_stream, long number_of_words)); long finished_add_word_to_dictionary _AP((database *db)); boolean register_src_structure _AP((char *filename)); boolean write_src_structure _AP((char *filename, char *database_name, char *typename, char **filenames, long number_of_filename, boolean export_database, long tcp_port)); boolean build_catalog _AP((database* db)); long allocate_index_block _AP((long how_large, FILE* stream)); unsigned char *read_dictionary_block _AP((unsigned char* block, long position,long length, FILE* stream)); void print_dictionary _AP((database* db)); #define DICTIONARY_ENTRY_SIZE 29 /* sum of MAX_WORD_LENGTH, 1 ('\0'), NEXT_INDEX_BLOCK_SIZE and NUMBER_OF_OCCURANCES_SIZE */ #ifdef DICT_FUNC char *dictionary_block_word _AP((long i,unsigned char* block)); long dictionary_block_position _AP((long i,unsigned char* block)); long dictionary_block_word_occurances _AP((long i,unsigned char* block)); #else /* macros */ #define dictionary_block_word(i,block) \ ((char *)((block) + ((i) * DICTIONARY_ENTRY_SIZE))) #define dictionary_block_position(i,block) \ read_bytes_from_memory(NEXT_INDEX_BLOCK_SIZE, \ (block) + ((i) * DICTIONARY_ENTRY_SIZE) + \ MAX_WORD_LENGTH + 1) #define dictionary_block_word_occurances(i,block) \ read_bytes_from_memory(NUMBER_OF_OCCURANCES_SIZE, \ (block) + ((i) * DICTIONARY_ENTRY_SIZE) + \ MAX_WORD_LENGTH + 1 + NEXT_INDEX_BLOCK_SIZE) #endif void print_dictionary_block _AP((unsigned char* block,long size)); /* database functions */ char* dictionary_filename _AP((char* destination, database* db)); char* filename_table_filename _AP((char* destination, database* db)); char* headline_table_filename _AP((char* destination, database* db)); char* document_table_filename _AP((char* destination, database* db)); char* index_filename _AP((char* destination, database* db)); char* index_filename_with_version _AP((long version, char* destination, database* db)); char* source_filename _AP((char* destination, database* db)); #ifdef __cplusplus } #endif /* def __cplusplus */ #endif /* IRFILES_H */
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.