123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /*
- * COMPONENT_NAME: austext
- *
- * FUNCTIONS: CNCRD_MEMORY_AREA_LIST
- * QUERY_STEM_STR
- * STAT_STR
- * TREENODE
- * build_bin_tree
- * comp_stat
- * descend_tree
- * efim_qsort
- * fill_stem
- * get_next_memory_block
- * init_global_memory
- * init_memory
- * inv_index_bin_tree
- * load_ditto_str
- * release_shm_mem
- * stat_search
- * traverse_tree
- * ve_statistical
- *
- * ORIGINS: 27
- *
- * (C) COPYRIGHT International Business Machines Corp. 1993,1995
- * All Rights Reserved
- * US Government Users Restricted Rights - Use, duplication or
- * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
- */
- /*************************** VESTATIS.C ****************************
- * $XConsortium: vestatis.c /main/9 1996/11/25 18:49:04 drk $
- * 1993.
- * Statistically sorted stems search.
- *
- * $Log$
- * Revision 2.3 1996/02/01 19:35:55 miker
- * AusText 2.1.11, DtSearch 0.3: Uses new single word parser/stemmers.
- *
- * Revision 2.2 1995/10/25 15:00:05 miker
- * Added prolog.
- *
- * Revision 2.1 1995/09/22 22:30:42 miker
- * Freeze DtSearch 0.1, AusText 2.1.8
- * Revision 1.11 1995/09/07 23:30:15 miker
- * ...One last try (sigh).
- * Revision 1.10 1995/09/07 19:08:01 miker
- * Last fix incorrectly coded.
- * Revision 1.9 1995/09/07 16:25:11 miker
- * Fixed solaris bus fault caused by TREENODE structure
- * not being aligned on machines word boundary. Fault occurred
- * only when query contained more than one word.
- * Revision 1.8 1995/09/05 19:31:37 miker
- * Made usrblk and ausapi_msglist global. Replaced Socrates()
- * with calls to parser() and stemmer(). Deleted socblk.
- * Numerous name changes. All for DtSearch...
- */
- #ifndef _ALL_SOURCE
- # define _ALL_SOURCE /* to pickup typedefs for shm vnodes */
- #endif
- #include "SearchE.h"
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include <math.h>
- #include <sys/stat.h>
- #include <sys/types.h>
- #include <sys/ipc.h>
- #include <sys/shm.h>
- #include "vista.h"
- /*-------------------------- GLOBALS ----------------------------*/
- /**** declaration for the global memory pointers ****/
- #define PROGNAME "VESTATIS"
- #define MEMORY_SIZE 64000 /* 65536 is 64 KBytes of memory */
- #define REC_TYPES 256
- #define NORM_VALUE 30
- #undef INFINITY /* XXX does GCC's __builtin_inff() work here? */
- #define INFINITY 9999.0
- #define SORT_MESG 10000
- #define CHAR_BITS 8
- #define STACKSZ 256
- #define MED_3_VALUE 7
- #define TIME_ITERATION 1
- #define LOG2 0.693147181
- #define MS_vestatis 17
- #define STRUCT_ALIGN sizeof(char*)
- static int SHM_FLAG = IPC_CREAT | S_IRUSR | S_IWUSR | S_IWGRP |
- S_IRGRP | S_IROTH | S_IWOTH;
- static char *mem_start;
- static char *cur_pos;
- static long mem_offset;
- static long total_memory_size;
- typedef struct q_s {
- char stem[DtSrMAXWIDTH_HWORD];
- int count;
- } QUERY_STEM_STR;
- typedef struct mem_area {
- char *start_of_mem_block;
- long block_size;
- struct mem_area *next_block;
- } CNCRD_MEMORY_AREA_LIST;
- typedef struct bintree {
- struct bintree *rlink; /* ptr to next node in linked list or
- * right link in binary tree */
- struct bintree *llink; /* left link in binary tree */
- char *word; /* ptr to word in the query */
- int count;
- } TREENODE;
- typedef struct s_a {
- DB_ADDR dba;
- float wght;
- DtSrINT32 num_word_hits;
- } STAT_STR;
- static STAT_STR *stat_array = NULL;
- static TREENODE *root_node;
- static TREENODE *top_of_stack;
- static TREENODE *stack;
- static TREENODE *pres;
- static TREENODE *prev;
- static TREENODE *next;
- static TREENODE *avail_node;
- static CNCRD_MEMORY_AREA_LIST *memory_blocks = NULL;
- static CNCRD_MEMORY_AREA_LIST *cur_mem_ptr;
- static QUERY_STEM_STR *query_stems = NULL;
- static DB_ADDR *word_addrs = NULL;
- static int num_diff_words = 0;
- static char begin_search;
- static char begin_sort;
- static char begin_load_ditto;
- static char begin_qsort;
- static char qsort_done;
- static DtSrINT32 real_num_rec;
- static DtSrINT32 num_hits;
- static DtSrINT32 total_num_addrs;
- static DtSrINT32 dba_offset;
- static unsigned char rec_type_tab[REC_TYPES];
- static char vestat_msgbuf[256];
- static int mes_search_box;
- static int slot_d00;
- extern char *chmat ();
- extern void find_keyword (char *cur_word, int vista_num);
- extern void read_wordstr (struct or_hwordrec * glob_word, int vista_num);
- extern void write_wordstr (struct or_hwordrec * glob_word, int vista_num);
- static void stat_search (void); /* redefined below */
- /********************************/
- /* */
- /* Release Shared Memory */
- /* */
- /********************************/
- void release_shm_mem (void)
- {
- if (global_memory_ptr != NULL) {
- if (shmdt (global_memory_ptr) == -1) {
- DtSearchAddMessage (CATGETS(dtsearch_catd, MS_vestatis, 104,
- PROGNAME "104 Cannot detach shared memory "));
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- return;
- }
- if (shmctl (shm_id, IPC_RMID, NULL) == -1) {
- DtSearchAddMessage (CATGETS(dtsearch_catd, MS_vestatis, 110,
- PROGNAME "110 Cannot remove shared memory "));
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- return;
- }
- global_memory_ptr = NULL;
- }
- return;
- } /* release_shm_mem() */
- /********************************/
- /* */
- /* Init Global Memory */
- /* */
- /********************************/
- /* addrs - largest DBA slot in D00 file in the current database
- * r_addrs - total records count in the current database.
- */
- static int init_global_memory (DtSrINT32 addrs, DtSrINT32 r_addrs)
- {
- long i, j;
- size_t k;
- i = DtSrMAX_STEMCOUNT * ((addrs >> 3) + 1) * 2 +
- addrs * sizeof (int) + sizeof (DB_ADDR) * r_addrs;
- j = sizeof (STAT_STR) * addrs + sizeof (DB_ADDR) * r_addrs;
- k = (i > j) ? i : j;
- shm_id = shmget (IPC_PRIVATE, k, SHM_FLAG);
- if ((global_memory_ptr = (char *) shmat (shm_id, (char *) 0, 0)) ==
- ((char *) -1)) {
- DtSearchAddMessage (CATGETS(dtsearch_catd, MS_vestatis, 115,
- PROGNAME "115 No shared memory available"));
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- return FALSE;
- }
- return TRUE;
- } /* init_global_memory() */
- /****************************************/
- /* */
- /* efim_qsort */
- /* */
- /****************************************/
- /* Custom quick sort algorithm (medium-of-3 partitioning).
- * Coded for efficiency given our expected data characteristics,
- * and for interruptability.
- */
- int efim_qsort (void)
- {
- time_t start_time;
- double time_dif;
- static long left, right;
- static long scan_l, scan_r, mid3, pvidx, l_size, r_size;
- static long sptr;
- static float pivot, temp, stack_l[STACKSZ], stack_r[STACKSZ];
- static DB_ADDR dba;
- /* Test whether user has pushed STOP button since last call. */
- if (usrblk.flags & USR_STOPSRCH) {
- if (OE_flags & OE_AUDIT)
- oe_write_audit_rec (-1L);
- usrblk.retncode = OE_USER_STOP;
- release_shm_mem ();
- return TRUE;
- }
- if (begin_qsort) {
- sptr = 0;
- left = 0;
- right = num_hits - 1;
- begin_qsort = FALSE;
- }
- time (&start_time);
- for (;;) {
- /* check iteration loop */
- time_dif = difftime (time (NULL), start_time);
- if ((time_dif > TIME_ITERATION
- || usrblk.debug & USRDBG_ITERATE) &&
- !(usrblk.flags & USR_NO_ITERATE)) {
- usrblk.retncode = OE_SEARCHING;
- usrblk.workproc = stat_search;
- mes_search_box = TRUE;
- return TRUE;
- }
- while (right > left) {
- if ((right - left) > MED_3_VALUE) {
- /*
- * compute value for the median-of-three partitioning
- */
- mid3 = (left + right) >> 1;
- /*
- * three-sort left, middle, and right elements
- */
- if ((stat_array + left)->wght < (stat_array + mid3)->wght) {
- temp = (stat_array + left)->wght;
- (stat_array + left)->wght =
- (stat_array + mid3)->wght;
- (stat_array + mid3)->wght = temp;
- dba = (stat_array + left)->dba;
- (stat_array + left)->dba =
- (stat_array + mid3)->dba;
- (stat_array + mid3)->dba = dba;
- }
- if ((stat_array + left)->wght < (stat_array + right)->wght) {
- temp = (stat_array + left)->wght;
- (stat_array + left)->wght =
- (stat_array + right)->wght;
- (stat_array + right)->wght = temp;
- dba = (stat_array + left)->dba;
- (stat_array + left)->dba =
- (stat_array + right)->dba;
- (stat_array + right)->dba = dba;
- }
- if ((stat_array + mid3)->wght < (stat_array + right)->wght) {
- temp = (stat_array + mid3)->wght;
- (stat_array + mid3)->wght =
- (stat_array + right)->wght;
- (stat_array + right)->wght = temp;
- dba = (stat_array + mid3)->dba;
- (stat_array + mid3)->dba =
- (stat_array + right)->dba;
- (stat_array + right)->dba = dba;
- }
- /* select pivot element index */
- pvidx = right - 1;
- /* exchange pivot with the middle element */
- temp = (stat_array + mid3)->wght;
- (stat_array + mid3)->wght = (stat_array + pvidx)->wght;
- (stat_array + pvidx)->wght = temp;
- dba = (stat_array + mid3)->dba;
- (stat_array + mid3)->dba = (stat_array + pvidx)->dba;
- (stat_array + pvidx)->dba = dba;
- /* setup for partitioning */
- scan_l = left + 1;
- scan_r = right - 2;
- }
- else {
- /* select pivot element index */
- pvidx = right;
- /* set scanning indexes */
- scan_l = left;
- scan_r = right - 1;
- }
- /* select pivot element */
- pivot = (stat_array + pvidx)->wght;
- for (;;) {
- /* scan from left */
- while ((stat_array + scan_l)->wght > pivot) {
- scan_l++;
- }
- /* scan from right */
- while ((stat_array + scan_r)->wght < pivot) {
- if (scan_r == 0) {
- break;
- }
- scan_r--;
- }
- /* if scan have met, exit inner loop */
- if (scan_l >= scan_r) {
- break;
- }
- /* exchange elements */
- temp = (stat_array + scan_r)->wght;
- (stat_array + scan_r)->wght = (stat_array + scan_l)->wght;
- (stat_array + scan_l)->wght = temp;
- dba = (stat_array + scan_r)->dba;
- (stat_array + scan_r)->dba = (stat_array + scan_l)->dba;
- (stat_array + scan_l)->dba = dba;
- /* move scans to next elements */
- scan_l++;
- scan_r--;
- }
- if (scan_l != pvidx) {
- /* exchange finale element */
- temp = (stat_array + pvidx)->wght;
- (stat_array + pvidx)->wght = (stat_array + scan_l)->wght;
- (stat_array + scan_l)->wght = temp;
- dba = (stat_array + pvidx)->dba;
- (stat_array + pvidx)->dba = (stat_array + scan_l)->dba;
- (stat_array + scan_l)->dba = dba;
- }
- /* calculate section sizes */
- l_size = scan_l - left;
- r_size = right - scan_l;
- /* place largest section on stack */
- if (l_size > r_size) {
- /* ignore 1-element sections */
- if (l_size > 1) {
- sptr++;
- if (sptr == STACKSZ) {
- fputs (CATGETS(dtsearch_catd, MS_vestatis, 107,
- PROGNAME "107 Qsort stack overflow.\n"),
- aa_stderr);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return FALSE;
- }
- *(stack_l + sptr) = left;
- *(stack_r + sptr) = scan_l - 1;
- }
- /* ignore 1-element sections */
- if (r_size != 0) {
- left = scan_l + 1;
- }
- else {
- break;
- }
- }
- else {
- /* ignore 1-element sections */
- if (r_size > 1) {
- sptr++;
- if (sptr == STACKSZ) {
- fputs (CATGETS(dtsearch_catd, MS_vestatis, 107,
- PROGNAME "107 Qsort stack overflow.\n"),
- aa_stderr);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return FALSE;
- }
- *(stack_l + sptr) = scan_l + 1;
- *(stack_r + sptr) = right;
- }
- /* ignore 1-element sections */
- if (l_size != 0) {
- right = scan_l - 1;
- }
- else {
- break;
- }
- }
- }
- /* iterate with values from stack (if any) */
- if (sptr) {
- left = *(stack_l + sptr);
- right = *(stack_r + sptr);
- sptr--;
- }
- else {
- break;
- }
- }
- qsort_done = TRUE;
- return TRUE;
- } /* efim_qsort() */
- /****************************************/
- /* */
- /* fill_stem */
- /* */
- /****************************************/
- /* "Visit" subroutine of descend_tree(), which is itself subroutine
- * of traverse_tree(). Builds query_stems array
- * and establishes its size in num_diff_words.
- */
- static void fill_stem (TREENODE * cur_stem)
- {
- query_stems[num_diff_words].count = cur_stem->count;
- strcpy (query_stems[num_diff_words].stem, cur_stem->word);
- num_diff_words++;
- return;
- } /* fill_stem() */
- /****************************************/
- /* */
- /* descend_tree */
- /* */
- /****************************************/
- /* Subroutine of traverse_tree(), Robson tree traversal algorithm. */
- static void descend_tree (void)
- {
- int not_done = TRUE;
- while (not_done) {
- /* end of 'descent' subalgorithm? */
- if ((pres->llink == NULL) && (pres->rlink == NULL)) {
- /* Preorder, Symmetric Order and Postorder */
- fill_stem (pres);
- avail_node = pres;
- return;
- }
- if (pres->llink != NULL) {
- /* Preorder */
- fill_stem (pres);
- next = pres->llink;
- pres->llink = prev;
- prev = pres;
- pres = next;
- }
- else {
- /* Preorder and Symmetric Order */
- fill_stem (pres);
- next = pres->rlink;
- pres->rlink = prev;
- prev = pres;
- pres = next;
- }
- }
- return;
- } /* descend_tree() */
- /********************************/
- /* */
- /* traverse_tree */
- /* */
- /********************************/
- /* The algorithm is based on the J. M. ROBSON link inversion traversal
- * algorithm for binary trees. Ref. Thomas A. STANDISH pp. 77-78.
- */
- static void traverse_tree (void)
- {
- int not_done = TRUE;
- int descend = TRUE;
- /* initialize the variables */
- pres = root_node;
- prev = pres;
- top_of_stack = NULL;
- stack = NULL;
- while (not_done) {
- if (descend) {
- descend_tree ();
- }
- if (pres == root_node) {
- return;
- }
- if (prev->rlink == NULL) {
- /* Symmetric Order and Postorder */
- /*** fill_stem(prev); ***/
- next = prev->llink;
- prev->llink = pres;
- pres = prev;
- prev = next;
- descend = FALSE;
- }
- else {
- if (prev->llink == NULL) {
- /* Postorder */
- /** fill_stem(prev); **/
- next = prev->rlink;
- prev->rlink = pres;
- pres = prev;
- prev = next;
- descend = FALSE;
- }
- else {
- if (prev == top_of_stack) {
- /* Postorder */
- /** fill_stem(prev); **/
- next = stack;
- top_of_stack = stack->rlink;
- stack = stack->llink;
- next->llink = NULL;
- next->rlink = NULL;
- next = prev->llink;
- prev->llink = prev->rlink;
- prev->rlink = pres;
- pres = prev;
- prev = next;
- descend = FALSE;
- }
- else {
- /* Symmetric Order */
- /*** fill_stem(prev); ***/
- avail_node->llink = stack;
- avail_node->rlink = top_of_stack;
- stack = avail_node;
- top_of_stack = prev;
- next = prev->rlink;
- prev->rlink = pres;
- pres = next;
- descend = TRUE;
- }
- }
- }
- }
- } /* traverse_tree() */
- /********************************/
- /* */
- /* Get Next Memory Block */
- /* */
- /********************************/
- void get_next_memory_block (size_t node_size)
- {
- CNCRD_MEMORY_AREA_LIST *temp_ptr;
- temp_ptr = memory_blocks;
- /*
- * We run out of pre-allocated memory. Allocate additional block of
- * memory
- */
- if (cur_mem_ptr == NULL) {
- total_memory_size += node_size;
- mem_start = (char *) malloc (total_memory_size);
- mem_offset = 0L;
- mem_offset += node_size;
- cur_pos = mem_start;
- if (mem_start == NULL) {
- fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_vestatis, 310,
- "%s Out of Memory. Need %ld bytes.\n"),
- PROGNAME "310", total_memory_size);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- /*
- * allocate space for the next member of the memory blocks link
- * list
- */
- memory_blocks = (CNCRD_MEMORY_AREA_LIST *)
- malloc (sizeof (CNCRD_MEMORY_AREA_LIST) + 2);
- if (memory_blocks == NULL) {
- fputs (CATGETS(dtsearch_catd, MS_vestatis, 314,
- PROGNAME"314 Out of Memory.\n"), aa_stderr);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- memory_blocks->start_of_mem_block = mem_start;
- memory_blocks->next_block = temp_ptr;
- memory_blocks->block_size = total_memory_size;
- /**** allocation of initial memory blocks is done ****/
- }
- /* Use next available block of memory */
- else {
- mem_start = cur_mem_ptr->start_of_mem_block;
- total_memory_size = cur_mem_ptr->block_size;
- cur_mem_ptr = cur_mem_ptr->next_block;
- mem_offset = 0L;
- mem_offset += node_size;
- cur_pos = mem_start;
- }
- return;
- } /* get_next_memory_block() */
- /********************************/
- /* */
- /* build_bin_tree */
- /* */
- /********************************/
- /* Subroutine of inv_index_bin_tree().
- * Called for each stem in query.
- * Inserts new stem (already uppercase) into tree
- * or increments existing stem's count.
- * Returns TRUE and incr num_diff_words if new stem inserted.
- * Returns FALSE if existing stem's count merely incremented.
- * Returns FALSE and OE_ABORT set on error.
- */
- static int build_bin_tree (char *cur_word)
- {
- int i;
- int wordlen;
- size_t treenode_size;
- TREENODE *new;
- TREENODE **this_link;
- wordlen = strlen (cur_word);
- /* Determine the amount of memory needed for the
- * new node. Add in a pad amount to align it
- * on the machine's word (integer) boundary.
- * Some machines aren't happy about misaligned
- * structures and we're emulating our own malloc.
- * (Thanks, and a tip o' the hat to Takuki Kamiya).
- */
- treenode_size = sizeof (TREENODE) + wordlen + 2;
- treenode_size +=
- (STRUCT_ALIGN - treenode_size % STRUCT_ALIGN) % STRUCT_ALIGN;
- /* allocate a new node and load its data fields */
- mem_offset += treenode_size;
- if (mem_offset > total_memory_size) {
- /* allocate new chunk of memory */
- get_next_memory_block (treenode_size);
- if (usrblk.retncode == OE_ABORT)
- return FALSE;
- }
- new = (TREENODE *) cur_pos;
- cur_pos = mem_start + mem_offset;
- new->llink = NULL;
- new->rlink = NULL;
- new->word = (char *) new + sizeof (TREENODE);
- new->count = 1;
- strcpy (new->word, cur_word);
- /* Insert current word into binary tree */
- for (this_link = &root_node; *this_link != NULL;) {
- i = strcmp (new->word, (*this_link)->word);
- /* Test for current word already in the binary tree */
- if (i == 0) {
- mem_offset -= treenode_size;
- cur_pos = mem_start + mem_offset;
- (*this_link)->count++;
- return FALSE; /* no point in continuing descent */
- }
- /* Descend tree to find correct insertion point */
- this_link = (i < 0) ?
- &(*this_link)->llink : &(*this_link)->rlink;
- } /* end for loop to find tree insertion
- * point */
- /* Insert new node at current location in tree */
- *this_link = new;
- num_diff_words++;
- return TRUE;
- } /* build_bin_tree() */
- /************************/
- /* */
- /* init_memory */
- /* */
- /************************/
- /* Initialize the first block of memory for the binary tree.
- * This function is called only once at each run of the offline program.
- */
- void init_memory (void)
- {
- mem_start = (char *) malloc (MEMORY_SIZE);
- if (mem_start == NULL) {
- fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_vestatis, 310,
- "%s Out of Memory. Need %ld bytes.\n"), PROGNAME "310", MEMORY_SIZE);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- total_memory_size = MEMORY_SIZE;
- cur_pos = mem_start;
- mem_offset = 0L;
- /*
- * Allocate space for the first member of the memory blocks link list
- */
- memory_blocks = (CNCRD_MEMORY_AREA_LIST *)
- malloc (sizeof (CNCRD_MEMORY_AREA_LIST) + 2);
- if (memory_blocks == NULL) {
- fputs (CATGETS(dtsearch_catd, MS_vestatis, 314,
- PROGNAME "314 Out of Memory.\n"), aa_stderr);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- memory_blocks->start_of_mem_block = mem_start;
- memory_blocks->block_size = total_memory_size;
- memory_blocks->next_block = NULL;
- cur_mem_ptr = NULL;
- return;
- } /* init_memory() */
- /********************************/
- /* */
- /* inv_index_bin_tree */
- /* */
- /********************************/
- /* Builds binary tree of all stems in query.
- * Returns TRUE and loads num_diff_words with number
- * of nodes in tree if tree successfully built,
- * or if query is empty.
- * Returns FALSE on any error (causing eventual engine abort).
- */
- static int inv_index_bin_tree (void)
- {
- char *cptr;
- DBLK *dblk = usrblk.dblk;
- PARG parg;
- /* First time initialize the first block of memory */
- if (memory_blocks == NULL) {
- /** INITIALIZE MEMORY **/
- init_memory ();
- if (usrblk.retncode == OE_ABORT)
- return FALSE;
- root_node = NULL;
- }
- /* WORD LOOP. Parse and stem each word in query.
- * Add each stem to bin tree or incr its count.
- */
- memset (&parg, 0, sizeof(PARG));
- parg.dblk = dblk;
- parg.string = usrblk.query;
- for ( cptr = dblk->parser (&parg);
- cptr;
- cptr = dblk->parser (NULL)) {
- build_bin_tree (dblk->stemmer (cptr, dblk));
- if (usrblk.retncode == OE_ABORT)
- return FALSE;
- }
- return TRUE;
- } /* inv_index_bin_tree() */
- /************************/
- /* */
- /* comp_stat */
- /* */
- /************************/
- int comp_stat (void *val1, void *val2)
- {
- STAT_STR *bkt1;
- STAT_STR *bkt2;
- bkt1 = (STAT_STR *) val1;
- bkt2 = (STAT_STR *) val2;
- if ((bkt2->wght) > (bkt1->wght)) {
- return 1;
- }
- else {
- return -1;
- }
- } /* comp_stat() */
- /************************************************/
- /* */
- /* load_ditto_str */
- /* */
- /************************************************/
- /* Last function called from statistical search.
- * Builds a real AusText hitlist from the sorted stat_array,
- * translating the statistical weights to AusText 'proximity'
- * values, and truncating the hitlist at user's maxhits.
- * Working variables made static for speeeeeeeed.
- */
- void load_ditto_str (void)
- {
- struct or_objrec cur_rec; /* structure taken from austext.h */
- struct or_miscrec rec_data;
- static time_t start_time;
- static double time_dif;
- static DB_ADDR dba1;
- static DtSrResult *cur_ditto_mem;
- static DtSrResult *ditto_llist;
- static DtSrResult *temp_ditto;
- static int debugging;
- static int m;
- static DtSrINT32 d0024;
- static DtSrINT32 maxhits;
- static DtSrINT32 i32, i32_start, j32;
- static int fzkeysz, fzkey_remaining, abstrsz, dittosz;
- static char *src, *targ, *targend;
- static int check_dates = FALSE;
- static double sum = 0.0;
- static double sum1, sum2, sum3, sum4;
- debugging = (usrblk.debug & USRDBG_SRCHCMPL);
- maxhits = usrblk.dblk->maxhits;
- fzkeysz = usrblk.dblk->dbrec.or_fzkeysz;
- abstrsz = usrblk.dblk->dbrec.or_abstrsz;
- dittosz = sizeof (DtSrResult) + abstrsz + 16;
- if (debugging)
- fprintf (aa_stderr, PROGNAME "773 "
- "numhits=%ld maxhits=%d numwords=%d abstrsz=%d\n",
- (long)num_hits, (int)maxhits, num_diff_words, abstrsz);
- if (begin_load_ditto) {
- /* test for zero hits */
- if (num_hits == 0) {
- usrblk.workproc = dummy_workproc;
- usrblk.retncode = OE_NOTAVAIL;
- if (OE_flags & OE_AUDIT)
- oe_write_audit_rec (0L);
- release_shm_mem ();
- return;
- }
- check_dates = (usrblk.objdate1 || usrblk.objdate2);
- /* In order to translate statistical weight into an AusText
- * proximity, basically you have to invert it, then scale it.
- * The statistical weight is a similarity measure: the
- * larger it is the more similar the document to the query.
- * But AusText 'proximity' is like a 'distance' measure,
- * the smaller the number the closer the document is to the query.
- *
- * First 'normalize' each document's statistical
- * weight to be a fraction between 0 and 1. Do this
- * by calculating a normalization factor (sum1), the
- * sqrt of the sum of squares of first NORM_VALUE weights.
- * (Trying to make the inversion scheme produce
- * reasonable proximity numbers for these first records).
- *
- * To complete proximity initialization, he uses
- * the sum1 factor to determine and keep the first record's
- * normalized weight (sum), presumably a fraction close
- * to 1.0, and the first record's proximity (sum2),
- * basically the percent
- * value that the first doc is 'distant' from perfection (1.0 or 100%).
- * For example, if the normalized weight of the first record is .931
- * then the proximity will be 7 (100% - 93% = 7%). He does this now
- * because he's going to use this first proximity (sum2) as a scaling
- * factor to stretch out all the subsequent proximities so they
- * look reasonable.
- */
- sum = 0.0;
- for (i32 = 0; i32 < num_hits; i32++) {
- sum1 = (double) (stat_array + i32)->wght /
- (double) num_diff_words;
- sum += sum1 * sum1;
- if (i32 >= NORM_VALUE)
- break;
- }
- /*
- * sum1 = normalization factor.
- * sum = normalized weight (betw 0 and 1) of first record.
- * sum2 = proximity of first record, proximity scale factor.
- */
- sum1 = sqrt (sum);
- sum = ((stat_array + 0)->wght / num_diff_words) / sum1;
- sum2 = (1.0 - sum) * 100.0;
- if (debugging)
- fprintf (aa_stderr, PROGNAME "844 "
- "normfac=%.2lf normwt(#1)=%.2lf prox(#1)=%.2lf\n",
- sum1, sum, sum2);
- /* Preallocate first hit on ditto_list */
- ditto_llist = (DtSrResult *) austext_malloc (dittosz,
- PROGNAME "449", NULL);
- j32 = 0;
- i32_start = 0;
- d0024 = OR_D00 << 24;
- begin_load_ditto = FALSE;
- } /* endif (begin_load_ditto) */
- /* Test whether user has pushed STOP button since last call */
- if (usrblk.flags & USR_STOPSRCH) {
- if (OE_flags & OE_AUDIT)
- oe_write_audit_rec (-1L);
- usrblk.retncode = OE_USER_STOP;
- release_shm_mem ();
- if (j32 == 0)
- free (ditto_llist);
- else
- free_llist ((LLIST **) &ditto_llist);
- return;
- }
- time (&start_time);
- /**** MAIN DtSrResult LIST BUILD LOOP ****/
- for (i32 = i32_start; i32 < num_hits; i32++) {
- /* check iteration loop */
- time_dif = difftime (time (NULL), start_time);
- if ((time_dif > TIME_ITERATION
- || usrblk.debug & USRDBG_ITERATE) &&
- !(usrblk.flags & USR_NO_ITERATE)) {
- i32_start = i32;
- usrblk.retncode = OE_SEARCHING;
- usrblk.workproc = load_ditto_str;
- mes_search_box = TRUE;
- return;
- }
- dba1 = ((stat_array + i32)->dba * slot_d00 - dba_offset)
- | d0024;
- /*
- * Don't use CRSET or RECREAD macros here so we can trap invalid
- * dba errs.
- */
- d_crset (&dba1, saveusr.vistano);
- if (db_status < 0) {
- fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_vestatis, 437,
- "%s: db_status = %d, dba = %d:%ld (x'%08.8lx'), vistano = %d\n"),
- PROGNAME "437", db_status, (dba1 & 0xff000000) >> 24,
- dba1 & 0xffffff, dba1, saveusr.vistano);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- d_recread (&cur_rec, saveusr.vistano);
- if (db_status < 0) {
- fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_vestatis, 437,
- "%s: db_status = %d, dba = %d:%ld (x'%08.8lx'), vistano = %d\n"),
- PROGNAME "437", db_status, (dba1 & 0xff000000) >> 24,
- dba1 & 0xffffff, dba1, saveusr.vistano);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- swab_objrec (&cur_rec, NTOH);
- /* Skip any record with undesired keytype
- * char, ie first char of key.
- */
- if (*(rec_type_tab + cur_rec.or_objkey[0]) == 0)
- continue;
- /* Skip record if out of date range. */
- if (check_dates)
- if (!objdate_in_range (cur_rec.or_objdate,
- usrblk.objdate1, usrblk.objdate2))
- continue;
- if (j32 == 0) /* first ditto node already allocated */
- cur_ditto_mem = ditto_llist;
- else {
- cur_ditto_mem = malloc (dittosz);
- if (cur_ditto_mem == NULL) {
- fputs ( CATGETS(dtsearch_catd, MS_vestatis, 504,
- PROGNAME "504 Cannot allocate cur_ditto\n"),
- aa_stderr);
- OE_flags |= OE_PERMERR;
- usrblk.retncode = OE_ABORT;
- release_shm_mem ();
- return;
- }
- temp_ditto->link = cur_ditto_mem;
- }
- /* Load the ditto_list for this dba */
- memset (cur_ditto_mem, 0, sizeof(DtSrResult));
- cur_ditto_mem->dbn = OE_dbn;
- cur_ditto_mem->dba = dba1;
- strcpy (cur_ditto_mem->reckey, cur_rec.or_objkey);
- cur_ditto_mem->objsize = cur_rec.or_objsize;
- cur_ditto_mem->objdate = cur_rec.or_objdate;
- cur_ditto_mem->objflags = cur_rec.or_objflags;
- cur_ditto_mem->objuflags = cur_rec.or_objuflags;
- cur_ditto_mem->objtype = cur_rec.or_objtype;
- cur_ditto_mem->objcost = cur_rec.or_objcost;
- /*****cur_ditto_mem->flags = 0;****/
- cur_ditto_mem->abstractp = (char *) cur_ditto_mem +
- sizeof (DtSrResult);
- cur_ditto_mem->abstractp[0] = 0;
- /* Translate statistical weight into AusText proximity.
- * sum3 = normalized weight (betw 0 and 1).
- * sum4 = prox = ratio of this normalized weight to
- * first rec's weight, scaled by the first rec's proximity.
- * No proximity is allowed to exceed some very large number.
- */
- sum3 = ((stat_array + i32)->wght / num_diff_words) / sum1;
- sum4 = sum2 * (sum / sum3);
- if (sum4 > INFINITY)
- sum4 = INFINITY;
- cur_ditto_mem->proximity = sum4;
- if (debugging)
- fprintf (aa_stderr,
- " --> dba=%ld normwt=%.4lf prox=%d key='%s'\n",
- dba1, sum3, cur_ditto_mem->proximity,
- cur_ditto_mem->reckey);
- /*
- * The abstract immediately follows the fuzzy key in the FZKABS
- * misc recs. It may span several recs.
- */
- if (abstrsz > 0) {
- targ = cur_ditto_mem->abstractp;
- targend = targ + abstrsz - 1;
- fzkey_remaining = fzkeysz;
- SETOR (PROGNAME "2270", OR_OBJ_MISCS, saveusr.vistano);
- FINDFM (PROGNAME "2271", OR_OBJ_MISCS, saveusr.vistano);
- while (db_status == S_OKAY) {
- RECREAD (PROGNAME "549", &rec_data, saveusr.vistano);
- NTOHS (rec_data.or_misctype);
- if (rec_data.or_misctype == ORM_FZKABS) {
- src = (char *) rec_data.or_misc;
- for (m = 0; m < sizeof(rec_data.or_misc); m++) {
- if (fzkey_remaining > 0) {
- src++;
- fzkey_remaining--;
- continue; /* inner for-loop on m */
- }
- *targ = *src;
- if (*src++ == 0 || targ++ >= targend) {
- *targ = 0;
- targ = targend; /* make outer loop end */
- break;
- }
- } /* end for-loop for curr misc rec */
- } /* endif: misctype == FZKABS */
- if (targ >= targend)
- break;
- FINDNM (PROGNAME "545", OR_OBJ_MISCS, saveusr.vistano);
- } /* end while-loop */
- } /* endif: (abstrsz > 0) */
- cur_ditto_mem->link = NULL;
- temp_ditto = cur_ditto_mem;
- /* Increment to next hit.
- * Break loop when we reach user's specified maxhits.
- */
- j32++; /* [j32 same as i] !? */
- if (j32 >= maxhits)
- break;
- } /* i32-loop on each hit in ditto list */
- if (j32 == 0) {
- usrblk.workproc = dummy_workproc;
- usrblk.retncode = OE_NOTAVAIL;
- if (OE_flags & OE_AUDIT)
- oe_write_audit_rec (0L);
- release_shm_mem ();
- return;
- }
- if (num_hits >= maxhits) {
- if (!(usrblk.flags & USR_NO_INFOMSGS)) {
- sprintf (vestat_msgbuf, CATGETS(dtsearch_catd, MS_vestatis, 421,
- "$s Total Number Hits = %ld. Discarded hits beyond maximum number specified."),
- PROGNAME "421", (long)num_hits);
- DtSearchAddMessage (vestat_msgbuf);
- }
- }
- free_llist ((LLIST **) &usrblk.dittolist);
- usrblk.dittolist = ditto_llist;
- usrblk.dittocount = j32;
- usrblk.workproc = dummy_workproc;
- usrblk.retncode = OE_OK;
- if (OE_flags & OE_AUDIT)
- oe_write_audit_rec ((long) num_hits);
- /***** Free shared memory *****/
- release_shm_mem ();
- return;
- } /* load_ditto_str() */
- /****************************************/
- /* */
- /* stat_search */
- /* */
- /****************************************/
- /* Subroutine of ve_statistical() and interruptable workproc.
- */
- static void stat_search (void)
- {
- time_t start_time;
- double time_dif;
- DB_ADDR temp, temp1;
- struct or_hwordrec word1; /* structure taken from austext.h */
- double idf, cur_weight;
- int qs;
- DtSrINT32 int32, j32;
- /*****@@@ size_t size;****/
- static int qs_start;
- /* Test whether user has pushed STOP button since last call */
- if (usrblk.flags & USR_STOPSRCH) {
- if (OE_flags & OE_AUDIT)
- oe_write_audit_rec (-1L);
- usrblk.retncode = OE_USER_STOP;
- release_shm_mem ();
- return;
- }
- if (begin_sort) {
- begin_qsort = TRUE;
- qsort_done = FALSE;
- if (begin_search) {
- qs_start = 0;
- begin_search = FALSE;
- }
- time (&start_time);
- /*
- * For every query stem, read d99. For every dba in d99 for each
- * stem, update object's stat array node with rec count and a
- * weight based on the IDF for this stem. (IDF is described
- * below). Saveusr.stemcount = lesser of DtSrMAX_STEMCOUNT or
- * num_diff_words. All stems are stored in d99 beginning with ^O
- * (decimal 15). Index qs = curr query stem
- */
- for (qs = qs_start; qs < saveusr.stemcount; qs++) {
- word1.or_hwordkey[0] = 15;
- word1.or_hwordkey[1] = '\0';
- strcat (word1.or_hwordkey, query_stems[qs].stem);
- find_keyword (word1.or_hwordkey, saveusr.vistano);
- /*
- * If word is not in the database, ignore it. [ If word
- * not in database, why not take the next stem in query_stems
- * array, if any? ]
- */
- if (db_status != S_OKAY)
- word1.or_hwaddrs = 0;
- else
- read_wordstr (&word1, saveusr.vistano);
- if (word1.or_hwaddrs > 0) {
- fseek (usrblk.dblk->iifile, word1.or_hwoffset,
- SEEK_SET);
- /****@@@size = sizeof (DB_ADDR) * word1.or_hwaddrs;***/
- fread (word_addrs, sizeof(DB_ADDR),
- (size_t)word1.or_hwaddrs, usrblk.dblk->iifile);
- /*
- * Calculate IDF (inverse document frequency) for this
- * word. The IDF is a statistical ratio of the number
- * of documents containing the word and the total
- * number of documents in the entire corpus.
- * It is calculated here on the fly to save space in the
- * database. IDF = {log (totnumdocs / numdocswithword) /
- * log(2)} + 1. Note that an IDF of 1 means the word
- * occurs in every doc (it's meaningless). An IDF of 19
- * means the word occurs once in every 300,000 recs.
- * Note that by dividing by log(2) the IDF also tells
- * us how many binary digits are necessary to discriminate
- * the word. Finally I think 1.0 was added to prevent
- * it ever becoming zero when converted to integer.
- */
- idf = (log ((double) real_num_rec / (double) word1.or_hwaddrs)
- / LOG2) + 1.0;
- /*
- * WEIGHT PASS #1:
- * Update the stat array node for each doc (ie dba) which
- * includes this stem. Specifically,
- * sum the product of the IDF and word-doc weight into
- * the 'wght' bucket, and update the number of query
- * words this doc contains. Note that the d99 dba format
- * is slot# in hi 3 bytes, word-doc weights in lo byte.
- */
- for (j32 = 0; j32 < word1.or_hwaddrs; j32++) {
- NTOHL (word_addrs [j32]);
- temp1 = *(word_addrs + j32); /* d99 dba */
- cur_weight = (double) (temp1 & 0xFF); /* lo byte */
- temp = temp1 >> 8; /* slot# */
- ((stat_array + temp)->num_word_hits)++;
- ((stat_array + temp)->dba) = temp;
- ((stat_array + temp)->wght) += (float) (cur_weight * idf);
- }
- } /* end if (word1.or_hwaddrs > 0), ie
- * query word exists */
- /*
- * If the query words were common, the last double loop may
- * have taken a long time. If so, return now to the user
- * interface to allow the gui to respond to button clicks
- * (like CANCEL buttons).
- */
- time_dif = difftime (time (NULL), start_time);
- if ((time_dif > TIME_ITERATION
- || usrblk.debug & USRDBG_ITERATE) &&
- !(usrblk.flags & USR_NO_ITERATE)) {
- if (qs == saveusr.stemcount - 1) {
- usrblk.retncode = OE_SEARCHING;
- usrblk.workproc = stat_search;
- mes_search_box = TRUE;
- return;
- }
- else {
- qs_start = qs + 1;
- usrblk.retncode = OE_SEARCHING;
- usrblk.workproc = stat_search;
- mes_search_box = TRUE;
- return;
- }
- } /* end if (time_dif > TIME_ITERATION */
- } /* end qs-loop on each query stem */
- /*
- * Entire stat array contains one node for every possible dba
- * (doc). Collapse the records that were actually referenced by
- * the query words into the top portion of the array.
- * Set 'num_hits' to the collapsed stat array size, ie
- * num_hits = the total number of docs that will be on
- * the prelim hitlist, prior to sort and truncation to user's maxhits.
- *
- * WEIGHT PASS #2:
- * While we're at it, finalize the accumulated 'wght' field, which
- * will be our sort field, by multiplying it by the ratio of the
- * number of query words in the document divided by the number of
- * words in the query.
- * Thus the final sort field for each doc is the sum
- * over all the query words in the doc of 3 factors:
- * 1) IDF (relative weight of each query word in corpus), times
- * 2) d99wght (relative weight of each query word in doc), times
- * 3) weight based on number of different query words in this doc.
- */
- num_hits = 0;
- for (int32 = 0; int32 < total_num_addrs; int32++) {
- if (stat_array[int32].wght > 0) {
- (stat_array + num_hits)->num_word_hits =
- (stat_array + int32)->num_word_hits;
- (stat_array + num_hits)->wght = (stat_array + int32)->wght *
- ((double) (stat_array + int32)->num_word_hits /
- (double) num_diff_words);
- (stat_array + num_hits)->dba = (stat_array + int32)->dba;
- num_hits++;
- }
- }
- /*
- * We're about to sort the actual hits. If the number of them
- * exceeds a certain threshold, return to the user interface one
- * more time to again allow the gui to respond to user CANCEL
- * events.
- */
- if (num_hits > SORT_MESG && !(usrblk.flags & USR_NO_ITERATE)) {
- if (!mes_search_box) {
- DtSearchAddMessage (CATGETS(dtsearch_catd, MS_vestatis, 990,
- PROGNAME"990 The system is now sorting. Please wait."));
- }
- usrblk.retncode = OE_SEARCHING;
- usrblk.workproc = stat_search;
- mes_search_box = TRUE;
- begin_sort = FALSE;
- return;
- }
- } /* end if (begin_sort) */
- /* Sort the preliminary hitlist (stat_array)
- * by the calculated statistical weights.
- */
- if (!efim_qsort ())
- return;
- /* Build a real AusText hitlist from the sorted stat_array,
- * translating the statistical weights to AusText 'proximity'
- * values, and truncating the hitlist at user's maxhits.
- */
- if (qsort_done) {
- begin_load_ditto = TRUE;
- load_ditto_str ();
- }
- return;
- } /* stat_search() */
- /****************************************/
- /* */
- /* ve_statistical */
- /* */
- /****************************************/
- void ve_statistical (void)
- {
- void stat_search (void);
- DB_ADDR dba;
- int i, j;
- DtSrINT32 int32;
- mes_search_box = FALSE;
- usrblk.flags &= ~USR_STOPSRCH; /* turn off stop button */
- usrblk.retncode = OE_OK;
- usrblk = usrblk;
- saveusr.vistano = usrblk.dblk->vistano;
- saveusr.dittolist = NULL;
- saveusr.dittocount = 0L;
- saveusr.iterations = 1;
- /****** find total number of records in the database *********/
- RECFRST (PROGNAME "1067", OR_OBJREC, saveusr.vistano);
- CRGET (PROGNAME "1068", &dba, saveusr.vistano);
- real_num_rec = usrblk.dblk->dbrec.or_reccount;
- slot_d00 = usrblk.dblk->dbrec.or_recslots;
- dba_offset = slot_d00 - (dba & 0x00FFFFFF);
- total_num_addrs = (usrblk.dblk->dbrec.or_maxdba -
- (dba & 0x00FFFFFF) + 1) / slot_d00 + 1;
- /* stat_array size = 1 node for every possible object */
- if (usrblk.query[0] == 0) {
- DtSearchAddMessage (CATGETS(dtsearch_catd, MS_vestatis,
- 677, PROGNAME "677 Query field is empty."));
- usrblk.retncode = OE_BAD_QUERY;
- return;
- }
- /*
- * Build binary tree of each stem in query containing count of number
- * of occurrences of stem in query. Loads num_diff_words with number
- * of nodes in tree.
- */
- num_diff_words = 0;
- inv_index_bin_tree();
- if (usrblk.retncode == OE_ABORT)
- return;
- if (num_diff_words < 1) {
- usrblk.retncode = OE_NOTAVAIL;
- return;
- }
- /***** allocate memory for query_stems array *********/
- if (query_stems != NULL) {
- free (query_stems);
- query_stems = NULL;
- }
- query_stems = (QUERY_STEM_STR *) austext_malloc
- (sizeof (QUERY_STEM_STR) * (num_diff_words + 1),
- PROGNAME " 371", NULL);
- /*
- * Traverse tree to build query_stems array, each array node = tree
- * node, ie each unique stem in query and its count in query.
- * Num_diff_words now used as index for growing array.
- */
- num_diff_words = 0;
- traverse_tree ();
- /*
- * For each new query initialize memory offset, current memory start
- * position, and total size for the available memory. Starts from the
- * first member in the link list of memory blocks.
- */
- root_node = NULL;
- mem_start = memory_blocks->start_of_mem_block;
- total_memory_size = memory_blocks->block_size;
- cur_mem_ptr = memory_blocks->next_block;
- cur_pos = mem_start;
- mem_offset = 0L;
- /*
- * Copy first DtSrMAX_STEMCOUNT stems into the saveusr.stems. [So no more
- * than DtSrMAX_STEMCOUNT will be used in search or hiliting!]
- */
- for (i = 0; i < num_diff_words; i++) {
- if (i == DtSrMAX_STEMCOUNT)
- break;
- strcpy (usrblk.stems[i], query_stems[i].stem);
- }
- usrblk.stemcount = i;
- saveusr.stemcount = i;
- /* Prepare a string holding first char of desired record ids */
- for (i = 0; i < REC_TYPES; i++)
- *(rec_type_tab + i) = 0;
- for (i = 0, j = 0; i < usrblk.dblk->ktcount; i++)
- if (usrblk.dblk->keytypes[i].is_selected)
- *(rec_type_tab + usrblk.dblk->keytypes[i].ktchar) = 1;
- saveusr.ktchars[j] = '\0';
- /*
- * New code using shared memory:
- * Allocate global block of shared memory,
- * and assign parts of this memory to each array.
- * Stat array has an element for every possible db object.
- * Set whole stat array to binary zeroes.
- */
- if (!init_global_memory (total_num_addrs, real_num_rec))
- return;
- stat_array = (STAT_STR *) global_memory_ptr;
- word_addrs = (DB_ADDR *) (global_memory_ptr +
- total_num_addrs * sizeof (STAT_STR));
- for (int32 = 0; int32 < total_num_addrs; int32++) {
- (stat_array + int32)->wght = 0.0;
- (stat_array + int32)->num_word_hits = 0;
- }
- /***** end of memory allocation for statistical array *********/
- /* stat_search(): Search d99 and sum the statistical weights.
- * Calls efim_qsort() to sort the hitlist by the weights.
- */
- begin_search = TRUE; /* global initialization and state flags */
- begin_sort = TRUE;
- stat_search ();
- return;
- } /* ve_statistical() */
- /*************************** VESTATIS.C ****************************/
|