123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /*
- * COMPONENT_NAME: austext
- *
- * FUNCTIONS: findstr_workproc
- * kwic_workproc
- * make_one_kwic
- * oe_ditto2kwic
- * oe_findstr_hitl
- * restore_findstr_hitl
- *
- * ORIGINS: 27
- *
- * IBM CONFIDENTIAL -- (IBM Confidential Restricted when
- * combined with the aggregated modules for this product)
- * OBJECT CODE ONLY SOURCE MATERIALS
- *
- * (C) COPYRIGHT International Business Machines Corp. 1992,1995
- * All Rights Reserved
- * US Government Users Restricted Rights - Use, duplication or
- * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
- */
- /******************************* OEKWIC.C ********************************
- * $XConsortium: oekwic.c /main/4 1996/05/07 13:42:36 drk $
- * April 1992.
- * Opera Engine (OE) functions that create KeyWord In Context (KWIC)
- * abstracts to replace database abstract when requested by user.
- * KWIC abstract is a string extracted
- * from the cleartext where the first hitword appears.
- * Also includes find-string functions which use similar logic.
- * Also includes a few generic OE utilities.
- *
- * $Log$
- * Revision 1.6 1995/09/05 19:02:02 miker
- * Made usrblk universal global. Deleted refs to socblk.
- * Other minor name and function changes for DtSearch.
- *
- */
#include "SearchE.h"
#include <ctype.h>
#include <stdio.h>
#include <time.h>
- /******#define DEBUG_KWIC*********/
- /********#define DEBUG_FINDSTR_ITER***********/
- /******#define DEBUG_FINDSTR********/
- #define PROGNAME "OEKWIC"
- #define START_KWIC_ITERATIONS 10
- #define START_FINDSTR_ITERATIONS 10
- #define MIN_KWIC_ITERATIONS 2
- #define MIN_FINDSTR_ITERATIONS 2
- #define MIN_KWIC_BUFSZ 20
- static int found_one_substring = FALSE;
- /************************************************/
- /* */
- /* make_one_kwic */
- /* */
- /************************************************/
- /* Builds abstract for record in usrblk.cleartext to
- * KWIC string where first word/substring in
- * usrblk.hitwords array is in center of abstract string.
- * Returns new abstract in passed buffer (in a ditto list).
- * General format of new abstract is: "...text <<word>> text...".
- */
- static void make_one_kwic (char *abstract)
- {
- char c;
- int i;
- long from; /* offset from beginning of cleartext */
- int abstrsz;
- char *to, *lastto;
- abstrsz = usrblk.dblk->dbrec.or_abstrsz;
- if (usrblk.kwiclen > 0 && usrblk.kwiclen < abstrsz)
- abstrsz = usrblk.kwiclen;
- to = abstract;
- lastto = to + abstrsz - 2;
- /* No hitwords to abstract */
- if (usrblk.hitwcount <= 0L)
- return;
- /* Find beginning of 'from' string */
- from = usrblk.hitwords[0].offset -
- ((abstrsz - usrblk.hitwords[0].length - 14) >> 1L);
- if (from < 0L) from = 0L;
- /* If abstract doesnt begin at start of cleartext, print ellipsis */
- if (from != 0L) for (i=3; i>0; i--)
- *to++ = '.';
- /* Move text up to the start of the word.
- * Replace cntrl chars with single space.
- */
- while (from < usrblk.hitwords[0].offset)
- {
- c = usrblk.cleartext [from++];
- *to++ = (iscntrl(c))? ' ' : c;
- }
- /* Move the word itself, hilited with angle brackets */
- for (i=2; i>0; i--)
- *to++ = '<';
- for (i=0; i<usrblk.hitwords[0].length; i++)
- *to++ = usrblk.cleartext [from++];
- for (i=2; i>0; i--)
- *to++ = '>';
- /* Move text beyond the word until end of input or end of abstract buffer */
- while (from < usrblk.clearlen && to < lastto - 4)
- {
- c = usrblk.cleartext [from++];
- *to++ = (iscntrl(c))? ' ' : c;
- }
- /* If not end of input, print another ellipsis */
- if (from < usrblk.clearlen) for (i=3; i>0; i--) *to++ = '.';
- *to = '\0';
- #ifdef DEBUG_KWIC
- printf("%s\n", abstract);
- #endif
- return;
- } /* make_one_kwic() */
- /************************************************/
- /* */
- /* oe_ditto2kwic */
- /* */
- /************************************************/
- /* Converts all abstracts in dittolist to KWIC strings */
- void oe_ditto2kwic (void)
- {
- void kwic_workproc (void);
- int search_type = usrblk.search_type;
- /* First validate the input fields in usrblk */
- if (usrblk.dittocount <= 0L || usrblk.stemcount <= 0)
- {
- usrblk.retncode = OE_BAD_QUERY;
- return;
- }
- /* Set up various global variables for calls to oe_stems_to_hitwords().
- * Only an exact words search (search_type == 'W') does not require
- * stemming. Statistical, semantic and exact stems searches
- * do require stemming.
- */
- OE_kind_of_stems = (search_type == 'W') ? WORD_KIND : STEM_KIND;
- /* Initialize where user loop status maintained between workproc calls */
- saveusr.dittolist = usrblk.dittolist;
- saveusr.iterations = START_KWIC_ITERATIONS;
- /* Call the work procedure that traverses the entire dittolist.
- * If NO_ITERATE is specified, it will run to completion.
- * Otherwise it will run just its first set of iterations,
- * move its own address into usrblk.workproc, then return.
- * Until workproc/mainloop is done, the static variable 'saveusr.dittolist'
- * will always indicate where the last execution of the loop ended.
- */
- usrblk.flags &= ~USR_STOPSRCH; /* init stop button to OFF */
- kwic_workproc(); /* work proc will set retncode */
- return;
- } /* oe_ditto2kwic() */
- /************************************************/
- /* */
- /* kwic_workproc */
- /* */
- /************************************************/
- /* called repeatedly to convert abstracts in dittolist to KWIC strings */
- void kwic_workproc (void)
- {
- int iter;
- int entire_list_done;
- time_t start_time;
- double time_dif;
- DtSrResult *dit;
- LLIST *bloblist;
- char sprintbuf[256];
- /* test whether user has pushed STOP button since last call */
- if (usrblk.flags & USR_STOPSRCH)
- {
- usrblk.retncode = OE_USER_STOP;
- return;
- }
- #ifdef DEBUG_KWIC
- printf ("\nSTART ITERATIONS = %d\n", saveusr.iterations);
- #endif
- /* initialize the loop */
- dit = saveusr.dittolist;
- entire_list_done = FALSE;
- time (&start_time);
- /* Traverse entire dittolist. Unblob each record,
- * create a hitword list, then use it to convert abstract.
- */
- for (iter = saveusr.iterations; iter > 0; )
- {
- /* Set usrblk.dblk ptr to correct database for curr ditto node */
- if (dbn_to_dblk (dit->dbn))
- saveusr.vistano = usrblk.dblk->vistano;
- else
- {
- usrblk.retncode = OE_NOTAVAIL;
- return;
- }
- /* If this record's database has too small an
- * abstract buffer, just skip the conversion.
- */
- if (usrblk.dblk->dbrec.or_abstrsz < MIN_KWIC_BUFSZ)
- goto NEXT_DIT;
- /* Skip any records which do not have blobs. */
- if (usrblk.dblk->dbrec.or_dbaccess != ORA_BLOB)
- goto NEXT_DIT;
- if ((bloblist = ve_getblobs (dit->dba, saveusr.vistano)) == NULL)
- {
- sprintf (sprintbuf, PROGNAME"149 "
- "Corrupted database address on hitlist. "
- "Erroneous record: '%s' in database '%s'.",
- dit->reckey, usrblk.dblk->label);
- DtSearchAddMessage (sprintbuf);
- usrblk.retncode = OE_NOTAVAIL;
- return;
- }
- usrblk.retncode = oe_unblob (bloblist, FALSE);
- if (usrblk.retncode != OE_OK)
- {
- sprintf (sprintbuf, PROGNAME"213 "
- "Erroneous record: '%s' in database '%s'.",
- dit->reckey, usrblk.dblk->label);
- DtSearchAddMessage (sprintbuf);
- return;
- }
- if (!oe_stems_to_hitwords (1))
- return;
- make_one_kwic (dit->abstractp);
- /* Increment ditto pointer. Check for end of dittolist. */
- NEXT_DIT:
- if ((dit = dit->link) == NULL)
- {
- entire_list_done = TRUE;
- break;
- }
- /* Decrement iteration counter unless user said not to */
- if (!(usrblk.flags & USR_NO_ITERATE))
- iter--;
- } /* end iteration loop */
- if (usrblk.debug & (USRDBG_SRCHCMPL | USRDBG_HITLIST))
- print_dittolist (saveusr.dittolist, PROGNAME"888");
- /* End of current set of iterations.
- * If main loop is not completed,
- * adjust number of iterations to about 1 second,
- * save current status, and return to caller.
- */
- if (!entire_list_done)
- {
- time_dif = difftime (time(NULL), start_time);
- if (time_dif < 1.)
- saveusr.iterations = (float) saveusr.iterations * 1.5;
- else if (time_dif > 1.)
- saveusr.iterations = (double) saveusr.iterations / time_dif;
- if (saveusr.iterations < MIN_KWIC_ITERATIONS)
- saveusr.iterations = MIN_KWIC_ITERATIONS;
- #ifdef DEBUG_KWIC
- printf ("\nEND ITERATIONS = %d, time_dif = %lf\n",
- saveusr.iterations, time_dif);
- #endif
- saveusr.dittolist = dit; /* temp save curr loc in usrblk.dittolist */
- usrblk.workproc = kwic_workproc;
- usrblk.retncode = OE_SEARCHING;
- return;
- } /* endif where we are still searching */
- /* Unless user specified no_iterate, calling program should always
- * reset workproc to NULL. Dummy_workproc just appends an
- * error message to notify programmer of his problem.
- */
- if (!(usrblk.flags & USR_NO_ITERATE))
- usrblk.workproc = dummy_workproc;
- usrblk.retncode = OE_OK;
- return;
- } /* kwic_workproc() */
- /************************************************/
- /* */
- /* oe_findstr_hitl */
- /* */
- /************************************************/
- /* Reduces dittolist to the subset of records that
- * contain string in query, and converts their
- * abstracts to a KWIC-like string.
- */
- void oe_findstr_hitl (void)
- {
- void findstr_workproc (void);
- char *ptr, *ptr2;
- char msgbuf [256];
- /* Validate input fields */
- if (usrblk.query == NULL || usrblk.query[0] == '\0')
- {
- usrblk.retncode = OE_BAD_QUERY;
- return;
- }
- if (strlen(usrblk.query) >= DtSrMAXWIDTH_HWORD)
- {
- sprintf (msgbuf, PROGNAME"516 "
- "No more than %d characters are allowed in the search string.",
- DtSrMAXWIDTH_HWORD - 1);
- DtSearchAddMessage (msgbuf);
- usrblk.retncode = OE_BAD_QUERY;
- return;
- }
- if (usrblk.dittocount <= 0 || usrblk.dittolist == NULL)
- {
- DtSearchAddMessage (PROGNAME"317 Hitlist empty. Nothing to search.");
- usrblk.retncode = OE_BAD_HITLIST;
- return;
- }
- /* Save stems array so we can restore it later,
- * then copy the query to it for use by oe_stems_to_hitwords call.
- * Convert to uppercase as we copy.
- */
- saveusr.stemcount = usrblk.stemcount;
- memcpy (saveusr.stems, usrblk.stems,
- (size_t) (usrblk.stemcount * DtSrMAXWIDTH_HWORD));
- usrblk.stemcount = 1;
- ptr = usrblk.stems[0]; /* target */
- ptr2 = usrblk.query; /* source */
- while (*ptr2 != 0)
- *ptr++ = toupper(*ptr2++);
- *ptr = 0;
- OE_kind_of_stems = STRING_KIND;
- /* initialize saveusr loop status stuff maintained between workproc calls */
- saveusr.vistano = usrblk.dblk->vistano;
- saveusr.iterations = START_FINDSTR_ITERATIONS;
- saveusr.dittolist = usrblk.dittolist; /* curr start of each iteration */
- saveusr.dittocount = 0; /* number of records containing substring */
- /* Call the work procedure that traverses the entire dittolist.
- * If NO_ITERATE is specified, it will run to completion.
- * Otherwise it will run just its first set of iterations,
- * move its own address into usrblk.workproc, then return.
- * Until workproc/mainloop is done, the static variable 'saveusr.dittolist'
- * will always indicate where the last execution of the loop ended.
- * The ditto node for each record which is found to contain
- * the string will be removed from dittolist and added to newditlist.
- */
- usrblk.flags &= ~USR_STOPSRCH; /* init stop button to OFF */
- found_one_substring = FALSE; /* init HIT indicator */
- findstr_workproc(); /* work proc will set retncode */
- return;
- } /* oe_findstr_hitl() */
- /************************************************/
- /* */
- /* restore_findstr_hitl */
- /* */
- /************************************************/
- /* Each time the find-string workproc detected the search
- * string in a ditto node, it converted its abstract.
- * Other nodes were just marked for deletion.
- * When the entire dittolist has been traversed,
- * or if the user pushes the stop button to stop traversal,
- * this cleanup function is called to delete those nodes that
- * were marked for deletion. However if NO records were
- * ever found containing the string, nothing is deleted and
- * the hitlist is restored as it was prior to the beginning of the search.
- * The hitwords array is always blown away but the stems
- * array is always restored no matter what.
- */
- static void restore_findstr_hitl (void)
- {
- DtSrResult *dit, *nextdit;
- DtSrResult **lastlink;
- long newdittocount = 0L;
- /* If the string was ever found in any record,
- * delete all nonhits up to the last successful find.
- */
- #ifdef DEBUG_FINDSTR
- printf ("\nRESTORE FINDSTR: totnumhits = %ld\n", saveusr.dittocount);
- #endif
- if (saveusr.dittocount > 0)
- {
- dit = usrblk.dittolist;
- lastlink = &usrblk.dittolist;
- while (dit != NULL)
- {
- /* On a hit, advance the pointers, advance new dittocount.
- * Break the loop on the last hit.
- */
- if (dit->flags & DIT_FINDSTR)
- {
- #ifdef DEBUG_FINDSTR
- printf ("#%ld HIT %s, \"%s\"\n", newdittocount+1,
- dit->reckey, dit->abstract);
- #endif
- lastlink = &dit->link;
- dit = dit->link;
- if (++newdittocount >= saveusr.dittocount)
- break;
- }
- /* If this was NOT a hit, delete node,
- * and link up loose ends.
- */
- else
- {
- #ifdef DEBUG_FINDSTR
- printf ("RESTORE DELETING %s\n", dit->reckey);
- #endif
- nextdit = dit->link;
- free (dit);
- *lastlink = nextdit;
- dit = nextdit;
- }
- }
- /* At this point, we've cleaned up the list
- * down to the last hit, or to its end.
- * Keep deleting until end of list, or user's stop point.
- */
- while (dit != NULL)
- {
- if (!(usrblk.flags & USR_STOPSRCH) /* never stopped */
- || !(dit->flags & DIT_STOP)) /* stopped somewhere ahead */
- {
- #ifdef DEBUG_FINDSTR
- printf ("PAST LAST HIT, DELETING %s\n", dit->reckey);
- #endif
- nextdit = dit->link;
- free (dit);
- *lastlink = nextdit;
- dit = nextdit;
- }
- else
- {
- break; /* user stopped at this exact node */
- }
- }
- /* If there's anything left on the list,
- * its because user stopped the search here.
- * Just count the remaining records for the final tally.
- */
- while (dit != NULL)
- {
- newdittocount++;
- #ifdef DEBUG_FINDSTR
- printf ("#%ld SAVING AFTER STOP %s\n", newdittocount, dit->reckey);
- #endif
- dit = dit->link;
- }
- usrblk.dittocount = newdittocount;
- } /* endif where at least one record had a string hit */
- /* restore the original stems array */
- usrblk.stemcount = saveusr.stemcount;
- memcpy (usrblk.stems, saveusr.stems,
- (size_t) (saveusr.stemcount * DtSrMAXWIDTH_HWORD));
- #ifdef DEBUG_FINDSTR
- printf ("LEAVING RESTORE now real dittocount = %ld, stemcount = %d\n"
- " first stem = '%s'\n", usrblk.dittocount,
- usrblk.stemcount, usrblk.stems[0]);
- #endif
- return;
- } /* restore_findstr_hitl() */
- /************************************************/
- /* */
- /* findstr_workproc */
- /* */
- /************************************************/
- /* Called repeatedly to search for character substrings in records
- * on the hitlist, and convert their abstracts to KWIC strings.
- */
- void findstr_workproc (void)
- {
- int iter;
- int entire_list_done;
- time_t start_time;
- double time_dif;
- DtSrResult *dit, *cutdit;
- LLIST *bloblist;
- /* Test whether user has pushed STOP button since last call.
- * The DIT_STOP marks where the search ended for restore_findstr_hitl().
- */
- if (usrblk.flags & USR_STOPSRCH)
- {
- saveusr.dittolist->flags |= DIT_STOP;
- usrblk.retncode = (found_one_substring)? OE_OK : OE_USER_STOP;
- restore_findstr_hitl();
- return;
- }
- #ifdef DEBUG_FINDSTR_ITER
- printf ("\nSTART ITERATIONS = %d\n", saveusr.iterations);
- #endif
- /* initialize the loop */
- dit = saveusr.dittolist;
- entire_list_done = FALSE;
- time (&start_time);
- /* Traverse entire dittolist, starting where we last left off.
- * Unblob each record and search for the query string.
- * If found, mark it and convert its abstract.
- * If not found, make sure it's not marked and then continue.
- */
- for (iter = saveusr.iterations; iter > 0; )
- {
- /*****usrblk.dba = dit->dba;**** unnecessary?******/
- if ((bloblist = ve_getblobs (dit->dba, saveusr.vistano)) == NULL)
- {
- DtSearchAddMessage (
- PROGNAME"390 Corrupted database address on hitlist.");
- restore_findstr_hitl();
- usrblk.retncode = OE_BAD_HITLIST;
- return;
- }
- usrblk.retncode = oe_unblob (bloblist, FALSE);
- if (usrblk.retncode != OE_OK)
- return;
- if (!oe_stems_to_hitwords (1))
- return;
- if (usrblk.hitwcount > 0) /* string FOUND */
- {
- found_one_substring = TRUE;
- saveusr.dittocount++;
- #ifdef DEBUG_FINDSTR_ITER
- printf ("<<--->> HIT numhits=%ld, reckey = %s\n",
- saveusr.dittocount, dit->reckey);
- #endif
- make_one_kwic (dit->abstractp);
- dit->flags |= DIT_FINDSTR;
- }
- else dit->flags = 0; /* string NOT found */
- /* advance to next ditto node */
- if ((dit = dit->link) == NULL)
- {
- entire_list_done = TRUE;
- break;
- }
- /* decrement iteration counter unless user said not to */
- if (!(usrblk.flags & USR_NO_ITERATE)) iter--;
- } /* end iteration loop */
- /* End of current set of iterations.
- * If main loop is not completed,
- * adjust number of iterations to about 1 second,
- * save current status, and return to caller.
- */
- if (!entire_list_done)
- {
- time_dif = difftime (time(NULL), start_time);
- if (time_dif < 1.)
- saveusr.iterations = (float) saveusr.iterations * 1.5;
- else if (time_dif > 1.)
- saveusr.iterations = (double) saveusr.iterations / time_dif;
- if (saveusr.iterations < MIN_FINDSTR_ITERATIONS)
- saveusr.iterations = MIN_FINDSTR_ITERATIONS;
- #ifdef DEBUG_FINDSTR_ITER
- printf ("\nEND ITERATIONS = %d, time_dif = %lf\n",
- saveusr.iterations, time_dif);
- #endif
- saveusr.dittolist = dit; /* temp save curr loc in usrblk.dittolist */
- usrblk.workproc = findstr_workproc;
- usrblk.retncode = OE_SEARCHING;
- return;
- } /* endif where we are still searching */
- /* Completely done! Unless user specified no_iterate,
- * calling program should always reset workproc to NULL.
- * In case he forgets, dummy_workproc just appends an
- * error message to notify programmer of his problem.
- */
- if (!(usrblk.flags & USR_NO_ITERATE))
- usrblk.workproc = dummy_workproc;
- if (saveusr.dittocount > 0)
- usrblk.retncode = OE_OK;
- else
- usrblk.retncode = OE_NOTAVAIL;
- restore_findstr_hitl();
- return;
- } /* findstr_workproc() */
-
- /******************************* OEKWIC.C ********************************/
|