123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- /*
- * CDE - Common Desktop Environment
- *
- * Copyright (c) 1993-2012, The Open Group. All rights reserved.
- *
- * These libraries and programs are free software; you can
- * redistribute them and/or modify them under the terms of the GNU
- * Lesser General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * These libraries and programs are distributed in the hope that
- * they will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU Lesser General Public License for more
- * details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with these libraries and programs; if not, write
- * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
- * Floor, Boston, MA 02110-1301 USA
- */
- /*
- * COMPONENT_NAME: austext
- *
- * FUNCTIONS: discard_to_ETX
- * readchar_ftext
- * readchar_string
- *
- * ORIGINS: 27
- *
- *
- * (C) COPYRIGHT International Business Machines Corp. 1996
- * All Rights Reserved
- * Licensed Materials - Property of IBM
- * US Government Users Restricted Rights - Use, duplication or
- * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
- */
- /************************ READCHAR.C *******************************
- * $XConsortium: readchar.c /main/3 1996/05/07 13:47:58 drk $
- * January 1996.
- * Character reading cofunctions for language parsers.
- *
- * $Log$
- * Revision 1.5 1996/03/25 17:01:19 miker
- * Clean up compiler warning.
- *
- * Revision 1.4 1996/03/13 22:59:39 miker
- * Added prolog. Changed char to UCHAR several places.
- *
- * Revision 1.3 1996/03/05 18:39:34 miker
- * Make *all* char ptrs unsigned.
- *
- * Revision 1.2 1996/03/05 18:08:03 miker
- * Readchar functions return unsigned chars for compatibility
- * with compilers whose default char type is signed.
- *
- * Revision 1.1 1996/02/01 19:20:39 miker
- * Initial revision
- */
- #include "SearchP.h"
- #include <stdlib.h>
- #include <string.h>
- #define PROGNAME "READCHAR"
- /************************************************/
- /* */
- /* readchar_string */
- /* */
- /************************************************/
- /* Generic readchar cofunction for parsers when the
- * text block is a string. Used for example when
- * parsing queries.
- */
- UCHAR readchar_string (UCHAR *the_string)
- {
- static UCHAR *strp = (UCHAR *) "";
- if (the_string)
- strp = the_string;
- return ((*strp)? *strp++ : 0);
- }
- /************************************************/
- /* */
- /* discard_to_ETX */
- /* */
- /************************************************/
- /* Called when dtsrload or dtsrindex wants to skip to next
- * .fzk record by reading and discarding all text to either
- * end of record marker or end of file.
- * Usually called after some error condition in the .fzk file,
- * such as recid not found in database, or when blobs not
- * used in dtsrload so they can be discarded.
- * Wraps around readchar_ftext(), which does the actual read
- * and checks for ETX with a read-ahead buffer.
- */
- void discard_to_ETX (PARG *parg)
- {
- if (!readchar_ftext (parg))
- return;
- while (readchar_ftext (NULL))
- ;
- return;
- } /* discard_to_ETX() */
- /************************************************/
- /* */
- /* readchar_ftext */
- /* */
- /************************************************/
- /* Returns next char in a text file. Called in 2 different situations:
- * It's a character reader cofunction called from
- * linguistic parser functions for supported languages.
- * It's also called directly from discard_to_ETX() in offline
- * build tools for *all* languages when for some reason the
- * current record being indexed must be discarded all the way
- * to end of text block (ETX).
- * ETX is when etxdelim string detected, or at end-of-file.
- *
- * The first call, which passes parg, is a reset trigger
- * to clear ETX. The globals are then set and used in
- * subsequent calls (passing NULL). This technique is
- * used because it will be called many times in a time
- * critical loop while indexing.
- *
- * Returns '\0' at ETX, and keeps returning '\0'
- * without further reads until the ETX flag is reset.
- * Returns '\0' forever at end-of-file.
- */
- UCHAR readchar_ftext (PARG *parg)
- {
- static FILE *ftext = NULL;
- static UCHAR *etxdelim = NULL;
- static UCHAR *rabuf = NULL;
- static int ETX_flag = TRUE;
- static int delimsz = 0;
- static int bufcount = 0;
- static int i;
- static UCHAR *head, *tail, *cptr, *rabufend;
- /* I'm always going to read ahead just enough chars
- * to test the delim string. The string is expected
- * to be small, typically just a few chars.
- * (A single \0 char indicates there is no record
- * delimiter--record ends only at end of file.)
- * I use a circular read ahead buffer with head and tail ptrs.
- * Bufcount contains current number of chars in buf.
- * Head is next file read point, ahead of youngest char in buf.
- * Tail is next char to return, ie oldest char in buf.
- */
- if (parg) {
- ftext = parg->ftext;
- if (feof(ftext)) {
- ETX_flag = TRUE;
- return 0;
- }
- if (etxdelim) {
- free (etxdelim);
- etxdelim = NULL;
- }
- if (parg->etxdelim)
- etxdelim = (UCHAR *) strdup (parg->etxdelim);
- ETX_flag = FALSE;
- delimsz = (etxdelim)? strlen((char*) etxdelim) : 0;
- if (!rabuf) {
- rabuf = austext_malloc (MAX_ETXDELIM + 2, PROGNAME"479", NULL);
- rabufend = rabuf + MAX_ETXDELIM;
- }
- if (delimsz >= MAX_ETXDELIM) {
- fprintf (aa_stderr, PROGNAME"505 Record delimiter too long.\n");
- DtSearchExit (2);
- }
- head = tail = rabuf;
- bufcount = 0;
- }
- if (ETX_flag)
- return 0;
- /* Read chars into read ahead buf until we
- * have enough to compare for etxdelim.
- * If possible, always read in at least one char.
- */
- while (bufcount == 0 || bufcount < delimsz) {
- if (feof(ftext))
- break;
- if ((i = fgetc (ftext)) == EOF)
- break;
- *head++ = i;
- bufcount++;
- if (head >= rabufend)
- head = rabuf;
- }
- /* There are now 3 possibilities.
- * (1) If bufcount == 0 we got EOF and there
- * are no chars remaining in buffer, quit now.
- * (2) Most likely bufcount is nonzero and equals delimsz.
- * Do a wrap-around strcmp looking for delim string.
- * (3) If bufcount is positive but less than delimsz,
- * we got EOF before the last record ended.
- * We'll fall through to the code that returns the next
- * char in the buffer, returning the remaining chars one
- * at a time until exhausted.
- * Note this sequence also handles the case where delimsz == 0.
- */
- if (bufcount <= 0) {
- ETX_flag = TRUE;
- return 0;
- }
- /* Compare chars in read ahead buf for delim string.
- * (Note that if the compare succeeds, both bufcount
- * and delimsz must be > 0).
- */
- if (bufcount == delimsz) {
- cptr = tail;
- for (i = 0; i < delimsz; i++) {
- if (etxdelim[i] != *cptr)
- break;
- cptr++;
- if (cptr >= rabufend)
- cptr = rabuf;
- }
- if (i == delimsz) {
- ETX_flag = TRUE;
- return 0;
- }
- }
- /* No ETX yet. Return the oldest char in read ahead buffer. */
- cptr = tail++;
- if (tail >= rabufend)
- tail = rabuf;
- bufcount--;
- return *cptr;
- } /* readchar_ftext */
- /*************************** READCHAR.C ****************************/
|