dtsrload.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * COMPONENT_NAME: austext
  25. *
  26. * FUNCTIONS: UPDATE_MAXDBA
  27. * count_all_records
  28. * create_object
  29. * load_next_miscrec
  30. * main
  31. * print_exit_code
  32. * print_progress
  33. * read_dbrec
  34. * segregate_dicname
  35. * update_object
  36. * user_args_processor
  37. * write_dbrec
  38. *
  39. * ORIGINS: 27
  40. *
  41. *
  42. * (C) COPYRIGHT International Business Machines Corp. 1993,1995
  43. * All Rights Reserved
  44. * Licensed Materials - Property of IBM
  45. * US Government Users Restricted Rights - Use, duplication or
  46. * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
  47. */
  48. /*********************** DTSRLOAD.C ***************************
  49. * $XConsortium: dtsrload.c /main/8 1996/09/23 21:04:17 cde-ibm $
  50. * October 1993.
  51. * Formerly dtsrload.c was cravel.c.
  52. * Input: Standard AusText .fzk file.
  53. * Function: Adds to or updates corresponding DtSearch-
  54. * AusText database records.
  55. *
  56. * $Log$
  57. * Revision 2.7 1996/03/25 18:54:44 miker
  58. * Changed FILENAME_MAX to _POSIX_PATH_MAX.
  59. *
  60. * Revision 2.6 1996/03/13 22:53:47 miker
  61. * Changed char to UCHAR several places.
  62. *
  63. * Revision 2.5 1996/02/01 18:46:02 miker
  64. * AusText 2.1.11, DtSearch 0.3. Changed document text reads from fgets
  65. * to new single character reading functions to match dtsrindex.
  66. * Added -t etx delimiter string command line arg.
  67. *
  68. * Revision 2.4 1995/12/01 16:18:22 miker
  69. * Added fflush for stdout and stderr for clean printing to AusBuild log.
  70. *
  71. * Revision 2.3 1995/10/26 17:48:45 miker
  72. * Fixed duplicate msgs catopen().
  73. *
  74. * Revision 2.2 1995/10/25 18:39:52 miker
  75. * Added prolog.
  76. *
  77. * Revision 2.1 1995/09/22 19:31:48 miker
  78. * Freeze DtSearch 0.1, AusText 2.1.8
  79. *
  80. * Revision 1.3 1995/09/20 22:52:47 miker
  81. * Fixed bug: DtSrFlNOTAVAIL was being set in wrong obj field.
  82. *
  83. * Revision 1.2 1995/09/19 21:59:53 miker
  84. * Set DtSrFlNOTAVAIL when appropriate for doc.
  85. * If DtSearch, use DtSrVERSION instead of AUSAPI_VERSION in banner.
  86. *
  87. * Revision 1.1 1995/08/31 20:52:34 miker
  88. * Initial revision
  89. *
  90. * Revision 1.12 1995/06/08 19:42:44 miker
  91. * 2.1.5f: Removed -w option. It no longer had an effect.
  92. */
  93. #include "SearchP.h"
  94. #include <limits.h>
  95. #include <errno.h>
  96. #include <string.h>
  97. #include <signal.h>
  98. #include <ctype.h>
  99. #include <sys/stat.h>
  100. #include <locale.h>
  101. #include <unistd.h>
  102. #include <stdlib.h>
  103. #include "vista.h"
  104. #include <sys/types.h>
  105. #include <netinet/in.h>
  /* Program id; also pasted in front of message numbers and of the
   * location strings handed to the vista macros (e.g. PROGNAME "286").
   */
  #define PROGNAME        "DTSRLOAD"

  /* Default number of records per progress dot (see recs_per_dot, -p). */
  #define RECS_PER_DOT    20

  /* Ends a pending line of progress dots.  Expands to a complete
   * if-statement, so it is written WITHOUT a trailing semicolon.
   */
  #define TERMINATE_LINE  if (dotcount>0) { putchar('\n'); }

  /* Process exit codes */
  #define EXIT_NORMAL     0       /* perfect return code */
  #define EXIT_WARNING    1       /* functioned ok, but with warnings */
  #define EXIT_VANISH     3       /* input file effectively empty */

  /* Set numbers passed as the second argument of catgets() */
  #define MS_misc         1
  #define MS_cravel       11
  114. /*--------------- EXTERNS ------------------*/
  /* Defined outside this file. */
  extern volatile int shutdown_now;
  extern long         gen_vec_hufid;  /* saved as OR_HUFID by write_dbrec() */
  extern void         gen_vec (char *fname_huffcode_tab);
  119. /*--------------- GLOBALS ------------------*/
  120. static char *abstrbuf = NULL;
  121. static int blobs_are_used; /* boolean */
  122. static long created_reccount = 0L;
  123. static long dbrec_hufid = 1L;
  124. unsigned long default_hashsize;
  125. int debug_mode = FALSE;
  126. int debug_encode = FALSE;
  127. static char dicname[10]; /* 1 - 8 char database name */
  128. char dicpath[_POSIX_PATH_MAX];
  129. static int dotcount = 0;
  130. static long duplicate_recids = 0L;
  131. char fname_huffcode_tab[_POSIX_PATH_MAX];
  132. char fname_input[_POSIX_PATH_MAX];
  133. struct stat fstat_input;
  134. static FILE *infile = NULL;
  135. static long input_reccount = 0L;
  136. static DtSrINT32
  137. maxdba = 0;
  138. static int need_final_progress_msg = TRUE;
  139. static int normal_exitcode = EXIT_NORMAL;
  140. static DtSrINT32
  141. objsize = 0;
  142. static DtSrObjdate
  143. objdate = 0;
  144. static DB_ADDR objdba = NULL_DBA;
  145. static PARG parg;
  146. static int recs_per_dot = RECS_PER_DOT;
  147. static time_t starttime = 0L;
  148. static DtSrObjdate
  149. starttimeobjd = 0;
  150. char sprintbuf[1024 + _POSIX_PATH_MAX];
  151. static int sumblobs = 0;
  152. static int sumlines = 0;
  153. static DtSrINT32
  154. system_reccount = 0;
  155. static long updated_reccount = 0L;
  156. struct or_dbrec dbrec;
  157. struct or_objrec objrec;
  158. struct or_miscrec miscrec;
  159. struct or_blobrec blobrec;
  160. /********************************************************/
  161. /* */
  162. /* UPDATE_MAXDBA */
  163. /* */
  164. /********************************************************/
  /* Ensures global var 'maxdba' always contains the highest slot number.
   * The slot number is the low-order 24 bits of a database address; the
   * high-order byte (file number) is masked off before the comparison.
   * The argument is parenthesized so that a compound expression such as
   * (a | b) is masked as a whole.  Note that 'dba' is evaluated twice.
   */
  #define UPDATE_MAXDBA(dba) \
      {if(((dba)&0xffffff)>maxdba)maxdba=((dba)&0xffffff);}
  167. /********************************************************/
  168. /* */
  169. /* segregate_dicname */
  170. /* */
  171. /********************************************************/
  172. /* Separates dictionary name from pathname and loads
  173. * them into the globals 'dicname' and 'dicpath'.
  174. * Returns TRUE if dicname is valid, else returns FALSE.
  175. */
  176. static int segregate_dicname (char *string)
  177. {
  178. char *ptr;
  179. int i;
  180. strncpy (dicpath, string, sizeof (dicpath));
  181. dicpath[sizeof (dicpath) - 1] = 0;
  182. /* Set 'ptr' to just the 8 char dictionary name by moving
  183. * it backwards until first non-alphanumeric character
  184. * (such as a ":" in the dos drive id or a slash between directories),
  185. * or to the beginning of string.
  186. */
  187. for (ptr = dicpath + strlen (dicpath) - 1; ptr >= dicpath; ptr--)
  188. if (!isalnum (*ptr)) {
  189. ptr++;
  190. break;
  191. }
  192. if (ptr < dicpath)
  193. ptr = dicpath;
  194. /* test for valid dictionary name */
  195. i = strlen (ptr);
  196. if (i < 1 || i > 8)
  197. return FALSE;
  198. strcpy (dicname, ptr);
  199. *ptr = 0; /* truncate dicname off of full path/dicname */
  200. return TRUE;
  201. } /* segregate_dicname() */
  202. /********************************************************/
  203. /* */
  204. /* user_args_processor */
  205. /* */
  206. /********************************************************/
  207. /* handles command line arguments for 'main' */
  208. static void user_args_processor (int argc, char **argv)
  209. {
  210. char *argptr;
  211. char *src, *targ;
  212. if (argc <= 1) {
  213. PRINT_USAGE:
  214. printf (catgets (dtsearch_catd, MS_cravel, 1,
  215. "\nUSAGE: %s -d<dbname> [options] infile\n"
  216. " Listed default file name extensions can be overridden.\n"
  217. " -d<dbname> 1 - 8 char database name, incl optional path prefix.\n"
  218. " File name extensions automatically appended.\n"
  219. " -t<etxstr> End of text doc delimiter string. Default '\\f\\n'.\n"
  220. " -c Initialize database record count by counting records.\n"
  221. " -p<N> Print a progress dot every <N> records (default %d).\n"
  222. " -h<N> Change duplicate rec id hash table size from %ld to <N>.\n"
  223. " -h0 means there are no duplicates, don't check for them.\n"
  224. " -e<path> Path-filename of huffman encode table (default %s).\n"
  225. " <infile> Input [path]file name. Default extension %s.\n"
  226. ),
  227. aa_argv0,
  228. RECS_PER_DOT, default_hashsize,
  229. FNAME_HUFFCODE_TAB, EXT_FZKEY);
  230. DtSearchExit (2);
  231. }
  232. /* Each pass grabs new parm of "-xxx" format */
  233. for (argc--, argv++; argc > 0 && ((*argv)[0] == '-' || (*argv)[0] == '+');
  234. argc--, argv++) {
  235. argptr = argv[0];
  236. if (strncmp (argptr, "-russell", 8) == 0) {
  237. debug_mode = TRUE;
  238. if (argptr[8] == '2')
  239. debug_encode = TRUE;
  240. continue;
  241. }
  242. argptr[1] = tolower (argptr[1]);
  243. switch (argptr[1]) {
  244. case 'd': /* (D)ictionary */
  245. /* May include both dicname and dicpath */
  246. if (!segregate_dicname (argptr + 2)) {
  247. printf (catgets (dtsearch_catd, MS_cravel, 246,
  248. "\n%s '%s' is invalid path/dictionary name.\n"),
  249. PROGNAME, argptr);
  250. goto PRINT_USAGE;
  251. }
  252. break;
  253. case 't': /* ETX delimiter string */
  254. /* Replace any "\n" string with real linefeed */
  255. targ = parg.etxdelim = malloc (strlen (argptr + 2) + 4);
  256. src = argptr + 2;
  257. while (*src) {
  258. if (src[0] == '\\' && src[1] == 'n') {
  259. *targ++ = '\n';
  260. src += 2;
  261. }
  262. else
  263. *targ++ = *src++;
  264. }
  265. *targ = 0;
  266. break;
  267. case 'p':
  268. if ((recs_per_dot = atoi (argptr + 2)) <= 0) {
  269. recs_per_dot = RECS_PER_DOT;
  270. printf (catgets (dtsearch_catd, MS_cravel, 582,
  271. "%sIgnored invalid progress dot argument '%s'.\n"),
  272. PROGNAME "582 ", argptr);
  273. }
  274. break;
  275. case 'e':
  276. append_ext (fname_huffcode_tab, sizeof (fname_huffcode_tab),
  277. argptr + 2, EXT_HUFFCODE);
  278. break;
  279. case 'h':
  280. duprec_hashsize = atol (argptr + 2);
  281. if (duprec_hashsize == 0UL)
  282. printf (catgets (dtsearch_catd, MS_cravel, 13,
  283. "%s Duplicate record id checking disabled.\n"),
  284. PROGNAME);
  285. break;
  286. case 'c': /* force correct initial reccount by counting
  287. * records */
  288. system_reccount = -1;
  289. break;
  290. default:
  291. UNKNOWN_ARG:
  292. printf (catgets (dtsearch_catd, MS_cravel, 14,
  293. "\n%s Unknown command line argument '%s'.\n"),
  294. PROGNAME, argptr);
  295. } /* endswitch */
  296. } /* endwhile for cmd line '-'processing */
  297. /* validate input file name */
  298. if (argc <= 0) {
  299. puts (catgets (dtsearch_catd, MS_cravel, 15,
  300. "\nMissing required input file name.\a"));
  301. goto PRINT_USAGE;
  302. }
  303. else
  304. append_ext (fname_input, sizeof (fname_input), argv[0], EXT_FZKEY);
  305. /* check for missing database name */
  306. if (dicname[0] == 0) {
  307. puts (catgets (dtsearch_catd, MS_cravel, 16,
  308. "\nNo database name specified (-d argument).\a"));
  309. goto PRINT_USAGE;
  310. }
  311. return;
  312. } /* user_args_processor() */
  313. /****************************************/
  314. /* */
  315. /* count_all_records */
  316. /* */
  317. /****************************************/
  318. /* Initializes system_reccount and maxdba by
  319. * actually counting all records in database.
  320. * Must be called after dbrec has been read to ensure
  321. * maxdba accounts for last miscrec slot number.
  322. */
  323. static void count_all_records (void)
  324. {
  325. char keybuf[DtSrMAX_DB_KEYSIZE + 4];
  326. printf (catgets (dtsearch_catd, MS_cravel, 17,
  327. "%s Initializing total record count "
  328. "in database by actually counting...\n"),
  329. PROGNAME);
  330. system_reccount = 0;
  331. maxdba = 0;
  332. KEYFRST (PROGNAME "286", OR_OBJKEY, 0);
  333. while (db_status == S_OKAY) {
  334. KEYREAD (PROGNAME "288", keybuf);
  335. if (db_status != S_OKAY)
  336. vista_abort (PROGNAME "288");
  337. /* don't count records beginning with ctrl char */
  338. if (keybuf[0] >= 32) {
  339. system_reccount++;
  340. CRGET (PROGNAME "251", &objdba, 0);
  341. UPDATE_MAXDBA (objdba);
  342. }
  343. KEYNEXT (PROGNAME "291", OR_OBJKEY, 0);
  344. }
  345. /* account for last record's misc record slots */
  346. maxdba += dbrec.or_recslots;
  347. return;
  348. } /* count_all_records() */
  349. /****************************************/
  350. /* */
  351. /* read_dbrec */
  352. /* */
  353. /****************************************/
  354. /* Read the database's dbrec and load global variables
  355. * system_reccount and maxdba with current values from db.
  356. */
  357. static void read_dbrec (void)
  358. {
  359. RECFRST (PROGNAME "285", OR_DBREC, 0); /* seqtl retrieval */
  360. if (db_status != S_OKAY) {
  361. printf (catgets (dtsearch_catd, MS_misc, 13,
  362. "%sNo DB record in database '%s'.\n"
  363. " The usual cause is failure to initialize "
  364. "the database (run initausd).\n"),
  365. PROGNAME"296 ", dicname);
  366. DtSearchExit (8);
  367. }
  368. RECREAD (PROGNAME "302", &dbrec, 0);
  369. if (db_status != S_OKAY)
  370. vista_abort (PROGNAME "303");
  371. swab_dbrec (&dbrec, NTOH);
  372. if (debug_mode) {
  373. printf (PROGNAME
  374. " DBREC: reccount=%ld maxdba=%ld vers='%s' dbacc=%d\n"
  375. " fzkeysz=%d abstrsz=%d maxwordsz=%d otype=%d lang=%d\n"
  376. " hufid=%ld flags=x%x compflags=x%x uflags=x%lx sec=x%lx\n"
  377. ,(long)dbrec.or_reccount
  378. ,(long)dbrec.or_maxdba
  379. ,dbrec.or_version
  380. ,(int)dbrec.or_dbaccess
  381. ,(int)dbrec.or_fzkeysz
  382. ,(int)dbrec.or_abstrsz
  383. ,(int)dbrec.or_maxwordsz
  384. ,(int)dbrec.or_dbotype
  385. ,(int)dbrec.or_language
  386. ,(long)dbrec.or_hufid
  387. ,(int)dbrec.or_dbflags
  388. ,(int)dbrec.or_compflags
  389. ,(long)dbrec.or_dbuflags
  390. ,(long)dbrec.or_dbsecmask
  391. );
  392. }
  393. dbrec_hufid = dbrec.or_hufid;
  394. /* Confirm compatible program-database version numbers */
  395. if (!is_compatible_version (dbrec.or_version, SCHEMA_VERSION)) {
  396. printf (catgets(dtsearch_catd, MS_misc, 14,
  397. "%s Program schema version '%s' incompatible with "
  398. "database '%s' version '%s'.\n") ,
  399. PROGNAME"245", SCHEMA_VERSION, dicname, dbrec.or_version);
  400. DtSearchExit(4);
  401. }
  402. /* If blobs are specified for the database,
  403. * they must be compressed blobs.
  404. */
  405. switch (dbrec.or_dbaccess) {
  406. case ORA_VARIES: /* use of blobs determined obj by obj */
  407. case ORA_BLOB: /* objects stored directly in blobs */
  408. case ORA_REFBLOB: /* refs to objects stored in blobs */
  409. blobs_are_used = TRUE;
  410. if (!(dbrec.or_compflags & ORC_COMPBLOB)) {
  411. /* = don't compress blobs */
  412. printf (catgets (dtsearch_catd, MS_cravel, 717,
  413. "%s Aborting: Uncompressed blobs not yet supported.\n"),
  414. PROGNAME"717");
  415. DtSearchExit (5);
  416. }
  417. break;
  418. default:
  419. blobs_are_used = FALSE;
  420. break;
  421. }
  422. /* Initialize global variable maxdba, which records largest slot number.
  423. * If requested, init tot reccount by actually counting records.
  424. */
  425. if (system_reccount == -1)
  426. count_all_records ();
  427. else {
  428. system_reccount = dbrec.or_reccount;
  429. maxdba = dbrec.or_maxdba;
  430. }
  431. printf (catgets (dtsearch_catd, MS_cravel, 18,
  432. "%s: '%s' schema ver = %s, rec count = %ld, last slot = %ld.\n"),
  433. aa_argv0, dicname, dbrec.or_version,
  434. (long)system_reccount, (long)maxdba);
  435. return;
  436. } /* read_dbrec() */
  437. /****************************************/
  438. /* */
  439. /* write_dbrec */
  440. /* */
  441. /****************************************/
  442. /* Write the database's updated reccount and maxdba fields */
  443. static void write_dbrec (void)
  444. {
  445. int i;
  446. DtSrINT32 int32;
  447. RECFRST (PROGNAME "355", OR_DBREC, 0); /* seqtl retrieval */
  448. if (db_status != S_OKAY)
  449. vista_abort (PROGNAME "356");
  450. int32 = htonl (system_reccount);
  451. CRWRITE (PROGNAME "341", OR_RECCOUNT, &int32, 0);
  452. int32 = htonl (maxdba);
  453. CRWRITE (PROGNAME "342", OR_MAXDBA, &int32, 0);
  454. /* If this was the first load of a new database,
  455. * save the huffman encode table id.
  456. */
  457. if (blobs_are_used && dbrec_hufid == -1) {
  458. int32 = htonl ((DtSrINT32)gen_vec_hufid);
  459. CRWRITE (PROGNAME "343", OR_HUFID, &int32, 0);
  460. }
  461. if (db_status != S_OKAY)
  462. vista_abort (PROGNAME "344");
  463. printf (catgets (dtsearch_catd, MS_cravel, 19,
  464. "%s: Final database record count = %ld, last slot = %ld.\n"),
  465. aa_argv0, (long)system_reccount, (long)maxdba);
  466. return;
  467. } /* write_dbrec() */
  468. /************************************************/
  469. /* */
  470. /* print_progress */
  471. /* */
  472. /************************************************/
  473. /* prints complete progress message and statistics to stdout */
  474. static void print_progress (void)
  475. {
  476. time_t seconds = time (NULL) - starttime;
  477. long bytes_in = ftell (infile);
  478. if (bytes_in <= 0L)
  479. bytes_in = fstat_input.st_size; /* make final msg "100%" */
  480. TERMINATE_LINE
  481. printf (catgets (dtsearch_catd, MS_cravel, 20,
  482. "%s: %ld input records processed in %ldm %lds, (%ld%%).\n"
  483. " %ld duplicates, %ld new, %ld updates.\n"),
  484. aa_argv0,
  485. input_reccount, seconds / 60L, seconds % 60L,
  486. (bytes_in * 100L) / fstat_input.st_size,
  487. duplicate_recids, created_reccount, updated_reccount);
  488. need_final_progress_msg = FALSE;
  489. return;
  490. } /* print_progress() */
  491. /************************************************/
  492. /* */
  493. /* print_exit_code */
  494. /* */
  495. /************************************************/
  496. /* Called from inside DtSearchExit() at austext_exit_last */
  497. static void print_exit_code (int exit_code)
  498. {
  499. if (dotcount) {
  500. putchar ('\n');
  501. dotcount = 0;
  502. }
  503. printf ( catgets(dtsearch_catd, MS_cravel, 2,
  504. "%s: Exit code = %d\n") ,
  505. aa_argv0, exit_code);
  506. fflush (aa_stderr);
  507. fflush (stdout);
  508. return;
  509. } /* print_exit_code() */
  510. /************************************************/
  511. /* */
  512. /* load_next_miscrec */
  513. /* */
  514. /************************************************/
  515. /* Repeatedly called from create_object() or update_object()
  516. * to fill miscrec buffer with next FZKABSTR type miscrec
  517. * from input file data saved in fzkbuf and abstrbuf.
  518. * First call for a given object is signaled by passed arg.
  519. * Thereafter static pointers keep track of where we are
  520. * in the source bufs to correctly load the next miscrec.
  521. * Initial state = fill-with-fzkey, if there is a fzkey.
  522. * Second state = fill-with-abstract, if there is an abstract.
  523. * Last state = zero-fill balance of remaining misc records.
  524. * Returns TRUE until last state completed (no more miscrecs can be written).
  525. */
  526. static int load_next_miscrec (int first_call)
  527. {
  528. static enum {
  529. FILL_FZKEY, FILL_ABSTR, FILL_ZEROS
  530. }
  531. fill_state = FILL_ZEROS;
  532. static char *src = NULL;
  533. static int srclen = 0;
  534. static int totbytes = 0;
  535. int i;
  536. char *targ;
  537. /* Initialize static variables at first call. */
  538. if (first_call) {
  539. /* If fzkey-abstract misc recs not used, return immediately. */
  540. if ((totbytes = dbrec.or_fzkeysz + dbrec.or_abstrsz) <= 0)
  541. return FALSE;
  542. if (dbrec.or_fzkeysz > 0) {
  543. fprintf (aa_stderr, catgets(dtsearch_catd, MS_cravel, 522,
  544. "%s This version of %s does not support semantic databases.\n"),
  545. PROGNAME"522", aa_argv0);
  546. DtSearchExit (13);
  547. }
  548. else {
  549. fill_state = FILL_ABSTR;
  550. src = abstrbuf;
  551. srclen = dbrec.or_abstrsz;
  552. }
  553. }
  554. /* If NOT first call, but we've finished writing everything out,
  555. * then tell the caller there's nothing left to do.
  556. */
  557. else if (totbytes <= 0)
  558. return FALSE;
  559. /* Main loop is on each byte of the or_misc field of miscrec.
  560. * Depending on the fill state, the byte will be a fzkey byte,
  561. * an abstract byte, or a binary zero byte.
  562. */
  563. targ = (char *) miscrec.or_misc;
  564. for (i = 0; i < sizeof(miscrec.or_misc); i++, totbytes--) {
  565. switch (fill_state) {
  566. case FILL_FZKEY:
  567. *targ++ = *src++;
  568. if (--srclen <= 0) { /* end of fzkey? */
  569. if (dbrec.or_abstrsz > 0) {
  570. fill_state = FILL_ABSTR;
  571. src = abstrbuf;
  572. srclen = dbrec.or_abstrsz;
  573. }
  574. else
  575. fill_state = FILL_ZEROS;
  576. }
  577. break;
  578. case FILL_ABSTR:
  579. if (*src == 0 || --srclen <= 0) /* end of abstract? */
  580. fill_state = FILL_ZEROS;
  581. *targ++ = *src++;
  582. break;
  583. case FILL_ZEROS:
  584. *targ++ = 0;
  585. break;
  586. default:
  587. fprintf (aa_stderr, catgets (dtsearch_catd, MS_misc, 25,
  588. "%sAbort due to program error.\n"),
  589. PROGNAME "549 ");
  590. DtSearchExit (54);
  591. } /* end switch */
  592. } /* end for-loop */
  593. miscrec.or_misctype = ORM_FZKABS;
  594. return TRUE;
  595. } /* load_next_miscrec() */
  596. /************************************************/
  597. /* */
  598. /* create_object */
  599. /* */
  600. /************************************************/
  601. /* Creates new object rec and misc recs from current vista rec.
  602. * Sets global objdba to new rec's dba and updates maxdba if necessary.
  603. * 1 create fields in objrec buffer, and write it.
  604. * (or_objsize will be rewritten after text size has been determined.)
  605. * 2 create fzkey-abstract rec as necessary.
  606. */
  607. static void create_object (char *key)
  608. {
  609. int i;
  610. char *src, *targ;
  611. DB_ADDR tempdba;
  612. memset (&objrec, 0, sizeof (objrec));
  613. /* Copy the key into the buffer. The previous initialization
  614. * ensures that the key will be padded on the right with zero fill.
  615. * At this point, key length should never be too long because
  616. * it has been previously tested (when the line was first read in).
  617. */
  618. src = key;
  619. targ = objrec.or_objkey;
  620. for (i = 0; i < DtSrMAX_DB_KEYSIZE; i++) {
  621. if (*src == 0)
  622. break;
  623. *targ++ = *src++;
  624. }
  625. /* Objdate will be updated later if line #4 has
  626. * valid DtSrObjdate format. Otherwise current
  627. * date/time stamp will be the default.
  628. */
  629. objrec.or_objdate = starttimeobjd;
  630. /* If all objects in database are same type, mark approp obj flag */
  631. if (dbrec.or_dbotype != 0)
  632. objrec.or_objtype = dbrec.or_dbotype;
  633. /* If blobs are never used, mark each obj as 'unretrievable' */
  634. if (!blobs_are_used)
  635. objrec.or_objflags |= DtSrFlNOTAVAIL;
  636. swab_objrec (&objrec, HTON);
  637. FILLNEW (PROGNAME "487", OR_OBJREC, &objrec, 0);
  638. if (db_status != S_OKAY)
  639. vista_abort (PROGNAME "495");
  640. CRGET (PROGNAME "375", &objdba, 0); /* save object's dba */
  641. UPDATE_MAXDBA (objdba);
  642. if (debug_mode)
  643. printf ("---> new rec: inrecno %6ld, slot %6ld, key '%s'\n",
  644. (long int) input_reccount, (long int) objdba & 0xffffff, objrec.or_objkey);
  645. /* Make current object record the owner of all its sets */
  646. SETOR (PROGNAME "376", OR_OBJ_BLOBS, 0);
  647. SETOR (PROGNAME "377", OR_OBJ_MISCS, 0);
  648. /* If fzkeys and/or abstracts are used,
  649. * write out the misc record(s) now.
  650. */
  651. if (load_next_miscrec (TRUE))
  652. do {
  653. HTONS (miscrec.or_misctype);
  654. FILLNEW (PROGNAME "501", OR_MISCREC, &miscrec, 0);
  655. CRGET (PROGNAME "503", &tempdba, 0);
  656. UPDATE_MAXDBA (tempdba);
  657. CONNECT (PROGNAME "505", OR_OBJ_MISCS, 0);
  658. } while (load_next_miscrec (FALSE));
  659. system_reccount++; /* new obj rec, so incr tot num database recs */
  660. created_reccount++;
  661. return;
  662. } /* create_object() */
  663. /************************************************/
  664. /* */
  665. /* update_object */
  666. /* */
  667. /************************************************/
  668. /* Reinitializes portions of preexisting object rec.
  669. * (Presumes vista 'current record' is desired object rec.)
  670. * Sets objdba to rec's dba and updates maxdba if necessary.
  671. * System_reccount is not altered because this is not a new record.
  672. * 1 reinit certain fields in objrec, and rewrite it.
  673. * (or_objsize will be rewritten after text size has been determined.)
  674. * 2 delete all blobs (there should be no hyper recs,
  675. * and existing user notes should not be changed).
  676. * 3 update fzkey-abstract rec(s) as necessary.
  677. * Important: misc rec updates should always be IN-PLACE.
  678. * If miscrecs are deleted first then readded,
  679. * there is no guarantee that their slots will be adjacent.
  680. * This will screw up bit vector calculations in the inverted
  681. * index word searches. In-place updates are faster anyway,
  682. * and we know that the number of misc rec slots is constant.
  683. */
  684. static void update_object (char *key)
  685. {
  686. int i;
  687. int first_fzkabstr = TRUE;
  688. DtSrINT16 misctype;
  689. DtSrINT32 int32;
  690. DB_ADDR tempdba;
  691. DtSrINT32 zero_objsize = 0;
  692. /* Slot number is dba with high order byte (filenum) parsed out */
  693. CRGET (PROGNAME "467", &objdba, 0); /* save object's dba */
  694. UPDATE_MAXDBA (objdba);
  695. if (debug_mode)
  696. printf ("----> update: inrecno %6ld, slot %6ld, key '%s'\n",
  697. (long int) input_reccount, (long int) objdba & 0xffffff, key);
  698. /* Reinit certain fields.
  699. * Objsize will be rewritten after new text size determined.
  700. * Objdate will be rewritten if .fzk file has valid
  701. * DtSrObjdate format in line #4.
  702. */
  703. CRWRITE (PROGNAME "472", OR_OBJSIZE, &zero_objsize, 0);
  704. int32 = htonl (starttimeobjd);
  705. CRWRITE (PROGNAME "681", OR_OBJDATE, &int32, 0);
  706. /* Make current object record the owner of all its sets */
  707. SETOR (PROGNAME "475", OR_OBJ_BLOBS, 0);
  708. SETOR (PROGNAME "476", OR_OBJ_MISCS, 0);
  709. /* Delete all blobs in a loop */
  710. FINDFM (PROGNAME "480", OR_OBJ_BLOBS, 0);
  711. while (db_status == S_OKAY) {
  712. DISDEL (PROGNAME "482", 0);
  713. FINDFM (PROGNAME "483", OR_OBJ_BLOBS, 0);
  714. }
  715. /* Update all miscrecs in a loop.
  716. * User notes are left alone,
  717. * and fzkey-abstracts are updated.
  718. * Currently other types are not allowed.
  719. */
  720. first_fzkabstr = TRUE;
  721. FINDFM (PROGNAME "480", OR_OBJ_MISCS, 0);
  722. while (db_status == S_OKAY) {
  723. CRREAD (PROGNAME "496", OR_MISCTYPE, &misctype, 0);
  724. NTOHS (misctype);
  725. switch (misctype) {
  726. case ORM_OLDNOTES:
  727. case ORM_HUGEKEY:
  728. break; /* do nothing */
  729. case ORM_FZKABS: /* combined fzkey-abstract rec */
  730. if (load_next_miscrec (first_fzkabstr)) {
  731. HTONS (miscrec.or_misctype);
  732. RECWRITE (PROGNAME "601", &miscrec, 0);
  733. CRGET (PROGNAME "605", &tempdba, 0);
  734. UPDATE_MAXDBA (tempdba);
  735. first_fzkabstr = FALSE;
  736. }
  737. else {
  738. DISDEL (PROGNAME "709", 0);
  739. }
  740. break;
  741. default:
  742. DISDEL (PROGNAME "529", 0);
  743. } /* end switch */
  744. FINDNM (PROGNAME "506", OR_OBJ_MISCS, 0);
  745. } /* end update loop for all members of OBJ_MISCS set */
  746. updated_reccount++;
  747. return;
  748. } /* update_object() */
  749. /************************************************/
  750. /* */
  751. /* call_encoder */
  752. /* */
  753. /************************************************/
  754. /* Called from main while reading document text.
  755. * Calls huffman compression encoder at convenient
  756. * intervals and at ETX.
  757. */
  758. static void call_encoder (UCHAR *ucharbuf, int buflen)
  759. {
  760. objsize += buflen;
  761. if (debug_encode) {
  762. sumlines += buflen;
  763. printf ("buflen = %d, sumlines = %d, cum objsize = %ld\n",
  764. (int)buflen, (int)sumlines, (long)objsize);
  765. }
  766. if (hc_encode (&blobrec, ucharbuf, buflen, FALSE)) {
  767. if (debug_encode) {
  768. sumblobs += blobrec.or_bloblen;
  769. printf ("---> WRITE sumlines = %d, bloblen = %d, "
  770. "sumblobs = %d, objsize = %ld\n",
  771. sumlines, (int)blobrec.or_bloblen,
  772. (int)sumblobs, (long)objsize);
  773. sumlines = 0;
  774. }
  775. HTONS (blobrec.or_bloblen);
  776. FILLNEW (PROGNAME "572", OR_BLOBREC, &blobrec, 0);
  777. CONNECT (PROGNAME "578", OR_OBJ_BLOBS, 0);
  778. }
  779. return;
  780. } /* call_encoder() */
  781. /************************************************/
  782. /* */
  783. /* main */
  784. /* */
  785. /************************************************/
  786. int main (int argc, char *argv[])
  787. {
  788. static int hufftab_never_loaded = TRUE;
  789. DBLK dblk;
  790. int i, linelen;
  791. DtSrINT32 int32;
  792. char *cptr, *targ, *src;
  793. char *db_key;
  794. char uniqkey [DtSrMAX_DB_KEYSIZE + 4];
  795. char linebuf [2048];
  796. struct tm *tmptr;
  797. /* Init globals */
  798. setlocale (LC_ALL, "");
  799. dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
  800. aa_argv0 = argv[0];
  801. time (&starttime);
  802. tmptr = localtime (&starttime);
  803. starttimeobjd = tm2objdate (tmptr);
  804. strftime (linebuf, sizeof (linebuf),
  805. catgets (dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
  806. tmptr);
  807. printf (catgets (dtsearch_catd, MS_misc, 23,
  808. "%s: Version %s. Run %s.\n"),
  809. aa_argv0,
  810. DtSrVERSION,
  811. linebuf);
  812. austext_exit_last = print_exit_code;
  813. init_user_interrupt (); /* specify signal handlers */
  814. default_hashsize = duprec_hashsize; /* deflt val in isduprec.c */
  815. strcpy (fname_huffcode_tab, FNAME_HUFFCODE_TAB);
  816. dicname[0] = 0;
  817. dicpath[0] = 0;
  818. memset (&dblk, 0, sizeof(DBLK));
  819. memset (&parg, 0, sizeof(PARG));
  820. parg.dblk = &dblk;
  821. parg.etxdelim = ETXDELIM;
  822. /* Parse user's command line args and maybe change global variables */
  823. user_args_processor (argc, argv);
  824. strcpy (dblk.name, dicname);
  825. /* Open the database */
  826. if (debug_mode)
  827. printf (PROGNAME "211 database OPEN string = '%s%s'\n",
  828. dicpath, dicname);
  829. if (!austext_dopen (dicname, dicpath, NULL, 0, NULL)) {
  830. fprintf (aa_stderr, "%s\n", DtSearchGetMessages());
  831. DtSearchExit (3);
  832. }
  833. src = getcwd (linebuf, sizeof (linebuf));
  834. if (!src)
  835. src = getenv ("PWD");
  836. printf (catgets (dtsearch_catd, MS_misc, 24,
  837. "%s: cwd = '%s', fzkfile = '%s'\n"),
  838. aa_argv0,
  839. (src) ? src : catgets (dtsearch_catd, MS_misc, 6, "<unknown>"),
  840. fname_input);
  841. if ((infile = fopen (fname_input, "r")) == NULL) {
  842. fprintf (aa_stderr, catgets (dtsearch_catd, MS_misc, 12,
  843. "%sUnable to open %s:\n %s\n"),
  844. PROGNAME "326 ", fname_input, strerror (errno));
  845. DtSearchExit (6);
  846. }
  847. parg.ftext = infile; /* for discard_to_ETX() */
  848. /* Read in starting database record count and other db config/status data */
  849. read_dbrec ();
  850. /* If fzkeys and/or abstracts are used,
  851. * create correctly sized buffers for them.
  852. */
  853. if (dbrec.or_fzkeysz > 0) {
  854. fprintf (aa_stderr, catgets(dtsearch_catd, MS_cravel, 522,
  855. "%s This version of %s does not support semantic databases.\n"),
  856. PROGNAME"523", aa_argv0);
  857. DtSearchExit (13);
  858. }
  859. if (dbrec.or_abstrsz > 0)
  860. abstrbuf = austext_malloc (dbrec.or_abstrsz + 16, PROGNAME "744", NULL);
  861. /* Get input file size for progress msgs */
  862. if (fstat (fileno (infile), &fstat_input) == -1) {
  863. fprintf (aa_stderr, catgets (dtsearch_catd, MS_cravel, 29,
  864. "%s Unable to get status for %s: %s\n"),
  865. PROGNAME"337", fname_input, strerror (errno));
  866. DtSearchExit (10);
  867. }
  868. if (fstat_input.st_size <= 0L) {
  869. fprintf (aa_stderr, catgets (dtsearch_catd, MS_cravel, 30,
  870. "%s File %s is empty.\n"),
  871. PROGNAME"343", fname_input);
  872. DtSearchExit (7);
  873. }
  874. printf (catgets (dtsearch_catd, MS_cravel, 31,
  875. "%s: Each dot = %d records processed.\n"),
  876. aa_argv0, recs_per_dot);
  877. /*-------------------- MAIN LOOP --------------------
  878. * Executed once for each new input record.
  879. * 1. Read and process the FZKEY line.
  880. * 2. Read and process the ABSTRACT line.
  881. * 3. Read the UNIQUE KEY line.
  882. * Write out an object record at this point.
  883. * 4. Read and process the DATE line, update object rec.
  884. * 5. Use readchar_ftext to read document text until ETX.
  885. * Either blob it or discard it as appropriate.
  886. */
  887. while (!feof(infile)) {
  888. /*----- READ LINE #1, fzkey -------------------------
  889. * First line of new record.
  890. * Abort now if a shutdown signal was sent.
  891. * Skip null records (ETX str followed immediately by ETX str).
  892. * If this database uses fzkeys, "pack" current fzkey
  893. * and save it in the correct miscrec buffer.
  894. * If fzkeys are combined with abstracts they share the same
  895. * miscrec, otherwise they they reside in their own miscrec.
  896. * WARNING! Presumes or_fzkeysz <= the space allocated
  897. * for it in the correct miscrec.
  898. *-----------------------------------------------------*/
  899. if (fgets (linebuf, sizeof(linebuf) - 1, infile) == NULL)
  900. break;
  901. /* Got at least one line of a new record. Print progress dots */
  902. if (!(input_reccount % recs_per_dot)) {
  903. if (input_reccount) {
  904. putchar ('.');
  905. dotcount++;
  906. if (!(dotcount % 10))
  907. putchar (' ');
  908. if (dotcount >= 50) {
  909. print_progress ();
  910. dotcount = 0;
  911. }
  912. else
  913. fflush (stdout);
  914. }
  915. }
  916. input_reccount++;
  917. need_final_progress_msg = TRUE;
  918. linebuf [sizeof(linebuf)-1] = 0;
  919. linelen = strlen (linebuf);
  920. objsize = 0;
  921. if (shutdown_now) {
  922. TERMINATE_LINE
  923. printf (catgets (dtsearch_catd, MS_misc, 15,
  924. "%sReceived abort signal %d.\n"),
  925. PROGNAME"373 ", shutdown_now);
  926. write_dbrec (); /* at least update reccount and maxdba */
  927. DtSearchExit (100 + shutdown_now);
  928. }
  929. /* Skip null record */
  930. if (strcmp (linebuf, parg.etxdelim) == 0)
  931. continue;
  932. /*----- READ LINE #2, abstract ------------------------
  933. * Second line is abstract line. Save it in record buffer,
  934. * hopping over the first 10 chars ("ABSTRACT: ....").
  935. *-----------------------------------------------------*/
  936. if (fgets (linebuf, sizeof (linebuf) - 1, infile) == NULL)
  937. break;
  938. linebuf [sizeof(linebuf)-1] = 0;
  939. linelen = strlen (linebuf);
  940. if (strncmp (linebuf, "ABSTRACT: ", 10) != 0) {
  941. cptr = PROGNAME"580";
  942. INVALID_FORMAT:
  943. normal_exitcode = EXIT_WARNING;
  944. TERMINATE_LINE
  945. printf (catgets (dtsearch_catd, MS_cravel, 579,
  946. "%s Discarded rec #%ld: Invalid .fzk file format.\n"),
  947. cptr, input_reccount);
  948. if (strcmp (linebuf, parg.etxdelim) != 0)
  949. discard_to_ETX (&parg);
  950. continue;
  951. }
  952. /* If abstracts are used, save this one in the abstract buffer */
  953. if (dbrec.or_abstrsz > 0) {
  954. linebuf[--linelen] = 0; /* delete terminating \n */
  955. strncpy (abstrbuf, linebuf + 10, dbrec.or_abstrsz);
  956. abstrbuf[dbrec.or_abstrsz - 1] = 0;
  957. }
  958. /*--- READ LINE #3, unique database key ------------------
  959. * Third line is 'unique record id'.
  960. * If key is valid update old objrec
  961. * or create new one as necessary.
  962. * (There may be one more write required
  963. * after we determine total blob size).
  964. *-----------------------------------------------------*/
  965. if (fgets (linebuf, sizeof (linebuf) - 1, infile) == NULL)
  966. break;
  967. linebuf [sizeof(linebuf)-1] = 0;
  968. linelen = strlen (linebuf);
  969. if (strcmp (linebuf, parg.etxdelim) == 0) {
  970. cptr = PROGNAME"1068";
  971. goto INVALID_FORMAT;
  972. }
  973. /*
  974. * Isolate first token surrounded by whitespace
  975. * (and parse out \n)
  976. */
  977. if ((db_key = strtok (linebuf, " \t\n")) == NULL) {
  978. cptr = PROGNAME"1076";
  979. goto INVALID_FORMAT;
  980. }
  981. if (strlen (db_key) > DtSrMAX_DB_KEYSIZE - 1) {
  982. normal_exitcode = EXIT_WARNING;
  983. TERMINATE_LINE
  984. printf (catgets (dtsearch_catd, MS_cravel, 33,
  985. "%s Discarded rec #%ld: Key too long:\n '%s'.\n"),
  986. PROGNAME"606", input_reccount, db_key);
  987. discard_to_ETX (&parg);
  988. continue;
  989. }
  990. if (!isalnum (db_key[0])) {
  991. normal_exitcode = EXIT_WARNING;
  992. TERMINATE_LINE
  993. printf (catgets (dtsearch_catd, MS_cravel, 927,
  994. "%s Discarded rec #%ld: First char (keytype) of key\n"
  995. " '%s' is not alphanumeric.\n"),
  996. PROGNAME"927", input_reccount, db_key);
  997. discard_to_ETX (&parg);
  998. continue;
  999. }
  1000. /* If duplicate record in fzk file, discard it. */
  1001. i = is_duprec (db_key);
  1002. if (i == 2) {
  1003. TERMINATE_LINE
  1004. printf (catgets (dtsearch_catd, MS_cravel, 34,
  1005. "%s Out of Memory! "
  1006. "Set -h arg to a smaller number,\n"
  1007. " or reduce the number of input records.\n"),
  1008. PROGNAME"1096");
  1009. DtSearchExit (55);
  1010. }
  1011. else if (i == 1) { /* skip duplicate record id */
  1012. normal_exitcode = EXIT_WARNING;
  1013. TERMINATE_LINE
  1014. printf (catgets (dtsearch_catd, MS_cravel, 35,
  1015. "%s: Discarded duplicate rec #%ld '%s'.\n"),
  1016. aa_argv0, input_reccount, db_key);
  1017. duplicate_recids++;
  1018. discard_to_ETX (&parg);
  1019. continue;
  1020. }
  1021. /*
  1022. * Try to read the object record from the database. If it
  1023. * already exists (UPDATE): delete all its blobs (there
  1024. * should be no hyper recs). create or update
  1025. * fzkey-abstract recs as necessary. dont change any
  1026. * existing user notes. update fields in objrec buffer,
  1027. * but don't write it yet-- objrec will be rewritten
  1028. * after text size has been determined. If it doesn't
  1029. * exist (CREATE): create fields in objrec buffer, and
  1030. * write it. create fzkey-abstract recs as necessary.
  1031. * objrec will be rewritten after text size has been
  1032. * determined. After update or create, objdba contains
  1033. * dba of curr obj record.
  1034. */
  1035. strcpy (uniqkey, db_key);
  1036. KEYFIND (PROGNAME "489", OR_OBJKEY, uniqkey, 0);
  1037. if (db_status == S_OKAY)
  1038. update_object (uniqkey);
  1039. else
  1040. create_object (uniqkey);
  1041. /*----- READ LINE #4, date -----------------------------
  1042. * Line #4 is object date/time string (OBJDATESTR format).
  1043. * It is no longer optional. If invalid, the current
  1044. * run date that was preloaded into the record is used.
  1045. *-----------------------------------------------------*/
  1046. if (fgets (linebuf, sizeof (linebuf) - 1, infile) == NULL)
  1047. break;
  1048. linebuf [sizeof(linebuf)-1] = 0;
  1049. linelen = strlen (linebuf);
  1050. if (!is_objdatestr (linebuf, &objdate)) {
  1051. normal_exitcode = EXIT_WARNING;
  1052. if (strcmp (linebuf, parg.etxdelim) == 0) {
  1053. cptr = PROGNAME"1155";
  1054. goto INVALID_FORMAT;
  1055. }
  1056. TERMINATE_LINE
  1057. printf (catgets (dtsearch_catd, MS_cravel, 1086,
  1058. "%s Record '%s' has invalid date format--"
  1059. "using run date.\n"),
  1060. PROGNAME"1086", uniqkey);
  1061. }
  1062. else { /* objdate is valid */
  1063. KEYFIND (PROGNAME "1098", OR_OBJKEY, uniqkey, 0);
  1064. if (db_status != S_OKAY)
  1065. vista_abort (PROGNAME "1101");
  1066. HTONL (objdate); /* ready for record writes */
  1067. CRWRITE (PROGNAME "1102", OR_OBJDATE, &objdate, 0);
  1068. }
  1069. /*----- READ TO ETX, record text ---------------------
  1070. * Balance of record (after line 4 to end of record marker)
  1071. * is text. It may or may not be formatted in neat ascii
  1072. * lines, ie it may not have periodic linefeeds (\n).
  1073. * If this database does not store compressed records (blobs)
  1074. * we just discard all chars to end of text delimiter (ETX).
  1075. * Otherwise we read it char by char using readchar_ftext()
  1076. * and fill linebuf to some convenient size.
  1077. *
  1078. * Repeated calls to hc_encode() build
  1079. * a compressed record in its own internal blobrec buffer.
  1080. * When the buffer is full, hc_encode copies it to
  1081. * the passed blobrec buffer and returns TRUE.
  1082. * The caller should then write out the blobrec.
  1083. * If hc_encode returns FALSE, its internal blobrec is not
  1084. * yet full so the caller should not yet write out his record.
  1085. *-----------------------------------------------------*/
  1086. if (!blobs_are_used) {
  1087. discard_to_ETX (&parg);
  1088. continue;
  1089. }
  1090. /*
  1091. * Initialize blob compression by reading in huffman
  1092. * encode table (first execution only). Ensure table id
  1093. * is same as one used for previous compressions, if any.
  1094. */
  1095. if (hufftab_never_loaded) {
  1096. hufftab_never_loaded = FALSE;
  1097. gen_vec (fname_huffcode_tab);
  1098. if (dbrec_hufid != gen_vec_hufid && dbrec_hufid != -1L) {
  1099. TERMINATE_LINE
  1100. printf (catgets (dtsearch_catd, MS_cravel, 1153,
  1101. "%s Current data compression table id"
  1102. " in '%s' is %ld.\n"
  1103. " Database '%s' previously compressed"
  1104. " with table %ld.\n"),
  1105. PROGNAME"1153 ", fname_huffcode_tab,
  1106. gen_vec_hufid, dicname, dbrec_hufid);
  1107. DtSearchExit (53);
  1108. }
  1109. }
  1110. /*
  1111. * Compress document text. Repeatedly load linebuf
  1112. * with fixed number of chars and compress it.
  1113. */
  1114. if (debug_encode) {
  1115. sumlines = 0;
  1116. sumblobs = 0;
  1117. }
  1118. if ((linebuf[0] = readchar_ftext (&parg)) == 0) {
  1119. normal_exitcode = EXIT_WARNING;
  1120. TERMINATE_LINE
  1121. printf ( catgets(dtsearch_catd, MS_cravel, 1215,
  1122. "%s Warning. Record '%s' has no text.\n"),
  1123. PROGNAME"1215" , uniqkey);
  1124. continue;
  1125. }
  1126. linelen = 1;
  1127. while (linebuf [linelen] = readchar_ftext (NULL)) {
  1128. if (++linelen >= 80) {
  1129. call_encoder ((UCHAR *)linebuf, linelen);
  1130. linelen = 0;
  1131. }
  1132. }
  1133. /*
  1134. * At ETX: If a partial line remains, process it just like
  1135. * the full lines above. Then write out total size to
  1136. * object record, and make the final call to hc_encode with
  1137. * empty line and TRUE flag to indicate 'no more text,
  1138. * flush your last partial buffer'.
  1139. */
  1140. if (linelen)
  1141. call_encoder ((UCHAR *)linebuf, linelen);
  1142. CRSET (PROGNAME "685", &objdba, 0);
  1143. int32 = htonl (objsize);
  1144. CRWRITE (PROGNAME "686", OR_OBJSIZE, &int32, 0);
  1145. if (hc_encode (&blobrec, (UCHAR *)"", 0, TRUE)) {
  1146. if (debug_encode) {
  1147. sumblobs += blobrec.or_bloblen;
  1148. printf ("---> FINAL sumlines =%d, bloblen = %d, "
  1149. "sumblobs = %ld, objsize = %ld\n",
  1150. (int)sumlines, (int)blobrec.or_bloblen,
  1151. (long)sumblobs, (long)objsize);
  1152. }
  1153. HTONS (blobrec.or_bloblen);
  1154. FILLNEW (PROGNAME "624", OR_BLOBREC, &blobrec, 0);
  1155. CONNECT (PROGNAME "625", OR_OBJ_BLOBS, 0);
  1156. }
  1157. } /* end main record loop */
  1158. if (need_final_progress_msg)
  1159. print_progress ();
  1160. fclose (infile);
  1161. write_dbrec ();
  1162. /* If all input records were discarded, complete processing
  1163. * but upgrade warning exit code to hard error code.
  1164. */
  1165. if (created_reccount <= 0L && updated_reccount <= 0L) {
  1166. normal_exitcode = EXIT_VANISH;
  1167. fprintf (stderr, catgets (dtsearch_catd, MS_cravel, 1048,
  1168. "%sDatabase objects not changed because input "
  1169. "file effectively empty.\n"),
  1170. PROGNAME "1048 ");
  1171. }
  1172. /* Close database and print return code via exits.
  1173. * Return code is either 0 (perfect), 1 (warnings),
  1174. * or 3 (input file effectively empty).
  1175. */
  1176. DtSearchExit (normal_exitcode);
  1177. } /* main() */
  1178. /*********************** DTSRLOAD.C ***************************/