dtsrload.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * COMPONENT_NAME: austext
  25. *
  26. * FUNCTIONS: UPDATE_MAXDBA
  27. * count_all_records
  28. * create_object
  29. * load_next_miscrec
  30. * main
  31. * print_exit_code
  32. * print_progress
  33. * read_dbrec
  34. * segregate_dicname
  35. * update_object
  36. * user_args_processor
  37. * write_dbrec
  38. *
  39. * ORIGINS: 27
  40. *
  41. *
  42. * (C) COPYRIGHT International Business Machines Corp. 1993,1995
  43. * All Rights Reserved
  44. * Licensed Materials - Property of IBM
  45. * US Government Users Restricted Rights - Use, duplication or
  46. * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
  47. */
  48. /*********************** DTSRLOAD.C ***************************
  49. * $XConsortium: dtsrload.c /main/8 1996/09/23 21:04:17 cde-ibm $
  50. * October 1993.
  51. * Formerly dtsrload.c was cravel.c.
  52. * Input: Standard AusText .fzk file.
  53. * Function: Adds to or updates corresponding DtSearch-
  54. * AusText database records.
  55. *
  56. * $Log$
  57. * Revision 2.7 1996/03/25 18:54:44 miker
  58. * Changed FILENAME_MAX to _POSIX_PATH_MAX.
  59. *
  60. * Revision 2.6 1996/03/13 22:53:47 miker
  61. * Changed char to UCHAR several places.
  62. *
  63. * Revision 2.5 1996/02/01 18:46:02 miker
  64. * AusText 2.1.11, DtSearch 0.3. Changed document text reads from fgets
  65. * to new single character reading functions to match dtsrindex.
  66. * Added -t etx delimiter string command line arg.
  67. *
  68. * Revision 2.4 1995/12/01 16:18:22 miker
  69. * Added fflush for stdout and stderr for clean printing to AusBuild log.
  70. *
  71. * Revision 2.3 1995/10/26 17:48:45 miker
  72. * Fixed duplicate msgs catopen().
  73. *
  74. * Revision 2.2 1995/10/25 18:39:52 miker
  75. * Added prolog.
  76. *
  77. * Revision 2.1 1995/09/22 19:31:48 miker
  78. * Freeze DtSearch 0.1, AusText 2.1.8
  79. *
  80. * Revision 1.3 1995/09/20 22:52:47 miker
  81. * Fixed bug: DtSrFlNOTAVAIL was being set in wrong obj field.
  82. *
  83. * Revision 1.2 1995/09/19 21:59:53 miker
  84. * Set DtSrFlNOTAVAIL when appropriate for doc.
  85. * If DtSearch, use DtSrVERSION instead of AUSAPI_VERSION in banner.
  86. *
  87. * Revision 1.1 1995/08/31 20:52:34 miker
  88. * Initial revision
  89. *
  90. * Revision 1.12 1995/06/08 19:42:44 miker
  91. * 2.1.5f: Removed -w option. It no longer had an effect.
  92. */
  93. #include "SearchP.h"
  94. #include <limits.h>
  95. #include <errno.h>
  96. #include <string.h>
  97. #include <signal.h>
  98. #include <ctype.h>
  99. #include <sys/stat.h>
  100. #include <locale.h>
  101. #include <unistd.h>
  102. #include <stdlib.h>
  103. #include "vista.h"
  104. #include <sys/types.h>
  105. #include <netinet/in.h>
void init_user_interrupt(void); // lib/DtSearch/userint.c
/* Program name; used as a prefix in messages and pasted together with
 * a number to build the location strings handed to the database macros.
 */
#define PROGNAME "DTSRLOAD"
/* Default for the -p option: records processed per progress dot. */
#define RECS_PER_DOT 20
/* Emits a newline when 'dotcount' is positive.
 * Expands to a bare if-statement, so it must not be followed by 'else'.
 */
#define TERMINATE_LINE if (dotcount>0) { putchar('\n'); }
#define EXIT_NORMAL 0 /* perfect return code */
#define EXIT_WARNING 1 /* functioned ok, but with warnings */
#define EXIT_VANISH 3 /* input file effectively empty */
/* Passed as the second argument of CATGETS
 * (presumably message catalog set numbers -- TODO confirm).
 */
#define MS_misc 1
#define MS_cravel 11
/*--------------- EXTERNS ------------------*/
/* Defined outside this file. NOTE(review): presumably raised by the
 * user interrupt handler to request an early stop -- confirm.
 */
extern volatile int
shutdown_now;
extern void gen_vec (char *fname_huffcode_tab);
/* Written into the dbrec by write_dbrec() when blobs are in use
 * and the database had no huffman table id yet (dbrec_hufid == -1).
 */
extern long gen_vec_hufid;
/*--------------- GLOBALS ------------------*/
/* Source of the abstract bytes copied by load_next_miscrec() */
static char *abstrbuf = NULL;
/* Set by read_dbrec() from dbrec.or_dbaccess */
static int blobs_are_used; /* boolean */
/* Incremented by create_object(); reported by print_progress() */
static long created_reccount = 0L;
/* Copy of dbrec.or_hufid taken by read_dbrec() */
static long dbrec_hufid = 1L;
/* Printed in the usage text as the default -h hash table size */
unsigned long default_hashsize;
/* Set by the -russell argument; -russell2 also sets debug_encode */
int debug_mode = FALSE;
int debug_encode = FALSE;
/* dicname and dicpath are both loaded by segregate_dicname() */
static char dicname[10]; /* 1 - 8 char database name */
char dicpath[_POSIX_PATH_MAX];
/* Tested by TERMINATE_LINE, reset by print_exit_code().
 * NOTE(review): presumably counts progress dots on the current line.
 */
static int dotcount = 0;
/* Reported as "duplicates" by print_progress() */
static long duplicate_recids = 0L;
/* Loaded from the -e argument via append_ext() */
char fname_huffcode_tab[_POSIX_PATH_MAX];
/* Loaded from the <infile> argument via append_ext() */
char fname_input[_POSIX_PATH_MAX];
/* st_size is the denominator of the percentage in print_progress() */
struct stat fstat_input;
/* Input stream; print_progress() calls ftell() on it */
static FILE *infile = NULL;
/* Reported as "input records processed" by print_progress() */
static long input_reccount = 0L;
/* Highest slot number seen so far; maintained by UPDATE_MAXDBA */
static DtSrINT32
maxdba = 0;
/* Cleared by print_progress() */
static int need_final_progress_msg = TRUE;
static int normal_exitcode = EXIT_NORMAL;
/* Accumulated by call_encoder() from each buffer length */
static DtSrINT32
objsize = 0;
static DtSrObjdate
objdate = 0;
/* Database address of the object rec being created or updated */
static DB_ADDR objdba = NULL_DBA;
/* parg.etxdelim is loaded from the -t argument */
static PARG parg;
/* Loaded from the -p argument; falls back to RECS_PER_DOT if invalid */
static int recs_per_dot = RECS_PER_DOT;
/* Subtracted from the current time by print_progress() */
static time_t starttime = 0L;
/* Default object date stored by create_object() and update_object() */
static DtSrObjdate
starttimeobjd = 0;
char sprintbuf[1024 + _POSIX_PATH_MAX];
/* sumblobs and sumlines are debug_encode statistics kept by call_encoder() */
static int sumblobs = 0;
static int sumlines = 0;
/* Total object record count. The -c argument sets it to -1, which
 * makes read_dbrec() call count_all_records().
 */
static DtSrINT32
system_reccount = 0;
/* Incremented by update_object(); reported by print_progress() */
static long updated_reccount = 0L;
/* Record buffers passed to the database read/write macros */
struct or_dbrec dbrec;
struct or_objrec objrec;
struct or_miscrec miscrec;
struct or_blobrec blobrec;
  161. /********************************************************/
  162. /* */
  163. /* UPDATE_MAXDBA */
  164. /* */
  165. /********************************************************/
  166. /* Ensures global var 'maxdba' always contains highest D00 slot number */
  167. #define UPDATE_MAXDBA(dba) {if((dba&0xffffff)>maxdba)maxdba=dba&0xffffff;}
  168. /********************************************************/
  169. /* */
  170. /* segregate_dicname */
  171. /* */
  172. /********************************************************/
  173. /* Separates dictionary name from pathname and loads
  174. * them into the globals 'dicname' and 'dicpath'.
  175. * Returns TRUE if dicname is valid, else returns FALSE.
  176. */
  177. static int segregate_dicname (char *string)
  178. {
  179. char *ptr;
  180. int i;
  181. strncpy (dicpath, string, sizeof (dicpath));
  182. dicpath[sizeof (dicpath) - 1] = 0;
  183. /* Set 'ptr' to just the 8 char dictionary name by moving
  184. * it backwards until first non-alphanumeric character
  185. * (such as a ":" in the dos drive id or a slash between directories),
  186. * or to the beginning of string.
  187. */
  188. for (ptr = dicpath + strlen (dicpath) - 1; ptr >= dicpath; ptr--)
  189. if (!isalnum (*ptr)) {
  190. ptr++;
  191. break;
  192. }
  193. if (ptr < dicpath)
  194. ptr = dicpath;
  195. /* test for valid dictionary name */
  196. i = strlen (ptr);
  197. if (i < 1 || i > 8)
  198. return FALSE;
  199. strcpy (dicname, ptr);
  200. *ptr = 0; /* truncate dicname off of full path/dicname */
  201. return TRUE;
  202. } /* segregate_dicname() */
  203. /********************************************************/
  204. /* */
  205. /* user_args_processor */
  206. /* */
  207. /********************************************************/
  208. /* handles command line arguments for 'main' */
  209. static void user_args_processor (int argc, char **argv)
  210. {
  211. char *argptr;
  212. char *src, *targ;
  213. if (argc <= 1) {
  214. PRINT_USAGE:
  215. printf (CATGETS(dtsearch_catd, MS_cravel, 1,
  216. "\nUSAGE: %s -d<dbname> [options] infile\n"
  217. " Listed default file name extensions can be overridden.\n"
  218. " -d<dbname> 1 - 8 char database name, incl optional path prefix.\n"
  219. " File name extensions automatically appended.\n"
  220. " -t<etxstr> End of text doc delimiter string. Default '\\f\\n'.\n"
  221. " -c Initialize database record count by counting records.\n"
  222. " -p<N> Print a progress dot every <N> records (default %d).\n"
  223. " -h<N> Change duplicate rec id hash table size from %ld to <N>.\n"
  224. " -h0 means there are no duplicates, don't check for them.\n"
  225. " -e<path> Path-filename of huffman encode table (default %s).\n"
  226. " <infile> Input [path]file name. Default extension %s.\n"
  227. ),
  228. aa_argv0,
  229. RECS_PER_DOT, default_hashsize,
  230. FNAME_HUFFCODE_TAB, EXT_FZKEY);
  231. DtSearchExit (2);
  232. }
  233. /* Each pass grabs new parm of "-xxx" format */
  234. for (argc--, argv++; argc > 0 && ((*argv)[0] == '-' || (*argv)[0] == '+');
  235. argc--, argv++) {
  236. argptr = argv[0];
  237. if (strncmp (argptr, "-russell", 8) == 0) {
  238. debug_mode = TRUE;
  239. if (argptr[8] == '2')
  240. debug_encode = TRUE;
  241. continue;
  242. }
  243. argptr[1] = tolower (argptr[1]);
  244. switch (argptr[1]) {
  245. case 'd': /* (D)ictionary */
  246. /* May include both dicname and dicpath */
  247. if (!segregate_dicname (argptr + 2)) {
  248. printf (CATGETS(dtsearch_catd, MS_cravel, 246,
  249. "\n%s '%s' is invalid path/dictionary name.\n"),
  250. PROGNAME, argptr);
  251. goto PRINT_USAGE;
  252. }
  253. break;
  254. case 't': /* ETX delimiter string */
  255. /* Replace any "\n" string with real linefeed */
  256. targ = parg.etxdelim = malloc (strlen (argptr + 2) + 4);
  257. src = argptr + 2;
  258. while (*src) {
  259. if (src[0] == '\\' && src[1] == 'n') {
  260. *targ++ = '\n';
  261. src += 2;
  262. }
  263. else
  264. *targ++ = *src++;
  265. }
  266. *targ = 0;
  267. break;
  268. case 'p':
  269. if ((recs_per_dot = atoi (argptr + 2)) <= 0) {
  270. recs_per_dot = RECS_PER_DOT;
  271. printf (CATGETS(dtsearch_catd, MS_cravel, 582,
  272. "%sIgnored invalid progress dot argument '%s'.\n"),
  273. PROGNAME "582 ", argptr);
  274. }
  275. break;
  276. case 'e':
  277. append_ext (fname_huffcode_tab, sizeof (fname_huffcode_tab),
  278. argptr + 2, EXT_HUFFCODE);
  279. break;
  280. case 'h':
  281. duprec_hashsize = atol (argptr + 2);
  282. if (duprec_hashsize == 0UL)
  283. printf (CATGETS(dtsearch_catd, MS_cravel, 13,
  284. "%s Duplicate record id checking disabled.\n"),
  285. PROGNAME);
  286. break;
  287. case 'c': /* force correct initial reccount by counting
  288. * records */
  289. system_reccount = -1;
  290. break;
  291. default:
  292. UNKNOWN_ARG:
  293. printf (CATGETS(dtsearch_catd, MS_cravel, 14,
  294. "\n%s Unknown command line argument '%s'.\n"),
  295. PROGNAME, argptr);
  296. } /* endswitch */
  297. } /* endwhile for cmd line '-'processing */
  298. /* validate input file name */
  299. if (argc <= 0) {
  300. puts (CATGETS(dtsearch_catd, MS_cravel, 15,
  301. "\nMissing required input file name.\a"));
  302. goto PRINT_USAGE;
  303. }
  304. else
  305. append_ext (fname_input, sizeof (fname_input), argv[0], EXT_FZKEY);
  306. /* check for missing database name */
  307. if (dicname[0] == 0) {
  308. puts (CATGETS(dtsearch_catd, MS_cravel, 16,
  309. "\nNo database name specified (-d argument).\a"));
  310. goto PRINT_USAGE;
  311. }
  312. return;
  313. } /* user_args_processor() */
  314. /****************************************/
  315. /* */
  316. /* count_all_records */
  317. /* */
  318. /****************************************/
  319. /* Initializes system_reccount and maxdba by
  320. * actually counting all records in database.
  321. * Must be called after dbrec has been read to ensure
  322. * maxdba accounts for last miscrec slot number.
  323. */
  324. static void count_all_records (void)
  325. {
  326. char keybuf[DtSrMAX_DB_KEYSIZE + 4];
  327. printf (CATGETS(dtsearch_catd, MS_cravel, 17,
  328. "%s Initializing total record count "
  329. "in database by actually counting...\n"),
  330. PROGNAME);
  331. system_reccount = 0;
  332. maxdba = 0;
  333. KEYFRST (PROGNAME "286", OR_OBJKEY, 0);
  334. while (db_status == S_OKAY) {
  335. KEYREAD (PROGNAME "288", keybuf);
  336. if (db_status != S_OKAY)
  337. vista_abort (PROGNAME "288");
  338. /* don't count records beginning with ctrl char */
  339. if (keybuf[0] >= 32) {
  340. system_reccount++;
  341. CRGET (PROGNAME "251", &objdba, 0);
  342. UPDATE_MAXDBA (objdba);
  343. }
  344. KEYNEXT (PROGNAME "291", OR_OBJKEY, 0);
  345. }
  346. /* account for last record's misc record slots */
  347. maxdba += dbrec.or_recslots;
  348. return;
  349. } /* count_all_records() */
  350. /****************************************/
  351. /* */
  352. /* read_dbrec */
  353. /* */
  354. /****************************************/
  355. /* Read the database's dbrec and load global variables
  356. * system_reccount and maxdba with current values from db.
  357. */
  358. static void read_dbrec (void)
  359. {
  360. RECFRST (PROGNAME "285", OR_DBREC, 0); /* seqtl retrieval */
  361. if (db_status != S_OKAY) {
  362. printf (CATGETS(dtsearch_catd, MS_misc, 13,
  363. "%sNo DB record in database '%s'.\n"
  364. " The usual cause is failure to initialize "
  365. "the database (run initausd).\n"),
  366. PROGNAME"296 ", dicname);
  367. DtSearchExit (8);
  368. }
  369. RECREAD (PROGNAME "302", &dbrec, 0);
  370. if (db_status != S_OKAY)
  371. vista_abort (PROGNAME "303");
  372. swab_dbrec (&dbrec, NTOH);
  373. if (debug_mode) {
  374. printf (PROGNAME
  375. " DBREC: reccount=%ld maxdba=%ld vers='%s' dbacc=%d\n"
  376. " fzkeysz=%d abstrsz=%d maxwordsz=%d otype=%d lang=%d\n"
  377. " hufid=%ld flags=x%x compflags=x%x uflags=x%lx sec=x%lx\n"
  378. ,(long)dbrec.or_reccount
  379. ,(long)dbrec.or_maxdba
  380. ,dbrec.or_version
  381. ,(int)dbrec.or_dbaccess
  382. ,(int)dbrec.or_fzkeysz
  383. ,(int)dbrec.or_abstrsz
  384. ,(int)dbrec.or_maxwordsz
  385. ,(int)dbrec.or_dbotype
  386. ,(int)dbrec.or_language
  387. ,(long)dbrec.or_hufid
  388. ,(int)dbrec.or_dbflags
  389. ,(int)dbrec.or_compflags
  390. ,(long)dbrec.or_dbuflags
  391. ,(long)dbrec.or_dbsecmask
  392. );
  393. }
  394. dbrec_hufid = dbrec.or_hufid;
  395. /* Confirm compatible program-database version numbers */
  396. if (!is_compatible_version (dbrec.or_version, SCHEMA_VERSION)) {
  397. printf (CATGETS(dtsearch_catd, MS_misc, 14,
  398. "%s Program schema version '%s' incompatible with "
  399. "database '%s' version '%s'.\n") ,
  400. PROGNAME"245", SCHEMA_VERSION, dicname, dbrec.or_version);
  401. DtSearchExit(4);
  402. }
  403. /* If blobs are specified for the database,
  404. * they must be compressed blobs.
  405. */
  406. switch (dbrec.or_dbaccess) {
  407. case ORA_VARIES: /* use of blobs determined obj by obj */
  408. case ORA_BLOB: /* objects stored directly in blobs */
  409. case ORA_REFBLOB: /* refs to objects stored in blobs */
  410. blobs_are_used = TRUE;
  411. if (!(dbrec.or_compflags & ORC_COMPBLOB)) {
  412. /* = don't compress blobs */
  413. printf (CATGETS(dtsearch_catd, MS_cravel, 717,
  414. "%s Aborting: Uncompressed blobs not yet supported.\n"),
  415. PROGNAME"717");
  416. DtSearchExit (5);
  417. }
  418. break;
  419. default:
  420. blobs_are_used = FALSE;
  421. break;
  422. }
  423. /* Initialize global variable maxdba, which records largest slot number.
  424. * If requested, init tot reccount by actually counting records.
  425. */
  426. if (system_reccount == -1)
  427. count_all_records ();
  428. else {
  429. system_reccount = dbrec.or_reccount;
  430. maxdba = dbrec.or_maxdba;
  431. }
  432. printf (CATGETS(dtsearch_catd, MS_cravel, 18,
  433. "%s: '%s' schema ver = %s, rec count = %ld, last slot = %ld.\n"),
  434. aa_argv0, dicname, dbrec.or_version,
  435. (long)system_reccount, (long)maxdba);
  436. return;
  437. } /* read_dbrec() */
  438. /****************************************/
  439. /* */
  440. /* write_dbrec */
  441. /* */
  442. /****************************************/
  443. /* Write the database's updated reccount and maxdba fields */
  444. static void write_dbrec (void)
  445. {
  446. int i;
  447. DtSrINT32 int32;
  448. RECFRST (PROGNAME "355", OR_DBREC, 0); /* seqtl retrieval */
  449. if (db_status != S_OKAY)
  450. vista_abort (PROGNAME "356");
  451. int32 = htonl (system_reccount);
  452. CRWRITE (PROGNAME "341", OR_RECCOUNT, &int32, 0);
  453. int32 = htonl (maxdba);
  454. CRWRITE (PROGNAME "342", OR_MAXDBA, &int32, 0);
  455. /* If this was the first load of a new database,
  456. * save the huffman encode table id.
  457. */
  458. if (blobs_are_used && dbrec_hufid == -1) {
  459. int32 = htonl ((DtSrINT32)gen_vec_hufid);
  460. CRWRITE (PROGNAME "343", OR_HUFID, &int32, 0);
  461. }
  462. if (db_status != S_OKAY)
  463. vista_abort (PROGNAME "344");
  464. printf (CATGETS(dtsearch_catd, MS_cravel, 19,
  465. "%s: Final database record count = %ld, last slot = %ld.\n"),
  466. aa_argv0, (long)system_reccount, (long)maxdba);
  467. return;
  468. } /* write_dbrec() */
  469. /************************************************/
  470. /* */
  471. /* print_progress */
  472. /* */
  473. /************************************************/
  474. /* prints complete progress message and statistics to stdout */
  475. static void print_progress (void)
  476. {
  477. time_t seconds = time (NULL) - starttime;
  478. long bytes_in = ftell (infile);
  479. if (bytes_in <= 0L)
  480. bytes_in = fstat_input.st_size; /* make final msg "100%" */
  481. TERMINATE_LINE
  482. printf (CATGETS(dtsearch_catd, MS_cravel, 20,
  483. "%s: %ld input records processed in %ldm %lds, (%ld%%).\n"
  484. " %ld duplicates, %ld new, %ld updates.\n"),
  485. aa_argv0,
  486. input_reccount, seconds / 60L, seconds % 60L,
  487. (bytes_in * 100L) / fstat_input.st_size,
  488. duplicate_recids, created_reccount, updated_reccount);
  489. need_final_progress_msg = FALSE;
  490. return;
  491. } /* print_progress() */
  492. /************************************************/
  493. /* */
  494. /* print_exit_code */
  495. /* */
  496. /************************************************/
  497. /* Called from inside DtSearchExit() at austext_exit_last */
  498. static void print_exit_code (int exit_code)
  499. {
  500. if (dotcount) {
  501. putchar ('\n');
  502. dotcount = 0;
  503. }
  504. printf ( CATGETS(dtsearch_catd, MS_cravel, 2,
  505. "%s: Exit code = %d\n") ,
  506. aa_argv0, exit_code);
  507. fflush (aa_stderr);
  508. fflush (stdout);
  509. return;
  510. } /* print_exit_code() */
  511. /************************************************/
  512. /* */
  513. /* load_next_miscrec */
  514. /* */
  515. /************************************************/
  516. /* Repeatedly called from create_object() or update_object()
  517. * to fill miscrec buffer with next FZKABSTR type miscrec
  518. * from input file data saved in fzkbuf and abstrbuf.
  519. * First call for a given object is signaled by passed arg.
  520. * Thereafter static pointers keep track of where we are
  521. * in the source bufs to correctly load the next miscrec.
  522. * Initial state = fill-with-fzkey, if there is a fzkey.
  523. * Second state = fill-with-abstract, if there is an abstract.
  524. * Last state = zero-fill balance of remaining misc records.
  525. * Returns TRUE until last state completed (no more miscrecs can be written).
  526. */
  527. static int load_next_miscrec (int first_call)
  528. {
  529. static enum {
  530. FILL_FZKEY, FILL_ABSTR, FILL_ZEROS
  531. }
  532. fill_state = FILL_ZEROS;
  533. static char *src = NULL;
  534. static int srclen = 0;
  535. static int totbytes = 0;
  536. int i;
  537. char *targ;
  538. /* Initialize static variables at first call. */
  539. if (first_call) {
  540. /* If fzkey-abstract misc recs not used, return immediately. */
  541. if ((totbytes = dbrec.or_fzkeysz + dbrec.or_abstrsz) <= 0)
  542. return FALSE;
  543. if (dbrec.or_fzkeysz > 0) {
  544. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_cravel, 522,
  545. "%s This version of %s does not support semantic databases.\n"),
  546. PROGNAME"522", aa_argv0);
  547. DtSearchExit (13);
  548. }
  549. else {
  550. fill_state = FILL_ABSTR;
  551. src = abstrbuf;
  552. srclen = dbrec.or_abstrsz;
  553. }
  554. }
  555. /* If NOT first call, but we've finished writing everything out,
  556. * then tell the caller there's nothing left to do.
  557. */
  558. else if (totbytes <= 0)
  559. return FALSE;
  560. /* Main loop is on each byte of the or_misc field of miscrec.
  561. * Depending on the fill state, the byte will be a fzkey byte,
  562. * an abstract byte, or a binary zero byte.
  563. */
  564. targ = (char *) miscrec.or_misc;
  565. for (i = 0; i < sizeof(miscrec.or_misc); i++, totbytes--) {
  566. switch (fill_state) {
  567. case FILL_FZKEY:
  568. *targ++ = *src++;
  569. if (--srclen <= 0) { /* end of fzkey? */
  570. if (dbrec.or_abstrsz > 0) {
  571. fill_state = FILL_ABSTR;
  572. src = abstrbuf;
  573. srclen = dbrec.or_abstrsz;
  574. }
  575. else
  576. fill_state = FILL_ZEROS;
  577. }
  578. break;
  579. case FILL_ABSTR:
  580. if (*src == 0 || --srclen <= 0) /* end of abstract? */
  581. fill_state = FILL_ZEROS;
  582. *targ++ = *src++;
  583. break;
  584. case FILL_ZEROS:
  585. *targ++ = 0;
  586. break;
  587. default:
  588. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_misc, 25,
  589. "%sAbort due to program error.\n"),
  590. PROGNAME "549 ");
  591. DtSearchExit (54);
  592. } /* end switch */
  593. } /* end for-loop */
  594. miscrec.or_misctype = ORM_FZKABS;
  595. return TRUE;
  596. } /* load_next_miscrec() */
  597. /************************************************/
  598. /* */
  599. /* create_object */
  600. /* */
  601. /************************************************/
  602. /* Creates new object rec and misc recs from current vista rec.
  603. * Sets global objdba to new rec's dba and updates maxdba if necessary.
  604. * 1 create fields in objrec buffer, and write it.
  605. * (or_objsize will be rewritten after text size has been determined.)
  606. * 2 create fzkey-abstract rec as necessary.
  607. */
  608. static void create_object (char *key)
  609. {
  610. int i;
  611. char *src, *targ;
  612. DB_ADDR tempdba;
  613. memset (&objrec, 0, sizeof (objrec));
  614. /* Copy the key into the buffer. The previous initialization
  615. * ensures that the key will be padded on the right with zero fill.
  616. * At this point, key length should never be too long because
  617. * it has been previously tested (when the line was first read in).
  618. */
  619. src = key;
  620. targ = objrec.or_objkey;
  621. for (i = 0; i < DtSrMAX_DB_KEYSIZE; i++) {
  622. if (*src == 0)
  623. break;
  624. *targ++ = *src++;
  625. }
  626. /* Objdate will be updated later if line #4 has
  627. * valid DtSrObjdate format. Otherwise current
  628. * date/time stamp will be the default.
  629. */
  630. objrec.or_objdate = starttimeobjd;
  631. /* If all objects in database are same type, mark approp obj flag */
  632. if (dbrec.or_dbotype != 0)
  633. objrec.or_objtype = dbrec.or_dbotype;
  634. /* If blobs are never used, mark each obj as 'unretrievable' */
  635. if (!blobs_are_used)
  636. objrec.or_objflags |= DtSrFlNOTAVAIL;
  637. swab_objrec (&objrec, HTON);
  638. FILLNEW (PROGNAME "487", OR_OBJREC, &objrec, 0);
  639. if (db_status != S_OKAY)
  640. vista_abort (PROGNAME "495");
  641. CRGET (PROGNAME "375", &objdba, 0); /* save object's dba */
  642. UPDATE_MAXDBA (objdba);
  643. if (debug_mode)
  644. printf ("---> new rec: inrecno %6ld, slot %6ld, key '%s'\n",
  645. (long int) input_reccount, (long int) objdba & 0xffffff, objrec.or_objkey);
  646. /* Make current object record the owner of all its sets */
  647. SETOR (PROGNAME "376", OR_OBJ_BLOBS, 0);
  648. SETOR (PROGNAME "377", OR_OBJ_MISCS, 0);
  649. /* If fzkeys and/or abstracts are used,
  650. * write out the misc record(s) now.
  651. */
  652. if (load_next_miscrec (TRUE))
  653. do {
  654. HTONS (miscrec.or_misctype);
  655. FILLNEW (PROGNAME "501", OR_MISCREC, &miscrec, 0);
  656. CRGET (PROGNAME "503", &tempdba, 0);
  657. UPDATE_MAXDBA (tempdba);
  658. CONNECT (PROGNAME "505", OR_OBJ_MISCS, 0);
  659. } while (load_next_miscrec (FALSE));
  660. system_reccount++; /* new obj rec, so incr tot num database recs */
  661. created_reccount++;
  662. return;
  663. } /* create_object() */
  664. /************************************************/
  665. /* */
  666. /* update_object */
  667. /* */
  668. /************************************************/
  669. /* Reinitializes portions of preexisting object rec.
  670. * (Presumes vista 'current record' is desired object rec.)
  671. * Sets objdba to rec's dba and updates maxdba if necessary.
  672. * System_reccount is not altered because this is not a new record.
  673. * 1 reinit certain fields in objrec, and rewrite it.
  674. * (or_objsize will be rewritten after text size has been determined.)
  675. * 2 delete all blobs (there should be no hyper recs,
  676. * and existing user notes should not be changed).
  677. * 3 update fzkey-abstract rec(s) as necessary.
  678. * Important: misc rec updates should always be IN-PLACE.
  679. * If miscrecs are deleted first then readded,
  680. * there is no guarantee that their slots will be adjacent.
  681. * This will screw up bit vector calculations in the inverted
  682. * index word searches. In-place updates are faster anyway,
  683. * and we know that the number of misc rec slots is constant.
  684. */
  685. static void update_object (char *key)
  686. {
  687. int i;
  688. int first_fzkabstr = TRUE;
  689. DtSrINT16 misctype;
  690. DtSrINT32 int32;
  691. DB_ADDR tempdba;
  692. DtSrINT32 zero_objsize = 0;
  693. /* Slot number is dba with high order byte (filenum) parsed out */
  694. CRGET (PROGNAME "467", &objdba, 0); /* save object's dba */
  695. UPDATE_MAXDBA (objdba);
  696. if (debug_mode)
  697. printf ("----> update: inrecno %6ld, slot %6ld, key '%s'\n",
  698. (long int) input_reccount, (long int) objdba & 0xffffff, key);
  699. /* Reinit certain fields.
  700. * Objsize will be rewritten after new text size determined.
  701. * Objdate will be rewritten if .fzk file has valid
  702. * DtSrObjdate format in line #4.
  703. */
  704. CRWRITE (PROGNAME "472", OR_OBJSIZE, &zero_objsize, 0);
  705. int32 = htonl (starttimeobjd);
  706. CRWRITE (PROGNAME "681", OR_OBJDATE, &int32, 0);
  707. /* Make current object record the owner of all its sets */
  708. SETOR (PROGNAME "475", OR_OBJ_BLOBS, 0);
  709. SETOR (PROGNAME "476", OR_OBJ_MISCS, 0);
  710. /* Delete all blobs in a loop */
  711. FINDFM (PROGNAME "480", OR_OBJ_BLOBS, 0);
  712. while (db_status == S_OKAY) {
  713. DISDEL (PROGNAME "482", 0);
  714. FINDFM (PROGNAME "483", OR_OBJ_BLOBS, 0);
  715. }
  716. /* Update all miscrecs in a loop.
  717. * User notes are left alone,
  718. * and fzkey-abstracts are updated.
  719. * Currently other types are not allowed.
  720. */
  721. first_fzkabstr = TRUE;
  722. FINDFM (PROGNAME "480", OR_OBJ_MISCS, 0);
  723. while (db_status == S_OKAY) {
  724. CRREAD (PROGNAME "496", OR_MISCTYPE, &misctype, 0);
  725. NTOHS (misctype);
  726. switch (misctype) {
  727. case ORM_OLDNOTES:
  728. case ORM_HUGEKEY:
  729. break; /* do nothing */
  730. case ORM_FZKABS: /* combined fzkey-abstract rec */
  731. if (load_next_miscrec (first_fzkabstr)) {
  732. HTONS (miscrec.or_misctype);
  733. RECWRITE (PROGNAME "601", &miscrec, 0);
  734. CRGET (PROGNAME "605", &tempdba, 0);
  735. UPDATE_MAXDBA (tempdba);
  736. first_fzkabstr = FALSE;
  737. }
  738. else {
  739. DISDEL (PROGNAME "709", 0);
  740. }
  741. break;
  742. default:
  743. DISDEL (PROGNAME "529", 0);
  744. } /* end switch */
  745. FINDNM (PROGNAME "506", OR_OBJ_MISCS, 0);
  746. } /* end update loop for all members of OBJ_MISCS set */
  747. updated_reccount++;
  748. return;
  749. } /* update_object() */
  750. /************************************************/
  751. /* */
  752. /* call_encoder */
  753. /* */
  754. /************************************************/
  755. /* Called from main while reading document text.
  756. * Calls huffman compression encoder at convenient
  757. * intervals and at ETX.
  758. */
  759. static void call_encoder (UCHAR *ucharbuf, int buflen)
  760. {
  761. objsize += buflen;
  762. if (debug_encode) {
  763. sumlines += buflen;
  764. printf ("buflen = %d, sumlines = %d, cum objsize = %ld\n",
  765. (int)buflen, (int)sumlines, (long)objsize);
  766. }
  767. if (hc_encode (&blobrec, ucharbuf, buflen, FALSE)) {
  768. if (debug_encode) {
  769. sumblobs += blobrec.or_bloblen;
  770. printf ("---> WRITE sumlines = %d, bloblen = %d, "
  771. "sumblobs = %d, objsize = %ld\n",
  772. sumlines, (int)blobrec.or_bloblen,
  773. (int)sumblobs, (long)objsize);
  774. sumlines = 0;
  775. }
  776. HTONS (blobrec.or_bloblen);
  777. FILLNEW (PROGNAME "572", OR_BLOBREC, &blobrec, 0);
  778. CONNECT (PROGNAME "578", OR_OBJ_BLOBS, 0);
  779. }
  780. return;
  781. } /* call_encoder() */
  782. /************************************************/
  783. /* */
  784. /* main */
  785. /* */
  786. /************************************************/
  787. int main (int argc, char *argv[])
  788. {
  789. static int hufftab_never_loaded = TRUE;
  790. DBLK dblk;
  791. int i, linelen;
  792. DtSrINT32 int32;
  793. char *cptr, *targ, *src;
  794. char *db_key;
  795. char uniqkey [DtSrMAX_DB_KEYSIZE + 4];
  796. char linebuf [2048];
  797. struct tm *tmptr;
  798. /* Init globals */
  799. setlocale (LC_ALL, "");
  800. dtsearch_catd = CATOPEN(FNAME_DTSRCAT, 0);
  801. aa_argv0 = argv[0];
  802. time (&starttime);
  803. tmptr = localtime (&starttime);
  804. starttimeobjd = tm2objdate (tmptr);
  805. strftime (linebuf, sizeof (linebuf),
  806. CATGETS(dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
  807. tmptr);
  808. printf (CATGETS(dtsearch_catd, MS_misc, 23,
  809. "%s: Version %s. Run %s.\n"),
  810. aa_argv0,
  811. DtSrVERSION,
  812. linebuf);
  813. austext_exit_last = print_exit_code;
  814. init_user_interrupt (); /* specify signal handlers */
  815. default_hashsize = duprec_hashsize; /* deflt val in isduprec.c */
  816. strcpy (fname_huffcode_tab, FNAME_HUFFCODE_TAB);
  817. dicname[0] = 0;
  818. dicpath[0] = 0;
  819. memset (&dblk, 0, sizeof(DBLK));
  820. memset (&parg, 0, sizeof(PARG));
  821. parg.dblk = &dblk;
  822. parg.etxdelim = ETXDELIM;
  823. /* Parse user's command line args and maybe change global variables */
  824. user_args_processor (argc, argv);
  825. strcpy (dblk.name, dicname);
  826. /* Open the database */
  827. if (debug_mode)
  828. printf (PROGNAME "211 database OPEN string = '%s%s'\n",
  829. dicpath, dicname);
  830. if (!austext_dopen (dicname, dicpath, NULL, 0, NULL)) {
  831. fprintf (aa_stderr, "%s\n", DtSearchGetMessages());
  832. DtSearchExit (3);
  833. }
  834. src = getcwd (linebuf, sizeof (linebuf));
  835. if (!src)
  836. src = getenv ("PWD");
  837. printf (CATGETS(dtsearch_catd, MS_misc, 24,
  838. "%s: cwd = '%s', fzkfile = '%s'\n"),
  839. aa_argv0,
  840. (src) ? src : CATGETS(dtsearch_catd, MS_misc, 6, "<unknown>"),
  841. fname_input);
  842. if ((infile = fopen (fname_input, "r")) == NULL) {
  843. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_misc, 12,
  844. "%sUnable to open %s:\n %s\n"),
  845. PROGNAME "326 ", fname_input, strerror (errno));
  846. DtSearchExit (6);
  847. }
  848. parg.ftext = infile; /* for discard_to_ETX() */
  849. /* Read in starting database record count and other db config/status data */
  850. read_dbrec ();
  851. /* If fzkeys and/or abstracts are used,
  852. * create correctly sized buffers for them.
  853. */
  854. if (dbrec.or_fzkeysz > 0) {
  855. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_cravel, 522,
  856. "%s This version of %s does not support semantic databases.\n"),
  857. PROGNAME"523", aa_argv0);
  858. DtSearchExit (13);
  859. }
  860. if (dbrec.or_abstrsz > 0)
  861. abstrbuf = austext_malloc (dbrec.or_abstrsz + 16, PROGNAME "744", NULL);
  862. /* Get input file size for progress msgs */
  863. if (fstat (fileno (infile), &fstat_input) == -1) {
  864. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_cravel, 29,
  865. "%s Unable to get status for %s: %s\n"),
  866. PROGNAME"337", fname_input, strerror (errno));
  867. DtSearchExit (10);
  868. }
  869. if (fstat_input.st_size <= 0L) {
  870. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_cravel, 30,
  871. "%s File %s is empty.\n"),
  872. PROGNAME"343", fname_input);
  873. DtSearchExit (7);
  874. }
  875. printf (CATGETS(dtsearch_catd, MS_cravel, 31,
  876. "%s: Each dot = %d records processed.\n"),
  877. aa_argv0, recs_per_dot);
  878. /*-------------------- MAIN LOOP --------------------
  879. * Executed once for each new input record.
  880. * 1. Read and process the FZKEY line.
  881. * 2. Read and process the ABSTRACT line.
  882. * 3. Read the UNIQUE KEY line.
  883. * Write out an object record at this point.
  884. * 4. Read and process the DATE line, update object rec.
  885. * 5. Use readchar_ftext to read document text until ETX.
  886. * Either blob it or discard it as appropriate.
  887. */
  888. while (!feof(infile)) {
  889. /*----- READ LINE #1, fzkey -------------------------
  890. * First line of new record.
  891. * Abort now if a shutdown signal was sent.
  892. * Skip null records (ETX str followed immediately by ETX str).
  893. * If this database uses fzkeys, "pack" current fzkey
  894. * and save it in the correct miscrec buffer.
  895. * If fzkeys are combined with abstracts they share the same
  896. * miscrec, otherwise they they reside in their own miscrec.
  897. * WARNING! Presumes or_fzkeysz <= the space allocated
  898. * for it in the correct miscrec.
  899. *-----------------------------------------------------*/
  900. if (fgets (linebuf, sizeof(linebuf) - 1, infile) == NULL)
  901. break;
  902. /* Got at least one line of a new record. Print progress dots */
  903. if (!(input_reccount % recs_per_dot)) {
  904. if (input_reccount) {
  905. putchar ('.');
  906. dotcount++;
  907. if (!(dotcount % 10))
  908. putchar (' ');
  909. if (dotcount >= 50) {
  910. print_progress ();
  911. dotcount = 0;
  912. }
  913. else
  914. fflush (stdout);
  915. }
  916. }
  917. input_reccount++;
  918. need_final_progress_msg = TRUE;
  919. linebuf [sizeof(linebuf)-1] = 0;
  920. linelen = strlen (linebuf);
  921. objsize = 0;
  922. if (shutdown_now) {
  923. TERMINATE_LINE
  924. printf (CATGETS(dtsearch_catd, MS_misc, 15,
  925. "%sReceived abort signal %d.\n"),
  926. PROGNAME"373 ", shutdown_now);
  927. write_dbrec (); /* at least update reccount and maxdba */
  928. DtSearchExit (100 + shutdown_now);
  929. }
  930. /* Skip null record */
  931. if (strcmp (linebuf, parg.etxdelim) == 0)
  932. continue;
  933. /*----- READ LINE #2, abstract ------------------------
  934. * Second line is abstract line. Save it in record buffer,
  935. * hopping over the first 10 chars ("ABSTRACT: ....").
  936. *-----------------------------------------------------*/
  937. if (fgets (linebuf, sizeof (linebuf) - 1, infile) == NULL)
  938. break;
  939. linebuf [sizeof(linebuf)-1] = 0;
  940. linelen = strlen (linebuf);
  941. if (strncmp (linebuf, "ABSTRACT: ", 10) != 0) {
  942. cptr = PROGNAME"580";
  943. INVALID_FORMAT:
  944. normal_exitcode = EXIT_WARNING;
  945. TERMINATE_LINE
  946. printf (CATGETS(dtsearch_catd, MS_cravel, 579,
  947. "%s Discarded rec #%ld: Invalid .fzk file format.\n"),
  948. cptr, input_reccount);
  949. if (strcmp (linebuf, parg.etxdelim) != 0)
  950. discard_to_ETX (&parg);
  951. continue;
  952. }
  953. /* If abstracts are used, save this one in the abstract buffer */
  954. if (dbrec.or_abstrsz > 0) {
  955. linebuf[--linelen] = 0; /* delete terminating \n */
  956. strncpy (abstrbuf, linebuf + 10, dbrec.or_abstrsz);
  957. abstrbuf[dbrec.or_abstrsz - 1] = 0;
  958. }
  959. /*--- READ LINE #3, unique database key ------------------
  960. * Third line is 'unique record id'.
  961. * If key is valid update old objrec
  962. * or create new one as necessary.
  963. * (There may be one more write required
  964. * after we determine total blob size).
  965. *-----------------------------------------------------*/
  966. if (fgets (linebuf, sizeof (linebuf) - 1, infile) == NULL)
  967. break;
  968. linebuf [sizeof(linebuf)-1] = 0;
  969. linelen = strlen (linebuf);
  970. if (strcmp (linebuf, parg.etxdelim) == 0) {
  971. cptr = PROGNAME"1068";
  972. goto INVALID_FORMAT;
  973. }
  974. /*
  975. * Isolate first token surrounded by whitespace
  976. * (and parse out \n)
  977. */
  978. if ((db_key = strtok (linebuf, " \t\n")) == NULL) {
  979. cptr = PROGNAME"1076";
  980. goto INVALID_FORMAT;
  981. }
  982. if (strlen (db_key) > DtSrMAX_DB_KEYSIZE - 1) {
  983. normal_exitcode = EXIT_WARNING;
  984. TERMINATE_LINE
  985. printf (CATGETS(dtsearch_catd, MS_cravel, 33,
  986. "%s Discarded rec #%ld: Key too long:\n '%s'.\n"),
  987. PROGNAME"606", input_reccount, db_key);
  988. discard_to_ETX (&parg);
  989. continue;
  990. }
  991. if (!isalnum (db_key[0])) {
  992. normal_exitcode = EXIT_WARNING;
  993. TERMINATE_LINE
  994. printf (CATGETS(dtsearch_catd, MS_cravel, 927,
  995. "%s Discarded rec #%ld: First char (keytype) of key\n"
  996. " '%s' is not alphanumeric.\n"),
  997. PROGNAME"927", input_reccount, db_key);
  998. discard_to_ETX (&parg);
  999. continue;
  1000. }
  1001. /* If duplicate record in fzk file, discard it. */
  1002. i = is_duprec (db_key);
  1003. if (i == 2) {
  1004. TERMINATE_LINE
  1005. printf (CATGETS(dtsearch_catd, MS_cravel, 34,
  1006. "%s Out of Memory! "
  1007. "Set -h arg to a smaller number,\n"
  1008. " or reduce the number of input records.\n"),
  1009. PROGNAME"1096");
  1010. DtSearchExit (55);
  1011. }
  1012. else if (i == 1) { /* skip duplicate record id */
  1013. normal_exitcode = EXIT_WARNING;
  1014. TERMINATE_LINE
  1015. printf (CATGETS(dtsearch_catd, MS_cravel, 35,
  1016. "%s: Discarded duplicate rec #%ld '%s'.\n"),
  1017. aa_argv0, input_reccount, db_key);
  1018. duplicate_recids++;
  1019. discard_to_ETX (&parg);
  1020. continue;
  1021. }
  1022. /*
  1023. * Try to read the object record from the database. If it
  1024. * already exists (UPDATE): delete all its blobs (there
  1025. * should be no hyper recs). create or update
  1026. * fzkey-abstract recs as necessary. dont change any
  1027. * existing user notes. update fields in objrec buffer,
  1028. * but don't write it yet-- objrec will be rewritten
  1029. * after text size has been determined. If it doesn't
  1030. * exist (CREATE): create fields in objrec buffer, and
  1031. * write it. create fzkey-abstract recs as necessary.
  1032. * objrec will be rewritten after text size has been
  1033. * determined. After update or create, objdba contains
  1034. * dba of curr obj record.
  1035. */
  1036. strcpy (uniqkey, db_key);
  1037. KEYFIND (PROGNAME "489", OR_OBJKEY, uniqkey, 0);
  1038. if (db_status == S_OKAY)
  1039. update_object (uniqkey);
  1040. else
  1041. create_object (uniqkey);
  1042. /*----- READ LINE #4, date -----------------------------
  1043. * Line #4 is object date/time string (OBJDATESTR format).
  1044. * It is no longer optional. If invalid, the current
  1045. * run date that was preloaded into the record is used.
  1046. *-----------------------------------------------------*/
  1047. if (fgets (linebuf, sizeof (linebuf) - 1, infile) == NULL)
  1048. break;
  1049. linebuf [sizeof(linebuf)-1] = 0;
  1050. linelen = strlen (linebuf);
  1051. if (!is_objdatestr (linebuf, &objdate)) {
  1052. normal_exitcode = EXIT_WARNING;
  1053. if (strcmp (linebuf, parg.etxdelim) == 0) {
  1054. cptr = PROGNAME"1155";
  1055. goto INVALID_FORMAT;
  1056. }
  1057. TERMINATE_LINE
  1058. printf (CATGETS(dtsearch_catd, MS_cravel, 1086,
  1059. "%s Record '%s' has invalid date format--"
  1060. "using run date.\n"),
  1061. PROGNAME"1086", uniqkey);
  1062. }
  1063. else { /* objdate is valid */
  1064. KEYFIND (PROGNAME "1098", OR_OBJKEY, uniqkey, 0);
  1065. if (db_status != S_OKAY)
  1066. vista_abort (PROGNAME "1101");
  1067. HTONL (objdate); /* ready for record writes */
  1068. CRWRITE (PROGNAME "1102", OR_OBJDATE, &objdate, 0);
  1069. }
  1070. /*----- READ TO ETX, record text ---------------------
  1071. * Balance of record (after line 4 to end of record marker)
  1072. * is text. It may or may not be formatted in neat ascii
  1073. * lines, ie it may not have periodic linefeeds (\n).
  1074. * If this database does not store compressed records (blobs)
  1075. * we just discard all chars to end of text delimiter (ETX).
  1076. * Otherwise we read it char by char using readchar_ftext()
  1077. * and fill linebuf to some convenient size.
  1078. *
  1079. * Repeated calls to hc_encode() build
  1080. * a compressed record in its own internal blobrec buffer.
  1081. * When the buffer is full, hc_encode copies it to
  1082. * the passed blobrec buffer and returns TRUE.
  1083. * The caller should then write out the blobrec.
  1084. * If hc_encode returns FALSE, its internal blobrec is not
  1085. * yet full so the caller should not yet write out his record.
  1086. *-----------------------------------------------------*/
  1087. if (!blobs_are_used) {
  1088. discard_to_ETX (&parg);
  1089. continue;
  1090. }
  1091. /*
  1092. * Initialize blob compression by reading in huffman
  1093. * encode table (first execution only). Ensure table id
  1094. * is same as one used for previous compressions, if any.
  1095. */
  1096. if (hufftab_never_loaded) {
  1097. hufftab_never_loaded = FALSE;
  1098. gen_vec (fname_huffcode_tab);
  1099. if (dbrec_hufid != gen_vec_hufid && dbrec_hufid != -1L) {
  1100. TERMINATE_LINE
  1101. printf (CATGETS(dtsearch_catd, MS_cravel, 1153,
  1102. "%s Current data compression table id"
  1103. " in '%s' is %ld.\n"
  1104. " Database '%s' previously compressed"
  1105. " with table %ld.\n"),
  1106. PROGNAME"1153 ", fname_huffcode_tab,
  1107. gen_vec_hufid, dicname, dbrec_hufid);
  1108. DtSearchExit (53);
  1109. }
  1110. }
  1111. /*
  1112. * Compress document text. Repeatedly load linebuf
  1113. * with fixed number of chars and compress it.
  1114. */
  1115. if (debug_encode) {
  1116. sumlines = 0;
  1117. sumblobs = 0;
  1118. }
  1119. if ((linebuf[0] = readchar_ftext (&parg)) == 0) {
  1120. normal_exitcode = EXIT_WARNING;
  1121. TERMINATE_LINE
  1122. printf ( CATGETS(dtsearch_catd, MS_cravel, 1215,
  1123. "%s Warning. Record '%s' has no text.\n"),
  1124. PROGNAME"1215" , uniqkey);
  1125. continue;
  1126. }
  1127. linelen = 1;
  1128. while (linebuf [linelen] = readchar_ftext (NULL)) {
  1129. if (++linelen >= 80) {
  1130. call_encoder ((UCHAR *)linebuf, linelen);
  1131. linelen = 0;
  1132. }
  1133. }
  1134. /*
  1135. * At ETX: If a partial line remains, process it just like
  1136. * the full lines above. Then write out total size to
  1137. * object record, and make the final call to hc_encode with
  1138. * empty line and TRUE flag to indicate 'no more text,
  1139. * flush your last partial buffer'.
  1140. */
  1141. if (linelen)
  1142. call_encoder ((UCHAR *)linebuf, linelen);
  1143. CRSET (PROGNAME "685", &objdba, 0);
  1144. int32 = htonl (objsize);
  1145. CRWRITE (PROGNAME "686", OR_OBJSIZE, &int32, 0);
  1146. if (hc_encode (&blobrec, (UCHAR *)"", 0, TRUE)) {
  1147. if (debug_encode) {
  1148. sumblobs += blobrec.or_bloblen;
  1149. printf ("---> FINAL sumlines =%d, bloblen = %d, "
  1150. "sumblobs = %ld, objsize = %ld\n",
  1151. (int)sumlines, (int)blobrec.or_bloblen,
  1152. (long)sumblobs, (long)objsize);
  1153. }
  1154. HTONS (blobrec.or_bloblen);
  1155. FILLNEW (PROGNAME "624", OR_BLOBREC, &blobrec, 0);
  1156. CONNECT (PROGNAME "625", OR_OBJ_BLOBS, 0);
  1157. }
  1158. } /* end main record loop */
  1159. if (need_final_progress_msg)
  1160. print_progress ();
  1161. fclose (infile);
  1162. write_dbrec ();
  1163. /* If all input records were discarded, complete processing
  1164. * but upgrade warning exit code to hard error code.
  1165. */
  1166. if (created_reccount <= 0L && updated_reccount <= 0L) {
  1167. normal_exitcode = EXIT_VANISH;
  1168. fprintf (stderr, CATGETS(dtsearch_catd, MS_cravel, 1048,
  1169. "%sDatabase objects not changed because input "
  1170. "file effectively empty.\n"),
  1171. PROGNAME "1048 ");
  1172. }
  1173. /* Close database and print return code via exits.
  1174. * Return code is either 0 (perfect), 1 (warnings),
  1175. * or 3 (input file effectively empty).
  1176. */
  1177. DtSearchExit (normal_exitcode);
  1178. } /* main() */
  1179. /*********************** DTSRLOAD.C ***************************/