dtsrclean.c 45 KB


  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * COMPONENT_NAME: austext
  25. *
  26. * FUNCTIONS: TERMINATE_LINE
  27. * copy_new_d99
  28. * copy_old_d2x_to_new
  29. * end_of_job
  30. * main
  31. * open_all_files
  32. * print_progress
  33. * print_usage
  34. * read_d2x
  35. * signal_shutdown
  36. * user_args_processor
  37. * validation_error
  38. * write_d2x
  39. *
  40. * ORIGINS: 27
  41. *
  42. *
  43. * (C) COPYRIGHT International Business Machines Corp. 1993,1995
  44. * All Rights Reserved
  45. * Licensed Materials - Property of IBM
  46. * US Government Users Restricted Rights - Use, duplication or
  47. * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
  48. */
  49. /*************************** DTSRCLEAN.C ****************************
  50. * $TOG: dtsrclean.c /main/9 1998/04/17 11:23:57 mgreess $
  51. * Does garbage collection (ie compression) of .d99 file.
  52. * Optionally verifies all database addresses in d99.
  53. * Modification of clndtbs.c and checkd99.c.
  54. * Does NOT use austext engine so this must be modified if schema changes.
  55. *
  56. * INPUT FORMAT:
  57. * All command input is on command line. Reads existing d2x and d99 files.
  58. *
  59. * OUTPUT FORMAT:
  60. * New .d2x and .d99 files are placed into the directory specified by user.
  61. *
  62. * EXIT CODE STANDARDS:
  63. * 0 = normal.
  64. * 1 = warnings, but output should be ok.
  65. * 2 = failure in cmd line parse or other initialization; job never started.
  66. * 3 - 49 = fatal error, but output may be acceptable.
  67. * 50 - 99 = fatal error and output files are probably unusable.
  68. * (In this program, even input may be corrupted).
  69. * 100+ = aborting due to asynchronous interrupt signal.
  70. * Output files may or may not be unusable.
  71. *
  72. * $Log$
  73. * Revision 2.4 1996/05/08 16:20:50 miker
  74. * Added RENFILEs for new d2x files; austext_dopen no longer does.
  75. *
  76. * Revision 2.3 1996/02/01 18:13:06 miker
  77. * Deleted BETA definition.
  78. *
  79. * Revision 2.2 1995/10/26 14:51:08 miker
  80. * Renamed from mrclean.c. Added prolog.
  81. *
  82. * Log: mrclean.c,v
  83. * Revision 2.1 1995/09/22 21:18:52 miker
  84. * Freeze DtSearch 0.1, AusText 2.1.8
  85. *
  86. * Revision 1.11 1995/09/05 18:16:46 miker
  87. * Name, msg, and other minor changes for DtSearch..
  88. * Print messages if austext_dopen() fails.
  89. *
  90. * Revision 1.10 1995/06/02 15:52:42 miker
  91. * Cleaned up -m and bit vector overflow msgs.
  92. *
  93. * Revision 1.9 1995/05/30 19:15:58 miker
  94. * Print beta char in startup banner msg.
  95. * Remove -m option and max_totrecs; select bit vector
  96. * size from maxdba, not reccount.
  97. */
  98. #include "SearchP.h"
  99. #include <stdlib.h>
  100. #include <ctype.h>
  101. #include <string.h>
  102. #include <errno.h>
  103. #include <fcntl.h>
  104. #include <signal.h>
  105. #include <sys/stat.h>
  106. #include <locale.h>
  107. #include "vista.h"
  108. #include <sys/types.h>
  109. #include <netinet/in.h>
  #define MS_misc 1 /* msg catalog set number */
  #define MS_dtsrclean 26 /* msg catalog set number */
  #define DISCARD_FORMAT "%s\t\"%s\"\t%s\t%s\n" /* copied from oe.h */
  #define RECS_PER_DOT 1000 /* default for -p<N> */
  #define DOTS_PER_MSG 50 /* dots between complete progress msgs */
  #define DISK_BLKSIZE 512
  #define MAX_CORRUPTION 100 /* default for -c<N> */
  /*
   * Max number of addresses to be read from database addresses
   * file, ie the size of one block read from hard disk.
   * Note the value has type size_t because of the sizeof.
   */
  #define MAX_REC_READ (DISK_BLKSIZE / sizeof(DB_ADDR))
  #define PROGNAME "DTSRCLEAN"
  #define SHOW_NOTHING 0 /* bit arguments for end_of_job() */
  #define SHOW_USAGE 1
  #define SHOW_EXITCODE 2
  #define SHOW_PROGRESS 4
  /*
   * Ends a pending line of progress dots on aa_stderr, if any.
   * Wrapped in do/while(0) so the macro behaves as exactly one
   * statement; the bare if{} form broke any unbraced if/else that
   * contained "TERMINATE_LINE ();".
   */
  #define TERMINATE_LINE() \
      do { \
          if (need_linefeed) { \
              fputc ('\n', aa_stderr); \
              need_linefeed = FALSE; \
          } \
      } while (0)
  128. /*-------------------------- GLOBALS ----------------------------*/
  129. static char *arg_dbname = NULL;
  130. static char *arg_newpath = NULL;
  131. unsigned char *bit_vector = NULL;
  132. static size_t bytes_in = 0L;
  133. static size_t corruption_count = 0L;
  134. static struct or_swordrec
  135. d21new, d21old;
  136. static struct or_lwordrec
  137. d22new, d22old;
  138. static struct or_hwordrec
  139. d23new, d23old;
  140. static char datestr[32] = ""; /* "1946/04/17 13:03" */
  141. static int debug_mode = FALSE;
  142. static size_t dot_count = 0L;
  143. char fname_d99_new[1024];
  144. char fname_d99_old[1024];
  145. FILE *fp_d99_new = NULL;
  146. FILE *fp_d99_old = NULL;
  147. static FILE *frecids = NULL;
  148. static int is_valid_dba;
  149. static size_t max_corruption = MAX_CORRUPTION;
  150. static int normal_exitcode = 0;
  151. static int need_linefeed = FALSE;
  152. static int overlay_no = FALSE;
  153. static int overlay_yes = FALSE;
  154. static DtSrINT32
  155. reccount = 0;
  156. static DtSrINT32
  157. recslots; /* dbrec.or_recslots promoted to INT32 */
  158. static DtSrINT32
  159. dba_offset;
  160. static DtSrINT32
  161. recs_per_dot = RECS_PER_DOT;
  162. static int rewrite_reccount = FALSE;
  163. static int shutdown_now = 0; /* = FALSE */
  164. static size_t size_d21_old = 0L;
  165. static size_t size_d22_old = 0L;
  166. static size_t size_d23_old = 0L;
  167. static size_t size_d99_old = 0L;
  168. static time_t timestart = 0L;
  169. static DtSrINT32
  170. total_num_addrs = 0;
  171. static int validation_mode = FALSE;
  172. /********************************************************/
  173. /* */
  174. /* signal_shutdown */
  175. /* */
  176. /********************************************************/
  177. /* interrupt handler for SIGINT */
  178. static void signal_shutdown (int sig)
  179. {
  180. shutdown_now = 100 + sig;
  181. return;
  182. } /* signal_shutdown() */
  183. /************************************************/
  184. /* */
  185. /* print_usage */
  186. /* */
  187. /************************************************/
  188. /* Prints usage statement to stderr. */
  189. static void print_usage (void)
  190. {
  191. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 1,
  192. "\nUSAGE: %s [options] <dbname> <newpath>\n"
  193. " Compresses unused d99 space and validates d00-d99 links.\n"
  194. " -p<N> Progress dots printed every <N> records (default %lu).\n"
  195. " Complete progress message printed every %d dots.\n"
  196. " -oy Authorizes overlaying preexisting d99/d2<N> files in newpath.\n"
  197. " -on Forces exit if preexisting d99/d2<N> files in newpath.\n"
  198. " -v Validates d99 and d00 links, uncorrupts d99 file, and ensures\n"
  199. " accurate record count. Also use -c0 to uncorrupt entire database.\n"
  200. " -v<fname> Same as -v but also writes all d00 recs unreferenced by d99\n"
  201. " to <fname> in format suitable to extract into .fzk file format.\n"
  202. " -c<N> Exits if more than <N> corrupted/incomplete links (default %d).\n"
  203. " Corruption limit turned off by -c0.\n"
  204. " <dbname> 1 - 8 char database name = the old d99/d2<N> files to be updated.\n"
  205. " Files found in local directory or DBFPATH environment variable.\n"
  206. " <newpath> Specifies where the new d99/d2<N> files will be placed.\n"
  207. " If first char is not slash, path is relative to local directory.\n"
  208. "EXIT CODES:\n"
  209. " 0: Complete success. 1: Warning. 2: Job never started.\n"
  210. " 3-49: Job ended prematurely, old files ok, new files unusable.\n"
  211. " 50-99: Fatal Error, even old database may be corrupted.\n"
  212. " 100+: Ctrl-C, kill, and all other signal interrupts cause premature\n"
  213. " end, new files may be unusable. Signal = exit code - 100.\n")
  214. ,aa_argv0, RECS_PER_DOT, DOTS_PER_MSG, MAX_CORRUPTION);
  215. return;
  216. } /* print_usage() */
  217. /************************************************/
  218. /* */
  219. /* print_progress */
  220. /* */
  221. /************************************************/
  222. /* Prints progress msg after dots or at end of job.
  223. * Label is "Final" or "Progress".
  224. */
  225. static void print_progress (char *label)
  226. {
  227. long seconds;
  228. int compression;
  229. seconds = time (NULL) - timestart; /* total seconds elapsed */
  230. if (seconds < 0L)
  231. seconds = 0L;
  232. if ((float) bytes_in / (float) size_d99_old >= 99.5)
  233. compression = 100;
  234. else {
  235. compression = (int) (100.* (float) bytes_in / (float) size_d99_old);
  236. if (compression < 0 || compression > 100)
  237. compression = 0;
  238. }
  239. TERMINATE_LINE ();
  240. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 2,
  241. "%s: %s Compression %d%% (about %lu KB) in %ld:%02ld min:sec.\n") ,
  242. aa_argv0, label, compression, bytes_in / 1000L,
  243. seconds / 60UL, seconds % 60UL);
  244. if (*label == 'F')
  245. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 3,
  246. "%s: Counted %ld WORDS in %s.d99.\n") ,
  247. aa_argv0, (long)reccount, arg_dbname);
  248. return;
  249. } /* print_progress() */
  250. /************************************************/
  251. /* */
  252. /* end_of_job */
  253. /* */
  254. /************************************************/
  255. /* Exits program. Prints status messages before going down.
  256. * Should be called on even record boundaries whenever possible,
  257. * ie after record writes complete and shutdown_now > 0 (TRUE).
  258. */
  259. static void end_of_job (int exitcode, int show_flags)
  260. {
  261. TERMINATE_LINE ();
  262. if (exitcode >= 100) {
  263. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 66,
  264. "%s Aborting after interrupt signal %d.\n"),
  265. PROGNAME"66", exitcode - 100);
  266. }
  267. if (validation_mode && corruption_count == 0L)
  268. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 4,
  269. "%s: No corrupted links detected.\n") ,
  270. aa_argv0);
  271. if (corruption_count > 0L) {
  272. if (max_corruption > 0L && corruption_count >= max_corruption)
  273. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 193,
  274. "%s Aborting at %ld corrupted links.\n"),
  275. PROGNAME"193", corruption_count);
  276. else
  277. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 194,
  278. "%s Detected%s %ld corrupted/incomplete link(s).\n"),
  279. PROGNAME"194",
  280. (validation_mode) ? " and corrected" : "",
  281. corruption_count);
  282. }
  283. if (show_flags & SHOW_PROGRESS) {
  284. print_progress ("Final");
  285. }
  286. if (show_flags & SHOW_USAGE)
  287. print_usage ();
  288. if (show_flags & SHOW_EXITCODE)
  289. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 5,
  290. "%s: Exit code = %d.\n") , aa_argv0, exitcode);
  291. DtSearchExit (exitcode);
  292. } /* end_of_job() */
  293. /************************************************/
  294. /* */
  295. /* user_args_processor() */
  296. /* */
  297. /************************************************/
  298. /* Reads and verifies users command line arguments and
  299. * converts them into internal switches and variables.
  300. * Some attempt is made to read as many errors as possible
  301. * before ending job for bad arguments.
  302. */
  303. static void user_args_processor (int argc, char **argv)
  304. {
  305. char *argptr;
  306. int oops = FALSE;
  307. int i;
  308. time_t stamp;
  309. size_t tempsize;
  310. if (argc < 3)
  311. end_of_job (2, SHOW_USAGE);
  312. /* parse all args that begin with a dash (-) */
  313. while (--argc > 0) {
  314. argv++;
  315. argptr = argv[0];
  316. if (argptr[0] != '-')
  317. break;
  318. switch (tolower (argptr[1])) {
  319. case 'r':
  320. if (strcmp (argptr, "-russell") == 0) /* backdoor debug */
  321. debug_mode = TRUE;
  322. else
  323. goto UNKNOWN_ARG;
  324. case 'm':
  325. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 301,
  326. "%s The -m argument is no longer necessary.\n"),
  327. PROGNAME"301");
  328. break;
  329. case 'o':
  330. i = tolower (argptr[2]);
  331. if (i == 'n')
  332. overlay_no = TRUE;
  333. else if (i == 'y')
  334. overlay_yes = TRUE;
  335. else {
  336. INVALID_ARG:
  337. fprintf (aa_stderr,
  338. CATGETS(dtsearch_catd, MS_dtsrclean, 177,
  339. "%s Invalid %.2s argument.\n"),
  340. PROGNAME"177", argptr);
  341. oops = TRUE;
  342. }
  343. break;
  344. case 'v':
  345. validation_mode = TRUE;
  346. if (argptr[2] != '\0') {
  347. if ((frecids = fopen (argptr + 2, "w")) == NULL) {
  348. fprintf (aa_stderr,
  349. CATGETS(dtsearch_catd, MS_dtsrclean, 802,
  350. "%s Unable to open '%s' to output"
  351. " unreferenced d00 records:\n %s\n"),
  352. PROGNAME"802", argptr, strerror(errno));
  353. oops = TRUE;
  354. }
  355. time (&stamp);
  356. strftime (datestr, sizeof (datestr),
  357. "%Y/%m/%d %H:%M", localtime (&stamp));
  358. }
  359. break;
  360. case 'p':
  361. recs_per_dot = (DtSrINT32) atol (argptr + 2);
  362. if (recs_per_dot <= 0)
  363. goto INVALID_ARG;
  364. break;
  365. case 'c':
  366. tempsize = atol (argptr + 2);
  367. if (tempsize < 0L)
  368. goto INVALID_ARG;
  369. max_corruption = tempsize;
  370. break;
  371. UNKNOWN_ARG:
  372. default:
  373. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 159,
  374. "%s Unknown argument: '%s'.\n"),
  375. PROGNAME"159", argptr);
  376. oops = TRUE;
  377. break;
  378. } /* end switch */
  379. } /* end parse of cmd line args */
  380. /* Test how we broke loop.
  381. * There should still be 2 args past the ones
  382. * beginning with a dash: dbname and newpath.
  383. */
  384. if (argc != 2) {
  385. if (argc <= 0)
  386. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 210,
  387. "%s Missing required dbname argument.\n"),
  388. PROGNAME"210");
  389. if (argc <= 1)
  390. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 211,
  391. "%s Missing required newpath argument.\n"),
  392. PROGNAME"211");
  393. if (argc > 2)
  394. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 212,
  395. "%s Too many arguments.\n"),
  396. PROGNAME"212");
  397. oops = TRUE;
  398. }
  399. if (oops)
  400. end_of_job (2, SHOW_USAGE);
  401. /* DBNAME */
  402. arg_dbname = argv[0];
  403. if (strlen (arg_dbname) > 8) {
  404. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 229,
  405. "%s Invalid database name '%s'.\n"),
  406. PROGNAME"229", arg_dbname);
  407. end_of_job (2, SHOW_USAGE);
  408. }
  409. /* NEWPATH:
  410. * Oldpath and newpath are validated when the files
  411. * are copied and the database is opened.
  412. */
  413. arg_newpath = argv[1];
  414. return;
  415. } /* user_args_processor() */
  416. /************************************************/
  417. /* */
  418. /* validation_error() */
  419. /* */
  420. /************************************************/
  421. /* Subroutine of validation_mode in main().
  422. * Prints d2x and d99 data at location of error.
  423. * Adjusts d2x counts for number of good addrs and free slots.
  424. */
  425. static void validation_error (DB_ADDR dbaorig)
  426. {
  427. DB_ADDR slot;
  428. is_valid_dba = FALSE;
  429. slot = dbaorig >> 8;
  430. /* now efim retranslates back to real dba */
  431. if (dbaorig != -1)
  432. slot = ((slot + 1) * recslots - dba_offset)
  433. | (OR_D00 << 24);
  434. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 6,
  435. " DBA = %d:%ld (x%02x:%06lx), orig addr val = x%08lx\n"
  436. " Word='%c%s' offset=%ld addrs=%ld free=%d\n") ,
  437. OR_D00, slot, OR_D00, slot, dbaorig,
  438. (!isgraph (d23old.or_hwordkey[0])) ? '^' : d23old.or_hwordkey[0],
  439. d23old.or_hwordkey + 1, d23old.or_hwoffset,
  440. d23old.or_hwaddrs, d23old.or_hwfree);
  441. if (--d23new.or_hwaddrs < 0L)
  442. d23new.or_hwaddrs = 0L;
  443. /* (should never occur) */
  444. d23new.or_hwfree++;
  445. return;
  446. } /* validation_error() */
  447. /************************************************/
  448. /* */
  449. /* open_all_files */
  450. /* */
  451. /************************************************/
  452. static void open_all_files
  453. (FILE ** fp, char *fname, char *mode, size_t * size, int *oops) {
  454. struct stat fstatbuf;
  455. if ((*fp = fopen (fname, mode)) == NULL) {
  456. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 439,
  457. "%s Can't open %s: %s\n"),
  458. PROGNAME"439", fname, strerror (errno));
  459. *oops = TRUE;
  460. return;
  461. }
  462. if (fstat (fileno (*fp), &fstatbuf) == -1) {
  463. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 440,
  464. "%s Can't access status of %s: %s\n"),
  465. PROGNAME"440", fname, strerror (errno));
  466. *oops = TRUE;
  467. return;
  468. }
  469. if (size)
  470. if ((*size = fstatbuf.st_size) <= 0L) {
  471. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 499,
  472. "%s %s is empty.\n"),
  473. PROGNAME"499", fname);
  474. *oops = TRUE;
  475. }
  476. return;
  477. } /* open_all_files() */
  478. /************************************************/
  479. /* */
  480. /* copy_old_d2x_to_new */
  481. /* */
  482. /************************************************/
  483. static void copy_old_d2x_to_new
  484. (char *fname_old, char *fname_new, FILE * fp_old, FILE * fp_new) {
  485. char readbuf[1024 + 32];
  486. int i, j;
  487. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 7,
  488. "%s: Copying from old d2x files to %s...\n") ,
  489. aa_argv0, fname_new);
  490. for (;;) { /* loop ends when eof set on input stream */
  491. errno = 0;
  492. i = fread (readbuf, 1, sizeof (readbuf), fp_old);
  493. /* byte swap not required on pure copy operation */
  494. if (errno) {
  495. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 517,
  496. "%s Read error on %s: %s.\n"),
  497. PROGNAME"517", fname_old, strerror (errno));
  498. end_of_job (3, SHOW_EXITCODE);
  499. }
  500. j = fwrite (readbuf, 1, i, fp_new);
  501. if (i != j) {
  502. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 489,
  503. "%s Write error on %s: %s.\n"),
  504. PROGNAME"489", fname_new, strerror (errno));
  505. end_of_job (3, SHOW_EXITCODE);
  506. }
  507. if (shutdown_now)
  508. end_of_job (shutdown_now, SHOW_EXITCODE);
  509. if (feof (fp_old))
  510. break;
  511. }
  512. TERMINATE_LINE ();
  513. fclose (fp_old);
  514. fclose (fp_new);
  515. return;
  516. } /* copy_old_d2x_to_new() */
  517. /********************************/
  518. /* */
  519. /* read_d2x */
  520. /* */
  521. /********************************/
  522. /* Performs vista RECREAD on curr word record.
  523. * CALLER SHOULD CHECK DB_STATUS.
  524. */
  525. void read_d2x (struct or_hwordrec * glob_word, long field)
  526. {
  527. if (field == OR_SWORDKEY) {
  528. RECREAD (PROGNAME "061", &d21old, 0);
  529. if (db_status != S_OKAY)
  530. return;
  531. strncpy (glob_word->or_hwordkey, d21old.or_swordkey,
  532. DtSrMAXWIDTH_HWORD);
  533. glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
  534. glob_word->or_hwoffset = ntohl (d21old.or_swoffset);
  535. glob_word->or_hwfree = ntohl (d21old.or_swfree);
  536. glob_word->or_hwaddrs = ntohl (d21old.or_swaddrs);
  537. }
  538. else if (field == OR_LWORDKEY) {
  539. RECREAD (PROGNAME "069", &d22old, 0);
  540. if (db_status != S_OKAY)
  541. return;
  542. strncpy (glob_word->or_hwordkey, d22old.or_lwordkey,
  543. DtSrMAXWIDTH_HWORD);
  544. glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
  545. glob_word->or_hwoffset = ntohl (d22old.or_lwoffset);
  546. glob_word->or_hwfree = ntohl (d22old.or_lwfree);
  547. glob_word->or_hwaddrs = ntohl (d22old.or_lwaddrs);
  548. }
  549. else {
  550. RECREAD (PROGNAME "078", glob_word, 0);
  551. glob_word->or_hwordkey[DtSrMAXWIDTH_HWORD - 1] = 0;
  552. NTOHL (glob_word->or_hwoffset);
  553. NTOHL (glob_word->or_hwfree);
  554. NTOHL (glob_word->or_hwaddrs);
  555. }
  556. return;
  557. } /* read_d2x() */
  558. /********************************/
  559. /* */
  560. /* write_d2x */
  561. /* */
  562. /********************************/
  563. /* performs vista RECWRITE on curr word record.
  564. * CALLER MUST CHECK DB_STATUS.
  565. */
  566. static void write_d2x (struct or_hwordrec * glob_word, long field)
  567. {
  568. if (field == OR_SWORDKEY) {
  569. snprintf(d21new.or_swordkey, 16, "%s", glob_word->or_hwordkey);
  570. d21new.or_swoffset = htonl (glob_word->or_hwoffset);
  571. d21new.or_swfree = htonl (glob_word->or_hwfree);
  572. d21new.or_swaddrs = htonl (glob_word->or_hwaddrs);
  573. RECWRITE (PROGNAME "102", &d21new, 0);
  574. }
  575. else if (field == OR_LWORDKEY) {
  576. snprintf(d22new.or_lwordkey, 40, "%s", glob_word->or_hwordkey);
  577. d22new.or_lwoffset = htonl (glob_word->or_hwoffset);
  578. d22new.or_lwfree = htonl (glob_word->or_hwfree);
  579. d22new.or_lwaddrs = htonl (glob_word->or_hwaddrs);
  580. RECWRITE (PROGNAME"112", &d22new, 0);
  581. }
  582. else {
  583. HTONL (glob_word->or_hwoffset);
  584. HTONL (glob_word->or_hwfree);
  585. HTONL (glob_word->or_hwaddrs);
  586. RECWRITE (PROGNAME "115", glob_word, 0);
  587. }
  588. return;
  589. } /* write_d2x() */
  590. /************************************************/
  591. /* */
  592. /* copy_new_d99() */
  593. /* */
  594. /************************************************/
  595. /* The garbage collection/compression process itself.
  596. * For very large databases, there will be appx 3 million word records,
  597. * so the loop should be coded for ***EFFICIENCY***.
  598. */
  599. static void copy_new_d99 (long keyfield)
  600. {
  601. int is_odd_nibble;
  602. DtSrINT32 num_holes;
  603. DtSrINT32 slots_left;
  604. unsigned char *bvptr;
  605. int a;
  606. DB_ADDR dba, dbaorig;
  607. DtSrINT32 x;
  608. DtSrINT32 swapx;
  609. int done;
  610. DtSrINT32 good_addrs_left;
  611. DtSrINT32 good_addrs_this_block;
  612. DtSrINT32 num_reads, num_writes;
  613. DB_ADDR word_addrs[MAX_REC_READ + 64]; /* d99 read buf */
  614. DB_ADDR word_addrs_out[MAX_REC_READ + 64]; /* d99 write buf */
  615. KEYFRST (PROGNAME "179", keyfield, 0);
  616. while (db_status == S_OKAY) {
  617. read_d2x (&d23new, keyfield);
  618. if (validation_mode) /* save for validation err msgs */
  619. memcpy (&d23old, &d23new, sizeof (d23old));
  620. /*
  621. * Read old d99 file at specified offset to get total num
  622. * "holes". In the first portion of record holes are filled
  623. * with representations of valid database addresses +
  624. * statistical weights. In the second portion the holes are
  625. * "free slots" for future expansion which are
  626. * conventionally initialized with a -1.
  627. */
  628. /* force number of free slots to 0(ZERO) */
  629. d23new.or_hwfree = 0;
  630. fseek (fp_d99_old, d23new.or_hwoffset, SEEK_SET);
  631. num_holes = d23new.or_hwaddrs + d23new.or_hwfree;
  632. good_addrs_left = d23new.or_hwaddrs;
  633. bytes_in += sizeof (DB_ADDR) * num_holes;
  634. /* Update the offset in the d2x record buffer */
  635. d23new.or_hwoffset = ftell (fp_d99_new);
  636. /*
  637. * Copy the array of holes in each disk block, reading the
  638. * old and writing to the new. Loop ends when the number
  639. * of holes left will fit into one last block.
  640. */
  641. done = FALSE;
  642. while (!done) { /* loop on each block in this word */
  643. int ret = 0;
  644. if (num_holes > MAX_REC_READ) {
  645. num_reads = MAX_REC_READ;
  646. num_holes -= MAX_REC_READ;
  647. }
  648. else {
  649. done = TRUE;
  650. num_reads = num_holes;
  651. }
  652. errno = 0;
  653. ret = fread (word_addrs, sizeof(DB_ADDR), (size_t)num_reads, fp_d99_old);
  654. if (errno || -1 == ret) {
  655. TERMINATE_LINE ();
  656. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 657,
  657. "%s Read error on %s: %s.\n"),
  658. PROGNAME"657", fname_d99_old, strerror (errno));
  659. end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
  660. }
  661. /* Note BYTE_SWAP only needed for validation_mode.
  662. * If not validating, we're just going to copy
  663. * the network format dba's as is directly to
  664. * the new d99 file.
  665. */
  666. /*
  667. * Addrs on d99 are now 'record numbers' not dbas. A
  668. * rec# is what the dba/slot# would be if records took
  669. * up just one slot and there were no dbrec at start of
  670. * file. D99 rec#s start at #1, not #0.
  671. */
  672. /*
  673. * If user requested validation_mode, validate each
  674. * 'good' rec# (not free slots) in word_addrs buffer.
  675. * If any d99 links are corrupt, skip them when copying
  676. * to the new d99 file. Rewrite -1's to all free slots.
  677. * ----> NOTE UNUSUAL FORMAT OF DBA HOLES IN D99! <----
  678. * Record number is shifted to the high order 3 bytes.
  679. * The statistical weight is in the low order byte. The
  680. * vista file number is known from the #define constant
  681. * OR_D00, and the vista dba/slot# is mapped from rec#
  682. * by mult/div number of slots per rec, plus/minus
  683. * dbrec offset.
  684. */
  685. if (validation_mode) {
  686. for (swapx = 0; swapx < num_reads; swapx++)
  687. NTOHL (word_addrs[swapx]);
  688. /* set x to number of good addrs in this block */
  689. if (good_addrs_left > num_reads) {
  690. x = num_reads;
  691. good_addrs_left -= num_reads;
  692. }
  693. else {
  694. x = good_addrs_left;
  695. good_addrs_left = 0;
  696. }
  697. /*
  698. * Validate the rec#'s in this block. Note that
  699. * the loop is skipped if the entire block is free
  700. * slots.
  701. */
  702. good_addrs_this_block = 0;
  703. for (a = 0; a < x; a++) { /* a = index to curr dba */
  704. /*
  705. * Get rec#. Save original rec# for err msgs,
  706. * then shift slot number to lower 3 bytes,
  707. * discarding weight.
  708. */
  709. dbaorig = word_addrs[a]; /* rec#,rec#,rec#:wt */
  710. dba = dbaorig >> 8; /* 0,rec#,rec#,rec# */
  711. is_valid_dba = TRUE; /* default */
  712. /*
  713. * If original rec# == -1 we've overrun the
  714. * good rec#'s into the expansion area, which
  715. * is filled with -1's. This is real bad news
  716. * because if the counts in d02 are bad, the
  717. * online programs will quickly crash, and we
  718. * can't continue this program. Advance to next
  719. * rec# because we can't mark the bit vector.
  720. */
  721. if (dbaorig == -1L) {
  722. TERMINATE_LINE ();
  723. fprintf (aa_stderr,
  724. CATGETS(dtsearch_catd, MS_dtsrclean, 111,
  725. "*** %s DBA in d99 = -1. "
  726. "Probable overrun into expansion\n"
  727. " area due to incorrect count values "
  728. "in d2x file.\n"),
  729. PROGNAME"111");
  730. validation_error (dbaorig);
  731. corruption_count++;
  732. if (max_corruption > 0L &&
  733. corruption_count >= max_corruption)
  734. end_of_job (91, SHOW_PROGRESS + SHOW_EXITCODE);
  735. continue; /* skip the bit vector
  736. * check */
  737. }
  738. /*
  739. * If slot number > max totrecs, we have a
  740. * corrupted d99-d00 link because we've already
  741. * validated the d00 file and we know that it
  742. * has no slots > max. Also we have to advance
  743. * to next slot because we can't mark the bit
  744. * vector.
  745. */
  746. /******if (dba >= max_totrecs)*******/
  747. if (dba >= total_num_addrs) {
  748. TERMINATE_LINE ();
  749. fprintf (aa_stderr,
  750. CATGETS(dtsearch_catd, MS_dtsrclean, 222,
  751. "*** %s DBA in d99 not in d00,"
  752. " slot > max num docs.\n"),
  753. PROGNAME"222");
  754. validation_error (dbaorig);
  755. corruption_count++;
  756. if (max_corruption > 0L &&
  757. corruption_count >= max_corruption)
  758. end_of_job (92, SHOW_PROGRESS + SHOW_EXITCODE);
  759. continue; /* skip the bit vector check */
  760. }
  761. /*
  762. * Verify that dba exists in d00 file (test bit
  763. * #1). If not, mark bit #3 (3rd lowest) in
  764. * nibble and print error msg unless bit #3
  765. * previously marked.
  766. */
  767. bvptr = bit_vector + (dba >> 1);
  768. is_odd_nibble = (dba & 1L);
  769. if (!(*bvptr & ((is_odd_nibble) ? 0x01 : 0x10))) {
  770. /* bit #1 */
  771. if (!(*bvptr & ((is_odd_nibble) ? 0x04 : 0x40))) {
  772. /* bit #3 */
  773. *bvptr |= (is_odd_nibble) ? 0x04 : 0x40;
  774. TERMINATE_LINE ();
  775. fprintf (aa_stderr,
  776. CATGETS(dtsearch_catd, MS_dtsrclean, 333,
  777. "*** %s DBA in d99 does not exist in d00.\n"),
  778. PROGNAME"333");
  779. validation_error (dbaorig);
  780. corruption_count++;
  781. if (max_corruption > 0L &&
  782. corruption_count >= max_corruption)
  783. end_of_job (93, SHOW_PROGRESS + SHOW_EXITCODE);
  784. } /* endif where corrupt link
  785. * detected */
  786. }
  787. /*
  788. * Mark bit #2 in bit vector indicating a d99
  789. * reference.
  790. */
  791. *bvptr |= (is_odd_nibble) ? 0x02 : 0x20; /* bit #2 */
  792. /*
  793. * move good dba to curr output block, incr
  794. * counter
  795. */
  796. if (is_valid_dba)
  797. word_addrs_out[good_addrs_this_block++] = dbaorig;
  798. } /* end validation loop for each good dba in
  799. * the block */
  800. /*
  801. * Write out only validated addrs in current block.
  802. * If this was the last block, fill out all the
  803. * free slots, if any, with -1 values, and exit the
  804. * dba loop for this word.
  805. */
  806. if (good_addrs_this_block > 0) {
  807. for (swapx = 0; swapx < good_addrs_this_block; swapx++)
  808. NTOHL (word_addrs_out[swapx]);
  809. num_writes = fwrite (word_addrs_out, sizeof (DB_ADDR),
  810. (size_t)good_addrs_this_block, fp_d99_new);
  811. if (num_writes != good_addrs_this_block)
  812. goto WRITE_ERROR;
  813. }
  814. if (good_addrs_left <= 0) {
  815. /*
  816. * Write blocks of -1s until new d2x free slot
  817. * count is exhausted. The last block may be <
  818. * MAX_REC_READ.
  819. */
  820. slots_left = d23new.or_hwfree;
  821. while (slots_left > 0) {
  822. /*
  823. * set x to number of -1's to write for
  824. * this block
  825. */
  826. if (slots_left > MAX_REC_READ) {
  827. x = MAX_REC_READ;
  828. slots_left -= MAX_REC_READ;
  829. }
  830. else {
  831. x = slots_left;
  832. slots_left = 0;
  833. }
  834. for (a = 0; a < x; a++)
  835. word_addrs_out[a] = (DtSrINT32) -1;
  836. /* BYTE_SWAP not required for foxes */
  837. num_writes = fwrite (word_addrs_out,
  838. sizeof(DB_ADDR), (size_t)x, fp_d99_new);
  839. if (num_writes != x)
  840. goto WRITE_ERROR;
  841. } /* end while loop to write out all -1's */
  842. done = TRUE;
  843. }
  844. } /* endif for validation_mode for this block */
  845. /*
  846. * If NOT in validation mode, just write out the new
  847. * d99 block as an exact copy of the input block.
  848. * BYTE_SWAP not required because word_addrs is
  849. * still in its original network order from the fread.
  850. */
  851. else {
  852. num_writes = fwrite (word_addrs, sizeof(DB_ADDR),
  853. (size_t)num_reads, fp_d99_new);
  854. if (num_writes != num_reads) {
  855. WRITE_ERROR:
  856. fprintf (aa_stderr,
  857. CATGETS(dtsearch_catd, MS_dtsrclean, 665,
  858. "%s Write error on %s: %s.\n"),
  859. PROGNAME"665", fname_d99_new, strerror(errno));
  860. end_of_job (4, SHOW_PROGRESS + SHOW_EXITCODE);
  861. }
  862. } /* endelse for NOT validation_mode for this block */
  863. } /* end loop for all blocks for this entire word
  864. * (done = TRUE) */
  865. /* write the updated d2x record */
  866. write_d2x (&d23new, keyfield);
  867. reccount++;
  868. /*
  869. * Every now and then print a dot. Print complete progress
  870. * msg after DOTS_PER_MSG dots.
  871. */
  872. if (!(reccount % recs_per_dot)) {
  873. if (++dot_count > DOTS_PER_MSG) {
  874. dot_count = 0;
  875. print_progress ("Progress");
  876. }
  877. else {
  878. fputc ('.', aa_stderr);
  879. need_linefeed = TRUE;
  880. if (!(dot_count % 10L))
  881. fputc (' ', aa_stderr);
  882. }
  883. fflush (aa_stderr);
  884. } /* end of print-a-dot */
  885. if (shutdown_now)
  886. end_of_job (shutdown_now, SHOW_PROGRESS + SHOW_EXITCODE);
  887. KEYNEXT (PROGNAME "196", keyfield, 0);
  888. } /* end of main loop on each word in database */
  889. return;
  890. } /* copy_new_d99() */
  891. /************************************************/
  892. /* */
  893. /* main() */
  894. /* */
  895. /************************************************/
  896. int main (int argc, char *argv[])
  897. {
  898. FILE_HEADER fl_hdr;
  899. int a, i, j;
  900. unsigned char *bvptr;
  901. DB_ADDR dba, dba1, dbaorig;
  902. char dbfpath[1024];
  903. char fname_d21_new[1024];
  904. char fname_d21_old[1024];
  905. char fname_d22_new[1024];
  906. char fname_d22_old[1024];
  907. char fname_d23_new[1024];
  908. char fname_d23_old[1024];
  909. FILE *fp_d21_new = NULL;
  910. FILE *fp_d21_old = NULL;
  911. FILE *fp_d22_new = NULL;
  912. FILE *fp_d22_old = NULL;
  913. FILE *fp_d23_new = NULL;
  914. FILE *fp_d23_old = NULL;
  915. char full_dbname_old[1024];
  916. char full_dbname_new[1024];
  917. DtSrINT32 max_bitvec = 0L;
  918. int oops;
  919. char *ptr;
  920. char readbuf[1024 + 32];
  921. unsigned long reads_per_dot;
  922. char recidbuf[DtSrMAX_DB_KEYSIZE + 4];
  923. time_t starttime;
  924. DtSrINT32 x;
  925. struct or_dbrec dbrec;
  926. aa_argv0 = argv[0];
  927. setlocale (LC_ALL, "");
  928. dtsearch_catd = CATOPEN(FNAME_DTSRCAT, 0);
  929. time (&starttime);
  930. strftime (dbfpath, sizeof (dbfpath), /* just use any ol' buffer */
  931. CATGETS(dtsearch_catd, MS_misc, 22, "%A, %b %d %Y, %I:%M %p"),
  932. localtime (&starttime));
  933. printf ( CATGETS(dtsearch_catd, MS_dtsrclean, 11,
  934. "%s Version %s. Run %s.\n") ,
  935. aa_argv0, AUSAPI_VERSION, dbfpath);
  936. signal (SIGHUP, signal_shutdown);
  937. signal (SIGINT, signal_shutdown);
  938. signal (SIGQUIT, signal_shutdown);
  939. signal (SIGTRAP, signal_shutdown);
  940. signal (SIGKILL, signal_shutdown); /* this cannot be trapped */
  941. signal (SIGALRM, signal_shutdown);
  942. signal (SIGTERM, signal_shutdown);
  943. #ifdef SIGPWR
  944. signal (SIGPWR, signal_shutdown);
  945. #endif
  946. #ifdef _AIX
  947. signal (SIGXCPU, signal_shutdown);
  948. signal (SIGDANGER, signal_shutdown);
  949. #endif
  950. user_args_processor (argc, argv);
  951. /* In order to find old files, we have to check if
  952. * DBFPATH environment variable has been set.
  953. * Load the fully constructed DBFPATH-dbname into its own buffer.
  954. */
  955. full_dbname_old[0] = '\0';
  956. dbfpath[0] = 0;
  957. if ((ptr = getenv ("DBFPATH")) != NULL) {
  958. if (*ptr == 0)
  959. fprintf (aa_stderr,
  960. CATGETS(dtsearch_catd, MS_dtsrclean, 12,
  961. "%s: Ignoring empty DBFPATH environment variable.\n") ,
  962. aa_argv0);
  963. else {
  964. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 13,
  965. "%s: Using DBFPATH = '%s'.\n") ,
  966. aa_argv0, ptr);
  967. snprintf(full_dbname_old, sizeof(full_dbname_old), "%s", ptr);
  968. /* Ensure that DBFPATH ends in a slash. */
  969. ptr = strchr (full_dbname_old, '\0');
  970. if (*(ptr - 1) != LOCAL_SLASH) {
  971. *ptr++ = LOCAL_SLASH;
  972. *ptr = '\0';
  973. }
  974. strcpy (dbfpath, full_dbname_old);
  975. }
  976. }
  977. /* Currently full_dbname_old contains just the path.
  978. * Similarly, build just path name for the 2 new files
  979. * using full_dbname_new as a buffer.
  980. * Verify they don't both refer to the same directory.
  981. */
  982. strcpy (full_dbname_new, arg_newpath);
  983. ptr = strchr (full_dbname_new, '\0');
  984. if (*(ptr - 1) != LOCAL_SLASH) {
  985. *ptr++ = LOCAL_SLASH;
  986. *ptr = '\0';
  987. }
  988. if (strcmp (full_dbname_old, full_dbname_new) == 0) {
  989. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 393,
  990. "%s Old and new directories are identical: '%s'.\n"),
  991. PROGNAME"393", full_dbname_old);
  992. end_of_job (2, SHOW_USAGE);
  993. }
  994. /* Complete full_dbname_old by appending dbname to the path prefix.
  995. * Then build full path/file names for all 4 files.
  996. */
  997. strcat (full_dbname_old, arg_dbname);
  998. strcat (full_dbname_new, arg_dbname);
  999. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 14,
  1000. "%s: Old files: '%s.d2x, .d99'.\n") ,
  1001. aa_argv0, full_dbname_old);
  1002. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 15,
  1003. "%s: New files: '%s.d2x, .d99'.\n") ,
  1004. aa_argv0, full_dbname_new);
  1005. strcpy (fname_d99_old, full_dbname_old);
  1006. strcat (fname_d99_old, ".d99");
  1007. strcpy (fname_d21_old, full_dbname_old);
  1008. strcat (fname_d21_old, ".d21");
  1009. strcpy (fname_d22_old, full_dbname_old);
  1010. strcat (fname_d22_old, ".d22");
  1011. strcpy (fname_d23_old, full_dbname_old);
  1012. strcat (fname_d23_old, ".d23");
  1013. strcpy (fname_d99_new, full_dbname_new);
  1014. strcat (fname_d99_new, ".d99");
  1015. strcpy (fname_d21_new, full_dbname_new);
  1016. strcat (fname_d21_new, ".d21");
  1017. strcpy (fname_d22_new, full_dbname_new);
  1018. strcat (fname_d22_new, ".d22");
  1019. strcpy (fname_d23_new, full_dbname_new);
  1020. strcat (fname_d23_new, ".d23");
  1021. /* If the user hasn't already authorized overwriting preexisting files,
  1022. * check new directory and if new files already exist,
  1023. * ask permission to overwrite.
  1024. */
  1025. if (!overlay_yes) {
  1026. oops = FALSE; /* TRUE forces a user prompt */
  1027. if ((fp_d99_new = fopen (fname_d99_new, "r")) != NULL) {
  1028. fclose (fp_d99_new);
  1029. oops = TRUE;
  1030. }
  1031. if ((fp_d21_new = fopen (fname_d21_new, "r")) != NULL) {
  1032. fclose (fp_d21_new);
  1033. oops = TRUE;
  1034. }
  1035. if ((fp_d22_new = fopen (fname_d22_new, "r")) != NULL) {
  1036. fclose (fp_d22_new);
  1037. oops = TRUE;
  1038. }
  1039. if ((fp_d23_new = fopen (fname_d23_new, "r")) != NULL) {
  1040. fclose (fp_d23_new);
  1041. oops = TRUE;
  1042. }
  1043. if (oops) {
  1044. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 24,
  1045. "%s: One or more new files already exist.\n") ,
  1046. aa_argv0);
  1047. if (overlay_no) {
  1048. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 463,
  1049. "%s Command line argument disallows file overlay.\n"),
  1050. PROGNAME"463");
  1051. end_of_job (2, SHOW_EXITCODE);
  1052. }
  1053. fputs (CATGETS(dtsearch_catd, MS_dtsrclean, 45,
  1054. " Is it ok to overlay files in new directory? [y/n] "),
  1055. aa_stderr);
  1056. *readbuf = '\0';
  1057. if(NULL == fgets (readbuf, sizeof(readbuf), stdin)) {
  1058. fprintf (aa_stderr, "Failed to read from stdin\n");
  1059. end_of_job (2, SHOW_EXITCODE);
  1060. }
  1061. if (strlen(readbuf) && readbuf[strlen(readbuf)-1] == '\n')
  1062. readbuf[strlen(readbuf)-1] = '\0';
  1063. if (tolower (*readbuf) != 'y')
  1064. end_of_job (2, SHOW_NOTHING);
  1065. }
  1066. } /* end of check for overlaying new files */
  1067. /* Open all files. The d2x's are opened so that the old ones
  1068. * can be copied into the new directory before starting
  1069. * the garbage collection process proper.
  1070. * The d99's are opened now just to verify permissions.
  1071. */
  1072. oops = FALSE; /* TRUE ends job, but only after trying all 4 files */
  1073. open_all_files (&fp_d21_old, fname_d21_old, "rb", &size_d21_old, &oops);
  1074. open_all_files (&fp_d22_old, fname_d22_old, "rb", &size_d22_old, &oops);
  1075. open_all_files (&fp_d23_old, fname_d23_old, "rb", &size_d23_old, &oops);
  1076. open_all_files (&fp_d99_old, fname_d99_old, "rb", &size_d99_old, &oops);
  1077. open_all_files (&fp_d21_new, fname_d21_new, "wb", NULL, &oops);
  1078. open_all_files (&fp_d22_new, fname_d22_new, "wb", NULL, &oops);
  1079. open_all_files (&fp_d23_new, fname_d23_new, "wb", NULL, &oops);
  1080. open_all_files (&fp_d99_new, fname_d99_new, "wb", NULL, &oops);
  1081. if (shutdown_now)
  1082. end_of_job (shutdown_now, SHOW_EXITCODE);
  1083. if (oops)
  1084. end_of_job (2, SHOW_EXITCODE);
  1085. /* Copy old d2x files to new directory.
  1086. * Database will open using new files so only they will be changed.
  1087. */
  1088. copy_old_d2x_to_new (fname_d21_old, fname_d21_new, fp_d21_old, fp_d21_new);
  1089. copy_old_d2x_to_new (fname_d22_old, fname_d22_new, fp_d22_old, fp_d22_new);
  1090. copy_old_d2x_to_new (fname_d23_old, fname_d23_new, fp_d23_old, fp_d23_new);
  1091. /* Open database, but use new d2x files for updates. */
  1092. RENFILE (PROGNAME"1102", arg_dbname, OR_D21, fname_d21_new);
  1093. RENFILE (PROGNAME"1104", arg_dbname, OR_D22, fname_d22_new);
  1094. RENFILE (PROGNAME"1106", arg_dbname, OR_D23, fname_d23_new);
  1095. if (!austext_dopen (arg_dbname, (dbfpath[0] == 0) ? NULL : dbfpath,
  1096. NULL, 0, &dbrec)) {
  1097. puts (DtSearchGetMessages ());
  1098. end_of_job (3, SHOW_EXITCODE);
  1099. }
  1100. /* This is where efim changed real dba to
  1101. * record number (still called dba)
  1102. */
  1103. RECFRST (PROGNAME "1067", OR_OBJREC, 0);
  1104. CRGET (PROGNAME "1068", &dba, 0); /* dba of first real obj
  1105. * record */
  1106. recslots = dbrec.or_recslots; /* vista slots per obj
  1107. * record */
  1108. dba_offset = recslots - (dba & 0xffffff); /* accounts for dbrec */
  1109. /* total_num_addrs = what reccount would be if
  1110. * all holes were filled with good records.
  1111. */
  1112. total_num_addrs = (dbrec.or_maxdba - (dba & 0xffffff) + 1) / recslots + 1;
  1113. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 25,
  1114. "%s: curr reccnt=%ld, mxdba=%ld, sl/rec=%ld, tot#adr=%ld.\n") ,
  1115. aa_argv0, (long)dbrec.or_reccount, (long)dbrec.or_maxdba,
  1116. (long)dbrec.or_recslots, (long)total_num_addrs);
  1117. /* Initialize validation_mode (checkd99) */
  1118. if (validation_mode) {
  1119. /*
  1120. * Allocate and initialize a bit vector: 4 bits for every
  1121. * possible d00 database address.
  1122. */
  1123. max_bitvec = (total_num_addrs >> 1) + 2;
  1124. if ((bit_vector = malloc ((size_t)max_bitvec + 64)) == NULL) {
  1125. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 465,
  1126. "%s WARNING: Can't allocate memory for bit vector.\n"
  1127. " 'Validate' mode switched off.\n"),
  1128. PROGNAME"465");
  1129. validation_mode = FALSE;
  1130. normal_exitcode = 1; /* warning */
  1131. goto EXIT_INIT_VALIDATION;
  1132. }
  1133. memset (bit_vector, 0, (size_t)max_bitvec);
  1134. /*
  1135. * Read every d00 rec sequentially. 1 in bit #1 (lowest
  1136. * order) in bit vector means record (dba) exists in d00
  1137. * file. While we're at it, count the total number of
  1138. * records.
  1139. */
  1140. x = dbrec.or_reccount / 50 + 1; /* x = recs per dot */
  1141. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 26,
  1142. "%s: Reading d00 file. Each dot appx %ld database documents...\n"),
  1143. aa_argv0, (long)x);
  1144. reccount = 0;
  1145. dot_count = 0L;
  1146. RECFRST (PROGNAME "534", OR_OBJREC, 0);
  1147. while (db_status == S_OKAY) {
  1148. CRREAD (PROGNAME "617", OR_OBJKEY, recidbuf, 0);
  1149. /* print periodic progress dots */
  1150. if (!(++reccount % x)) {
  1151. fputc ('.', aa_stderr);
  1152. need_linefeed = TRUE;
  1153. if (!(++dot_count % 10L))
  1154. fputc (' ', aa_stderr);
  1155. fflush (aa_stderr);
  1156. }
  1157. /*
  1158. * Get dba and record number and confirm it will not
  1159. * overflow bit vector.
  1160. */
  1161. CRGET (PROGNAME "537", &dba, 0);
  1162. dba &= 0x00ffffff; /* mask out file number in high order byte */
  1163. dba1 = (dba + dba_offset) / recslots; /* ="rec number", base 1 */
  1164. if (dba1 >= total_num_addrs) {
  1165. TERMINATE_LINE ();
  1166. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 561,
  1167. "%s DBA '%d:%ld' (rec #%ld) in d00 exceeds "
  1168. "total num addrs %ld;\n"
  1169. " Bit vector overflow because maxdba %ld"
  1170. " in dbrec is incorrect.\n"),
  1171. PROGNAME"561", OR_D00, (long)dba, (long)dba1,
  1172. (long)total_num_addrs, (long)dbrec.or_maxdba);
  1173. end_of_job (7, SHOW_EXITCODE);
  1174. }
  1175. if (shutdown_now)
  1176. end_of_job (shutdown_now, SHOW_EXITCODE);
  1177. /*
  1178. * Set bit #1 of even or odd nibble to indicate that
  1179. * this record *number* actually exists in d00 file.
  1180. */
  1181. bit_vector[dba1 >> 1] |= (dba1 & 1L) ? 0x01 : 0x10;
  1182. RECNEXT (PROGNAME "541", 0);
  1183. } /* end of sequential read thru d00 file */
  1184. TERMINATE_LINE (); /* end the dots... */
  1185. /* confirm that RECCOUNT record holds the correct number */
  1186. if (dbrec.or_reccount == reccount) {
  1187. fprintf (aa_stderr,
  1188. CATGETS(dtsearch_catd, MS_dtsrclean, 27,
  1189. "%s: Confirmed %ld DOCUMENTS in %s.d00.\n") ,
  1190. aa_argv0, (long)dbrec.or_reccount, arg_dbname);
  1191. }
  1192. else {
  1193. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 28,
  1194. "%s: %ld DOCUMENTS actually in %s.d00 not ="
  1195. " %ld count stored there.\n"
  1196. " Count will be corrected in new d00 file.\n") ,
  1197. aa_argv0, (long)reccount, arg_dbname, (long)dbrec.or_reccount);
  1198. dbrec.or_reccount = reccount;
  1199. rewrite_reccount = TRUE;
  1200. }
  1201. EXIT_INIT_VALIDATION:;
  1202. } /* end of validation_mode initialization */
  1203. /* initialize main loop */
  1204. time (&timestart);
  1205. reccount = 0;
  1206. bytes_in = 0L;
  1207. dot_count = DOTS_PER_MSG; /* force initial msg after first
  1208. * blk of recs */
  1209. TERMINATE_LINE ();
  1210. fprintf (aa_stderr, CATGETS(dtsearch_catd, MS_dtsrclean, 29,
  1211. "%s: Compressing into %s. Each dot appx %lu words...\n") ,
  1212. aa_argv0, arg_newpath, (unsigned long)recs_per_dot);
  1213. /* write New Header Information to a new d99 file */
  1214. init_header (fp_d99_new, &fl_hdr);
  1215. /* Sequentially read each word key file in big loop.
  1216. * For each word, read the d99.
  1217. * In validation mode check the dbas.
  1218. * If not validating, just blindly rewrite the old d99 to the new one.
  1219. * If validating only write good dba's and mark the bit vector.
  1220. */
  1221. copy_new_d99 (OR_SWORDKEY);
  1222. copy_new_d99 (OR_LWORDKEY);
  1223. copy_new_d99 (OR_HWORDKEY);
  1224. if (reccount == 0)
  1225. end_of_job (50, SHOW_PROGRESS + SHOW_EXITCODE);
  1226. else
  1227. print_progress ("Final");
  1228. /* If validation_mode requested, traverse bit vector and print out
  1229. * table of each d00 record which cannot be accessed from any d99 word.
  1230. * If a validation file name was provided, write out a line for each
  1231. * bad reecord in alebeniz-compatible format.
  1232. */
  1233. if (validation_mode) {
  1234. for (x = 0, bvptr = bit_vector; x < max_bitvec; x++, bvptr++) {
  1235. for (j = 0; j < 8; j += 4) { /* j = 0 or 4, amount of
  1236. * bit shift */
  1237. /* a = bits #1 and #2 of current nibble */
  1238. a = 0x30 & (*bvptr << j);
  1239. /* if dba is in d00 but not in d99... */
  1240. if (a & 0x10 && !(a & 0x20)) {
  1241. /* ...construct valid vista dba */
  1242. dbaorig = x << 1;
  1243. if (j)
  1244. dbaorig++; /* slot number */
  1245. /*** dba = dbaorig | (OR_D00 << 24); ***//* r
  1246. * eal dba */
  1247. /* now efim retranslates back to real dba */
  1248. dba = ((dbaorig + 1) * recslots - dba_offset)
  1249. | (OR_D00 << 24);
  1250. /* ...print out err msg */
  1251. CRSET (PROGNAME "734", &dba, 0);
  1252. CRREAD (PROGNAME "735", OR_OBJKEY, readbuf, 0);
  1253. fprintf (aa_stderr,
  1254. CATGETS(dtsearch_catd, MS_dtsrclean, 444,
  1255. "*** %s d00 record '%s' is not referenced in d99.\n"
  1256. " DBA = %d:%ld (x%02x:%06lx).\n") ,
  1257. PROGNAME"444", readbuf, OR_D00,
  1258. (long)dba, OR_D00, (long)dba);
  1259. /*...if albeniz compatible output requested, do it */
  1260. if (frecids) {
  1261. fprintf (frecids, DISCARD_FORMAT, arg_dbname,
  1262. readbuf, "MrClean", datestr);
  1263. }
  1264. corruption_count++;
  1265. if (max_corruption > 0L &&
  1266. corruption_count >= max_corruption)
  1267. end_of_job (94, SHOW_EXITCODE);
  1268. } /* endif where d00 is not referenced by d99 */
  1269. } /* end forloop: every 2 bits in a bitvector byte */
  1270. } /* end forloop: every byte in bitvector */
  1271. }
  1272. /* Normal_exitcode currently will contain either a 0 or a 1.
  1273. * If we were uncorrupting the d99 and found any corrupt links,
  1274. * make sure it's 1 (warning). If there were corrupt links and
  1275. * we weren't trying to uncorrupt it, change it to a hard error.
  1276. */
  1277. /***by the way, corruption_count can be > 0 only if in validation_mode.**/
  1278. if (corruption_count > 0L) {
  1279. if (validation_mode)
  1280. normal_exitcode = 1;
  1281. else
  1282. normal_exitcode = 90;
  1283. }
  1284. end_of_job (normal_exitcode, SHOW_EXITCODE);
  1285. } /* main() */
  1286. /*************************** DTSRCLEAN.C ****************************/