mkindex.c 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include "dict.h"
  /*
   * Use this to start making an index for a new dictionary.
   * Get the dictionary-specific nextoff and printentry(_,'h')
   * functions working, add a record to the dicts[] array below,
   * and run this program to get a list of offset,headword
   * pairs.
   */
  12. Biobuf boutbuf;
  13. Biobuf *bdict;
  14. Biobuf *bout = &boutbuf;
  15. int linelen;
  16. int breaklen = 2000;
  17. int outinhibit;
  18. int debug;
  19. Dict *dict; /* current dictionary */
  20. Entry getentry(long);
  21. void
  22. main(int argc, char **argv)
  23. {
  24. int i;
  25. long a, ae;
  26. char *p;
  27. Entry e;
  28. Binit(&boutbuf, 1, OWRITE);
  29. dict = &dicts[0];
  30. ARGBEGIN {
  31. case 'd':
  32. p = ARGF();
  33. dict = 0;
  34. if(p) {
  35. for(i=0; dicts[i].name; i++)
  36. if(strcmp(p, dicts[i].name)==0) {
  37. dict = &dicts[i];
  38. break;
  39. }
  40. }
  41. if(!dict) {
  42. err("unknown dictionary: %s", p);
  43. exits("nodict");
  44. }
  45. break;
  46. case 'D':
  47. debug++;
  48. break;
  49. ARGEND }
  50. USED(argc,argv);
  51. bdict = Bopen(dict->path, OREAD);
  52. ae = Bseek(bdict, 0, 2);
  53. if(!bdict) {
  54. err("can't open dictionary %s", dict->path);
  55. exits("nodict");
  56. }
  57. for(a = 0; a < ae; a = (*dict->nextoff)(a+1)) {
  58. linelen = 0;
  59. e = getentry(a);
  60. Bprint(bout, "%ld\t", a);
  61. linelen = 4; /* only has to be approx right */
  62. (*dict->printentry)(e, 'h');
  63. }
  64. exits(0);
  65. }
  66. Entry
  67. getentry(long b)
  68. {
  69. long e, n, dtop;
  70. static Entry ans;
  71. static int anslen = 0;
  72. e = (*dict->nextoff)(b+1);
  73. ans.doff = b;
  74. if(e < 0) {
  75. dtop = Bseek(bdict, 0L, 2);
  76. if(b < dtop) {
  77. e = dtop;
  78. } else {
  79. err("couldn't seek to entry");
  80. ans.start = 0;
  81. ans.end = 0;
  82. }
  83. }
  84. n = e-b;
  85. if(n) {
  86. if(n > anslen) {
  87. ans.start = realloc(ans.start, n);
  88. if(!ans.start) {
  89. err("out of memory");
  90. exits("nomem");
  91. }
  92. anslen = n;
  93. }
  94. Bseek(bdict, b, 0);
  95. n = Bread(bdict, ans.start, n);
  96. ans.end = ans.start + n;
  97. }
  98. return ans;
  99. }