123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- #include "dict.h"
- /*
- * Use this to start making an index for a new dictionary.
- * Get the dictionary-specific nextoff and printentry(_,'h')
- * functions working, add a record to the dicts[] array below,
- * and run this program to get a list of offset,headword
- * pairs.
- */
- Biobuf boutbuf; /* storage for the output buffer; main initialises it on fd 1 for writing */
- Biobuf *bdict; /* the dictionary file, opened read-only by main from dict->path */
- Biobuf *bout = &boutbuf; /* output stream handle; always points at boutbuf */
- int linelen; /* approximate width of the current output line; main resets it per entry */
- int breaklen = 2000; /* not read in this file; presumably a line-break limit for printing code elsewhere -- TODO confirm */
- int outinhibit; /* not read in this file; presumably consulted by shared output code -- TODO confirm */
- int debug; /* debug level; incremented once per -D flag */
- Dict *dict; /* current dictionary */
- Entry getentry(long); /* forward declaration; defined after main */
- void
- main(int argc, char **argv)
- {
- int i;
- long a, ae;
- char *p;
- Entry e;
- Binit(&boutbuf, 1, OWRITE);
- dict = &dicts[0];
- ARGBEGIN {
- case 'd':
- p = ARGF();
- dict = 0;
- if(p) {
- for(i=0; dicts[i].name; i++)
- if(strcmp(p, dicts[i].name)==0) {
- dict = &dicts[i];
- break;
- }
- }
- if(!dict) {
- err("unknown dictionary: %s", p);
- exits("nodict");
- }
- break;
- case 'D':
- debug++;
- break;
- ARGEND }
- USED(argc,argv);
- bdict = Bopen(dict->path, OREAD);
- ae = Bseek(bdict, 0, 2);
- if(!bdict) {
- err("can't open dictionary %s", dict->path);
- exits("nodict");
- }
- for(a = 0; a < ae; a = (*dict->nextoff)(a+1)) {
- linelen = 0;
- e = getentry(a);
- Bprint(bout, "%ld\t", a);
- linelen = 4; /* only has to be approx right */
- (*dict->printentry)(e, 'h');
- }
- exits(0);
- }
- Entry
- getentry(long b)
- {
- long e, n, dtop;
- static Entry ans;
- static int anslen = 0;
- e = (*dict->nextoff)(b+1);
- ans.doff = b;
- if(e < 0) {
- dtop = Bseek(bdict, 0L, 2);
- if(b < dtop) {
- e = dtop;
- } else {
- err("couldn't seek to entry");
- ans.start = 0;
- ans.end = 0;
- }
- }
- n = e-b;
- if(n) {
- if(n > anslen) {
- ans.start = realloc(ans.start, n);
- if(!ans.start) {
- err("out of memory");
- exits("nomem");
- }
- anslen = n;
- }
- Bseek(bdict, b, 0);
- n = Bread(bdict, ans.start, n);
- ans.end = ans.start + n;
- }
- return ans;
- }
|