msexceltables.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. /* msexceltables.c Steve Simon 5-Jan-2005 */
  2. #include <u.h>
  3. #include <libc.h>
  4. #include <bio.h>
  5. #include <ctype.h>
  6. enum {
  7. Tillegal = 0,
  8. Tnumber, // cell types
  9. Tlabel,
  10. Tindex,
  11. Tbool,
  12. Terror,
  13. Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
  14. Nwidths = 4096,
  15. };
  16. typedef struct Biff Biff;
  17. typedef struct Col Col;
  18. typedef struct Row Row;
  19. struct Row {
  20. Row *next; // next row
  21. int r; // row number
  22. Col *col; // list of cols in row
  23. };
  24. struct Col {
  25. Col *next; // next col in row
  26. int c; // col number
  27. int f; // index into formating table (Xf)
  28. int type; // type of value for union below
  29. union { // value
  30. int index; // index into string table (Strtab)
  31. int error;
  32. int bool;
  33. char *label;
  34. double number;
  35. };
  36. };
  37. struct Biff {
  38. Biobuf *bp; // input file
  39. int op; // current record type
  40. int len; // length of current record
  41. };
  42. // options
  43. static int Nopad = 0; // disable padding cells to colum width
  44. static int Trunc = 0; // truncate cells to colum width
  45. static int All = 0; // dump all sheet types, Worksheets only by default
  46. static char *Delim = " "; // field delimiter
  47. static int Debug = 0;
  48. // file scope
  49. static int Defwidth = 10; // default colum width if non given
  50. static int Biffver; // file vesion
  51. static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1
  52. static char **Strtab = nil; // label contents heap
  53. static int Nstrtab = 0; // # of above
  54. static int *Xf; // array of extended format indices
  55. static int Nxf = 0; // # of above
  56. static Biobuf *bo; // stdout (sic)
  57. // table scope
  58. static int Width[Nwidths]; // array of colum widths
  59. static int Ncols = -1; // max colums in table used
  60. static int Content = 0; // type code for contents of sheet
  61. static Row *Root = nil; // one worksheet's worth of cells
  62. static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  63. "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
  64. static char *Errmsgs[] = {
  65. [0x0] "#NULL!", // intersection of two cell ranges is empty
  66. [0x7] "#DIV/0!", // division by zero
  67. [0xf] "#VALUE!", // wrong type of operand
  68. [0x17] "#REF!", // illegal or deleted cell reference
  69. [0x1d] "#NAME?", // wrong function or range name
  70. [0x24] "#NUM!", // value range overflow
  71. [0x2a] "#N/A!", // argument of function not available
  72. };
  73. void
  74. cell(int r, int c, int f, int type, void *val)
  75. {
  76. Row *row, *nrow;
  77. Col *col, *ncol;
  78. if (c > Ncols)
  79. Ncols = c;
  80. if ((ncol = malloc(sizeof(Col))) == nil)
  81. sysfatal("no memory\n");
  82. ncol->c = c;
  83. ncol->f = f;
  84. ncol->type = type;
  85. ncol->next = nil;
  86. switch(type){
  87. case Tnumber: ncol->number = *(double *)val; break;
  88. case Tlabel: ncol->label = (char *)val; break;
  89. case Tindex: ncol->index = *(int *)val; break;
  90. case Tbool: ncol->bool = *(int *)val; break;
  91. case Terror: ncol->error = *(int *)val; break;
  92. default: sysfatal("can't happen error\n");
  93. }
  94. if (Root == nil || Root->r > r){
  95. if ((nrow = malloc(sizeof(Row))) == nil)
  96. sysfatal("no memory\n");
  97. nrow->col = ncol;
  98. ncol->next = nil;
  99. nrow->r = r;
  100. nrow->next = Root;
  101. Root = nrow;
  102. return;
  103. }
  104. for (row = Root; row; row = row->next){
  105. if (row->r == r){
  106. if (row->col->c > c){
  107. ncol->next = row->col;
  108. row->col = ncol;
  109. return;
  110. }
  111. else{
  112. for (col = row->col; col; col = col->next)
  113. if (col->next == nil || col->next->c > c){
  114. ncol->next = col->next;
  115. col->next = ncol;
  116. return;
  117. }
  118. }
  119. }
  120. if (row->next == nil || row->next->r > r){
  121. if ((nrow = malloc(sizeof(Row))) == nil)
  122. sysfatal("no memory\n");
  123. nrow->col = ncol;
  124. nrow->r = r;
  125. nrow->next = row->next;
  126. row->next = nrow;
  127. return;
  128. }
  129. }
  130. sysfatal("cannot happen error\n");
  131. }
  132. struct Tm *
  133. bifftime(double num)
  134. {
  135. long long t = num;
  136. /* Beware - These epochs are wrong, this
  137. * is due to Excel still remaining compatible
  138. * with Lotus-123, which incorrectly believed 1900
  139. * was a leap year
  140. */
  141. if (Datemode)
  142. t -= 24107; // epoch = 1/1/1904
  143. else
  144. t -= 25569; // epoch = 31/12/1899
  145. t *= 60*60*24;
  146. return localtime((long)t);
  147. }
  148. void
  149. numfmt(int fmt, int min, int max, double num)
  150. {
  151. char buf[1024];
  152. struct Tm *tm;
  153. if (fmt == 9)
  154. snprint(buf, sizeof(buf),"%.0f%%", num);
  155. else
  156. if (fmt == 10)
  157. snprint(buf, sizeof(buf),"%f%%", num);
  158. else
  159. if (fmt == 11 || fmt == 48)
  160. snprint(buf, sizeof(buf),"%e", num);
  161. else
  162. if (fmt >= 14 && fmt <= 17){
  163. tm = bifftime(num);
  164. snprint(buf, sizeof(buf),"%d-%s-%d",
  165. tm->mday, Months[tm->mon], tm->year+1900);
  166. }
  167. else
  168. if ((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
  169. tm = bifftime(num);
  170. snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
  171. }
  172. else
  173. if (fmt == 22){
  174. tm = bifftime(num);
  175. snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
  176. tm->hour, tm->min, tm->sec,
  177. tm->mday, Months[tm->mon], tm->year+1900);
  178. }else
  179. snprint(buf, sizeof(buf),"%g", num);
  180. Bprint(bo, "%-*.*q", min, max, buf);
  181. }
  182. void
  183. dump(void)
  184. {
  185. Row *r;
  186. Col *c;
  187. int i, min, max;
  188. for (r = Root; r; r = r->next){
  189. for (c = r->col; c; c = c->next){
  190. if (c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
  191. min = Defwidth;
  192. if ((c->next && c->c == c->next->c) || Nopad)
  193. min = 0;
  194. max = -1;
  195. if (Trunc && min > 2)
  196. max = min -2; // FIXME: -2 because of bug %q format ?
  197. switch(c->type){
  198. case Tnumber:
  199. if (Xf[c->f] == 0)
  200. Bprint(bo, "%-*.*g", min, max, c->number);
  201. else
  202. numfmt(Xf[c->f], min, max, c->number);
  203. break;
  204. case Tlabel:
  205. Bprint(bo, "%-*.*q", min, max, c->label);
  206. break;
  207. case Tbool:
  208. Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False");
  209. break;
  210. case Tindex:
  211. if (c->index < 0 || c->index >= Nstrtab)
  212. sysfatal("SST string out of range - corrupt file?\n");
  213. Bprint(bo, "%-*.*q", min, max, Strtab[c->index]);
  214. break;
  215. case Terror:
  216. if (c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
  217. Bprint(bo, "#ERR=%d", c->index);
  218. else
  219. Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]);
  220. break;
  221. default:
  222. sysfatal("cannot happen error\n");
  223. break;
  224. }
  225. if (c->next){
  226. if (c->next->c == c->c) // bar charts
  227. Bprint(bo, "=");
  228. else{
  229. Bprint(bo, "%s", Delim);
  230. for (i = c->c; c->next && i < c->next->c -1; i++)
  231. Bprint(bo, "%-*.*s%s", min, max, "", Delim);
  232. }
  233. }
  234. }
  235. if (r->next)
  236. for (i = r->r; i < r->next->r; i++)
  237. Bprint(bo, "\n");
  238. }
  239. Bprint(bo, "\n");
  240. }
  241. void
  242. release(void)
  243. {
  244. Row *r, *or;
  245. Col *c, *oc;
  246. r = Root;
  247. while(r){
  248. c = r->col;
  249. while(c){
  250. if (c->type == Tlabel)
  251. free(c->label);
  252. oc = c;
  253. c = c->next;
  254. free(oc);
  255. }
  256. or = r;
  257. r = r->next;
  258. free(or);
  259. }
  260. Root = nil;
  261. memset(Width, 0, sizeof(Width));
  262. Ncols = -1;
  263. }
  264. void
  265. skip(Biff *b, int len)
  266. {
  267. assert(len <= b->len);
  268. if (Bseek(b->bp, len, 1) == -1)
  269. sysfatal("seek failed - %r\n");
  270. b->len -= len;
  271. }
  272. void
  273. gmem(Biff *b, void *p, int n)
  274. {
  275. if (b->len < n)
  276. sysfatal("short record %d < %d\n", b->len, n);
  277. if (Bread(b->bp, p, n) != n)
  278. sysfatal("unexpected EOF - %r\n");
  279. b->len -= n;
  280. }
  281. void
  282. xd(Biff *b)
  283. {
  284. uvlong off;
  285. uchar buf[16];
  286. int addr, got, n, i, j;
  287. addr = 0;
  288. off = Boffset(b->bp);
  289. while (addr < b->len){
  290. n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
  291. got = Bread(b->bp, buf, n);
  292. Bprint(bo, " %6d ", addr);
  293. addr += n;
  294. for (i = 0; i < got; i++)
  295. Bprint(bo, "%02x ", buf[i]);
  296. for (j = i; j < 16; j++)
  297. Bprint(bo, " ");
  298. Bprint(bo, " ");
  299. for (i = 0; i < got; i++)
  300. Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
  301. Bprint(bo, "\n");
  302. }
  303. Bseek(b->bp, off, 0);
  304. }
  305. static int
  306. getrec(Biff *b)
  307. {
  308. int c;
  309. if ((c = Bgetc(b->bp)) == -1)
  310. return -1; // real EOF
  311. b->op = c;
  312. if ((c = Bgetc(b->bp)) == -1)
  313. sysfatal("unexpected EOF - %r\n");
  314. b->op |= c << 8;
  315. if ((c = Bgetc(b->bp)) == -1)
  316. sysfatal("unexpected EOF - %r\n");
  317. b->len = c;
  318. if ((c = Bgetc(b->bp)) == -1)
  319. sysfatal("unexpected EOF - %r\n");
  320. b->len |= c << 8;
  321. if (b->op == 0 && b->len == 0)
  322. return -1;
  323. if (Debug){
  324. Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
  325. xd(b);
  326. }
  327. return 0;
  328. }
  329. static uvlong
  330. gint(Biff *b, int n)
  331. {
  332. int i, c;
  333. uvlong vl, rc;
  334. if (b->len < n)
  335. return -1;
  336. rc = 0;
  337. for (i = 0; i < n; i++){
  338. if ((c = Bgetc(b->bp)) == -1)
  339. sysfatal("unexpected EOF - %r\n");
  340. b->len--;
  341. vl = c;
  342. rc |= vl << (8*i);
  343. }
  344. return rc;
  345. }
  346. double
  347. grk(Biff *b)
  348. {
  349. int f;
  350. uvlong n;
  351. double d;
  352. n = gint(b, 4);
  353. f = n & 3;
  354. n &= ~3LL;
  355. if (f & 2){
  356. d = n / 4.0;
  357. }
  358. else{
  359. n <<= 32;
  360. memcpy(&d, &n, sizeof(d));
  361. }
  362. if (f & 1)
  363. d /= 100.0;
  364. return d;
  365. }
  366. double
  367. gdoub(Biff *b)
  368. {
  369. double d;
  370. uvlong n = gint(b, 8);
  371. memcpy(&d, &n, sizeof(n));
  372. return d;
  373. }
  374. char *
  375. gstr(Biff *b, int len_width)
  376. {
  377. Rune r;
  378. char *buf, *p;
  379. int nch, w, ap, ln, rt, opt;
  380. enum {
  381. Unicode = 1,
  382. Asian_phonetic = 4,
  383. Rich_text = 8,
  384. };
  385. if (b->len < len_width){
  386. if (getrec(b) == -1)
  387. sysfatal("starting STRING expected CONTINUE, got EOF\n");
  388. if (b->op != 0x03c)
  389. sysfatal("starting STRING expected CONTINUE, got op=0x%x\n", b->op);
  390. }
  391. ln = gint(b, len_width);
  392. if (Biffver != Ver8){
  393. if ((buf = calloc(ln+1, sizeof(char))) == nil)
  394. sysfatal("no memory\n");
  395. gmem(b, buf, ln);
  396. return buf;
  397. }
  398. if ((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
  399. sysfatal("no memory\n");
  400. p = buf;
  401. if (ln == 0)
  402. return buf;
  403. nch = 0;
  404. *buf = 0;
  405. opt = gint(b, 1);
  406. if(opt & Rich_text)
  407. rt = gint(b, 2);
  408. else
  409. rt = 0;
  410. if(opt & Asian_phonetic)
  411. ap = gint(b, 4);
  412. else
  413. ap = 0;
  414. for(;;){
  415. w = (opt & Unicode)? sizeof(Rune): sizeof(char);
  416. while(b->len > 0){
  417. r = gint(b, w);
  418. p += runetochar(p, &r);
  419. if (++nch >= ln){
  420. if (rt)
  421. skip(b, rt*4);
  422. if (ap)
  423. skip(b, ap);
  424. return buf;
  425. }
  426. }
  427. if (getrec(b) == -1)
  428. sysfatal("in STRING expected CONTINUE, got EOF\n");
  429. if (b->op != 0x03c)
  430. sysfatal("in STRING expected CONTINUE, got op=0x%x\n", b->op);
  431. opt = gint(b, 1);
  432. }
  433. sysfatal("cannot ever happen error\n");
  434. return nil; // shut up 8c
  435. }
  436. void
  437. sst(Biff *b)
  438. {
  439. int n;
  440. skip(b, 4); // total # strings
  441. Nstrtab = gint(b, 4); // # unique strings
  442. if ((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
  443. sysfatal("no memory\n");
  444. for (n = 0; n < Nstrtab; n++)
  445. Strtab[n] = gstr(b, 2);
  446. }
  447. void
  448. boolerr(Biff *b)
  449. {
  450. int r = gint(b, 2); // row
  451. int c = gint(b, 2); // col
  452. int f = gint(b, 2); // formatting ref
  453. int v = gint(b, 1); // bool value / err code
  454. int t = gint(b, 1); // type
  455. cell(r, c, f, (t)? Terror: Tbool, &v);
  456. }
  457. void
  458. rk(Biff *b)
  459. {
  460. int r = gint(b, 2); // row
  461. int c = gint(b, 2); // col
  462. int f = gint(b, 2); // formatting ref
  463. double v = grk(b); // value
  464. cell(r, c, f, Tnumber, &v);
  465. }
  466. void
  467. mulrk(Biff *b)
  468. {
  469. int r = gint(b, 2); // row
  470. int c = gint(b, 2); // first col
  471. while (b->len >= 6){
  472. int f = gint(b, 2); // formatting ref
  473. double v = grk(b); // value
  474. cell(r, c++, f, Tnumber, &v);
  475. }
  476. }
  477. void
  478. number(Biff *b)
  479. {
  480. int r = gint(b, 2); // row
  481. int c = gint(b, 2); // col
  482. int f = gint(b, 2); // formatting ref
  483. double v = gdoub(b); // double
  484. cell(r, c, f, Tnumber, &v);
  485. }
  486. void
  487. label(Biff *b)
  488. {
  489. int r = gint(b, 2); // row
  490. int c = gint(b, 2); // col
  491. int f = gint(b, 2); // formatting ref
  492. char *s = gstr(b, 2); // byte string
  493. cell(r, c, f, Tlabel, s);
  494. }
  495. void
  496. labelsst(Biff *b)
  497. {
  498. int r = gint(b, 2); // row
  499. int c = gint(b, 2); // col
  500. int f = gint(b, 2); // formatting ref
  501. int i = gint(b, 2); // sst string ref
  502. cell(r, c, f, Tindex, &i);
  503. }
  504. void
  505. bof(Biff *b)
  506. {
  507. Biffver = gint(b, 2);
  508. Content = gint(b, 2);
  509. }
  510. void
  511. defcolwidth(Biff *b)
  512. {
  513. Defwidth = gint(b, 2);
  514. }
  515. void
  516. datemode(Biff *b)
  517. {
  518. Datemode = gint(b, 2);
  519. }
  520. void
  521. eof(Biff *b)
  522. {
  523. int i;
  524. struct {
  525. int n;
  526. char *s;
  527. } names[] = {
  528. 0x005, "Workbook globals",
  529. 0x006, "Visual Basic module",
  530. 0x010, "Worksheet",
  531. 0x020, "Chart",
  532. 0x040, "Macro sheet",
  533. 0x100, "Workspace file",
  534. };
  535. if (Ncols != -1){
  536. if (All){
  537. for (i = 0; i < nelem(names); i++)
  538. if (names[i].n == Content){
  539. Bprint(bo, "\n# contents %s\n", names[i].s);
  540. dump();
  541. }
  542. }
  543. else
  544. if (Content == 0x10)
  545. dump();
  546. }
  547. release();
  548. USED(b);
  549. }
  550. void
  551. colinfo(Biff *b)
  552. {
  553. int c;
  554. int c1 = gint(b, 2);
  555. int c2 = gint(b, 2);
  556. int w = gint(b, 2);
  557. if (c1 < 0)
  558. sysfatal("negative column number (%d)\n", c1);
  559. if (c2 >= Nwidths)
  560. sysfatal("too many columns (%d > %d)\n", c2, Nwidths);
  561. w /= 256;
  562. if (w > 100)
  563. w = 100;
  564. if (w < 0)
  565. w = 0;
  566. for (c = c1; c <= c2; c++)
  567. Width[c] = w;
  568. }
  569. void
  570. xf(Biff *b)
  571. {
  572. int fmt;
  573. static int nalloc = 0;
  574. skip(b, 2);
  575. fmt = gint(b, 2);
  576. if (nalloc >= Nxf){
  577. nalloc += 20;
  578. if ((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
  579. sysfatal("no memory\n");
  580. }
  581. Xf[Nxf++] = fmt;
  582. }
  583. void
  584. writeaccess(Biff *b)
  585. {
  586. Bprint(bo, "# author %s\n", gstr(b, 2));
  587. }
  588. void
  589. codepage(Biff *b)
  590. {
  591. int codepage = gint(b, 2);
  592. if (codepage != 1200) // 1200 == UTF-16
  593. Bprint(bo, "# codepage %d\n", codepage);
  594. }
  595. void
  596. xls2csv(Biobuf *bp)
  597. {
  598. int i;
  599. Biff biff, *b;
  600. struct {
  601. int op;
  602. void (*func)(Biff *);
  603. } dispatch[] = {
  604. 0x000a, eof,
  605. 0x0022, datemode,
  606. 0x0042, codepage,
  607. 0x0055, defcolwidth,
  608. 0x005c, writeaccess,
  609. 0x007d, colinfo,
  610. 0x00bd, mulrk,
  611. 0x00fc, sst,
  612. 0x00fd, labelsst,
  613. 0x0203, number,
  614. 0x0204, label,
  615. 0x0205, boolerr,
  616. 0x027e, rk,
  617. 0x0809, bof,
  618. 0x00e0, xf,
  619. };
  620. b = &biff;
  621. b->bp = bp;
  622. while(getrec(b) != -1){
  623. for (i = 0; i < nelem(dispatch); i++)
  624. if (b->op == dispatch[i].op)
  625. (*dispatch[i].func)(b);
  626. skip(b, b->len);
  627. }
  628. }
  629. void
  630. usage(void)
  631. {
  632. fprint(2, "usage: %s [-aDnt] [-d delim] /mnt/doc/Workbook\n", argv0);
  633. exits("usage");
  634. }
  635. void
  636. main(int argc, char *argv[])
  637. {
  638. int i;
  639. Biobuf bin, bout, *bp;
  640. ARGBEGIN{
  641. case 'n':
  642. Nopad = 1;
  643. break;
  644. case 't':
  645. Trunc = 1;
  646. break;
  647. case 'a':
  648. All = 1;
  649. break;
  650. case 'd':
  651. Delim = EARGF(usage());
  652. break;
  653. case 'D':
  654. Debug = 1;
  655. break;
  656. default:
  657. usage();
  658. break;
  659. }ARGEND;
  660. if (argc != 1)
  661. usage();
  662. bo = &bout;
  663. quotefmtinstall();
  664. Binit(bo, OWRITE, 1);
  665. if(argc > 0) {
  666. for(i = 0; i < argc; i++){
  667. if ((bp = Bopen(argv[i], OREAD)) == nil)
  668. sysfatal("%s cannot open - %r\n", argv[i]);
  669. xls2csv(bp);
  670. Bterm(bp);
  671. }
  672. } else {
  673. Binit(&bin, 0, OREAD);
  674. xls2csv(&bin);
  675. }
  676. exits(0);
  677. }