msexceltables.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725
  1. /* msexceltables.c Steve Simon 5-Jan-2005 */
  2. #include <u.h>
  3. #include <libc.h>
  4. #include <bio.h>
  5. #include <ctype.h>
  6. enum {
  7. Tillegal = 0,
  8. Tnumber, // cell types
  9. Tlabel,
  10. Tindex,
  11. Tbool,
  12. Terror,
  13. Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
  14. };
  15. typedef struct Biff Biff;
  16. typedef struct Col Col;
  17. typedef struct Row Row;
  18. struct Row {
  19. Row *next;
  20. int r;
  21. Col *col;
  22. };
  23. struct Col {
  24. Col *next;
  25. int c;
  26. int f;
  27. int type;
  28. union {
  29. int index;
  30. int error;
  31. int bool;
  32. char *label;
  33. double number;
  34. };
  35. };
  36. struct Biff {
  37. Biobuf *bp;
  38. int op;
  39. int len;
  40. };
  41. // options
  42. static int Nopad = 0; // disable padding cells to colum width
  43. static int Trunc = 0; // truncate cells to colum width
  44. static int All = 0; // dump all sheet types, Worksheets only by default
  45. static char *Delim = " "; // field delimiter
  46. static int Debug = 0;
  47. // file scope
  48. static int Defwidth = 10; // default colum width if non given
  49. static int Biffver; // file vesion
  50. static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1
  51. static char **Strtab = nil; // label contents heap
  52. static int Nstrtab = 0; // # of above
  53. static int *Xf; // array of extended format indices
  54. static int Nxf = 0; // # of above
  55. static Biobuf *bo; // stdout (sic)
  56. // table scope
  57. static int *Width = nil; // array of colum widths
  58. static int Nwidths = 0; // # of above
  59. static int Ncols = -1; // max colums in table used
  60. static int Content = 0; // type code for contents of sheet
  61. static Row *Root = nil; // one worksheet's worth of cells
  62. static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  63. "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
  64. static char *Errmsgs[] = {
  65. [0x0] "#NULL!", // intersection of two cell ranges is empty
  66. [0x7] "#DIV/0!", // division by zero
  67. [0xf] "#VALUE!", // wrong type of operand
  68. [0x17] "#REF!", // illegal or deleted cell reference
  69. [0x1d] "#NAME?", // wrong function or range name
  70. [0x24] "#NUM!", // value range overflow
  71. [0x2a] "#N/A!", // argument of function not available
  72. };
  73. void
  74. cell(int r, int c, int f, int type, void *val)
  75. {
  76. Row *row, *nrow;
  77. Col *col, *ncol;
  78. if (c > Ncols)
  79. Ncols = c;
  80. if ((ncol = malloc(sizeof(Col))) == nil)
  81. sysfatal("no memory\n");
  82. ncol->c = c;
  83. ncol->f = f;
  84. ncol->type = type;
  85. ncol->next = nil;
  86. switch(type){
  87. case Tnumber: ncol->number = *(double *)val; break;
  88. case Tlabel: ncol->label = (char *)val; break;
  89. case Tindex: ncol->index = *(int *)val; break;
  90. case Tbool: ncol->bool = *(int *)val; break;
  91. case Terror: ncol->error = *(int *)val; break;
  92. default: sysfatal("can't happen error\n");
  93. }
  94. if (Root == nil || Root->r > r){
  95. if ((nrow = malloc(sizeof(Row))) == nil)
  96. sysfatal("no memory\n");
  97. nrow->col = ncol;
  98. ncol->next = nil;
  99. nrow->r = r;
  100. nrow->next = Root;
  101. Root = nrow;
  102. return;
  103. }
  104. for (row = Root; row; row = row->next){
  105. if (row->r == r){
  106. if (row->col->c > c){
  107. ncol->next = row->col;
  108. row->col = ncol;
  109. return;
  110. }
  111. else{
  112. for (col = row->col; col; col = col->next)
  113. if (col->next == nil || col->next->c > c){
  114. ncol->next = col->next;
  115. col->next = ncol;
  116. return;
  117. }
  118. }
  119. }
  120. if (row->next == nil || row->next->r > r){
  121. if ((nrow = malloc(sizeof(Row))) == nil)
  122. sysfatal("no memory\n");
  123. nrow->col = ncol;
  124. nrow->r = r;
  125. nrow->next = row->next;
  126. row->next = nrow;
  127. return;
  128. }
  129. }
  130. sysfatal("cannot happen error\n");
  131. }
  132. void
  133. numfmt(int fmt, int min, int max, double num)
  134. {
  135. long t;
  136. char buf[1024];
  137. struct Tm *tm;
  138. /* Beware - These epochs are wrong, this
  139. * is to remain compatible with Lotus-123
  140. * which believed 1900 was a leap year
  141. */
  142. if (Datemode)
  143. t = (num-24107)*60*60*24; // epoch = 1/1/1904
  144. else
  145. t = (num-25569)*60*60*24; // epoch = 31/12/1899
  146. tm = localtime(t);
  147. if (fmt == 9)
  148. snprint(buf, sizeof(buf),"%.0f%%", num);
  149. else
  150. if (fmt == 10)
  151. snprint(buf, sizeof(buf),"%f%%", num);
  152. else
  153. if (fmt == 11 || fmt == 48)
  154. snprint(buf, sizeof(buf),"%e", num);
  155. else
  156. if (fmt >= 14 && fmt <= 17)
  157. snprint(buf, sizeof(buf),"%d-%s-%d",
  158. tm->mday, Months[tm->mon], tm->year+1900);
  159. else
  160. if ((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47))
  161. snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
  162. else
  163. if (fmt == 22)
  164. snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
  165. tm->hour, tm->min, tm->sec,
  166. tm->mday, Months[tm->mon], tm->year+1900);
  167. else
  168. snprint(buf, sizeof(buf),"%g", num);
  169. Bprint(bo, "%-*.*q", min, max, buf);
  170. }
  171. void
  172. dump(void)
  173. {
  174. Row *r;
  175. Col *c;
  176. int i, min, max;
  177. for (r = Root; r; r = r->next){
  178. for (c = r->col; c; c = c->next){
  179. if (c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
  180. min = Defwidth;
  181. if ((c->next && c->c == c->next->c) || Nopad)
  182. min = 0;
  183. max = -1;
  184. if (Trunc && min > 2)
  185. max = min -2; // FIXME: -2 because of bug %q format ?
  186. switch(c->type){
  187. case Tnumber:
  188. if (Xf[c->f] == 0)
  189. Bprint(bo, "%-*.*g", min, max, c->number);
  190. else
  191. numfmt(Xf[c->f], min, max, c->number);
  192. break;
  193. case Tlabel:
  194. Bprint(bo, "%-*.*q", min, max, c->label);
  195. break;
  196. case Tbool:
  197. Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False");
  198. break;
  199. case Tindex:
  200. if (c->error < 0 || c->error >= Nstrtab)
  201. sysfatal("SST string out of range - corrupt file?\n");
  202. Bprint(bo, "%-*.*q", min, max, Strtab[c->index]);
  203. break;
  204. case Terror:
  205. if (c->error < 0 || c->error >= nelem(Errmsgs))
  206. Bprint(bo, "#ERR=%d", c->index);
  207. else
  208. Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]);
  209. break;
  210. default:
  211. sysfatal("cannot happen error\n");
  212. break;
  213. }
  214. if (c->next){
  215. if (c->next->c == c->c) // bar charts
  216. Bprint(bo, "=");
  217. else{
  218. Bprint(bo, "%s", Delim);
  219. for (i = c->c; c->next && i < c->next->c -1; i++)
  220. Bprint(bo, "%-*.*s%s", min, max, "", Delim);
  221. }
  222. }
  223. }
  224. if (r->next)
  225. for (i = r->r; i < r->next->r; i++)
  226. Bprint(bo, "\n");
  227. }
  228. Bprint(bo, "\n");
  229. }
  230. void
  231. release(void)
  232. {
  233. Row *r, *or;
  234. Col *c, *oc;
  235. r = Root;
  236. while(r){
  237. c = r->col;
  238. while(c){
  239. if (c->type == Tlabel)
  240. free(c->label);
  241. oc = c;
  242. c = c->next;
  243. free(oc);
  244. }
  245. or = r;
  246. r = r->next;
  247. free(or);
  248. }
  249. Root = nil;
  250. free(Width);
  251. Width = nil;
  252. Nwidths = 0;
  253. Ncols = -1;
  254. }
  255. static int
  256. getrec(Biff *b)
  257. {
  258. int c;
  259. if ((c = Bgetc(b->bp)) == -1)
  260. return -1; // real EOF
  261. b->op = c;
  262. if ((c = Bgetc(b->bp)) == -1)
  263. sysfatal("unexpected EOF - %r\n");
  264. b->op |= c << 8;
  265. if ((c = Bgetc(b->bp)) == -1)
  266. sysfatal("unexpected EOF - %r\n");
  267. b->len = c;
  268. if ((c = Bgetc(b->bp)) == -1)
  269. sysfatal("unexpected EOF - %r\n");
  270. b->len |= c << 8;
  271. if (b->op == 0 && b->len == 0)
  272. return -1;
  273. return 0;
  274. }
  275. static uvlong
  276. gint(Biff *b, int n)
  277. {
  278. int i, c;
  279. uvlong vl, rc;
  280. if (b->len < n)
  281. return -1;
  282. rc = 0;
  283. for (i = 0; i < n; i++){
  284. if ((c = Bgetc(b->bp)) == -1)
  285. sysfatal("unexpected EOF - %r\n");
  286. b->len--;
  287. vl = c;
  288. rc |= vl << (8*i);
  289. }
  290. return rc;
  291. }
  292. void
  293. skip(Biff *b, int len)
  294. {
  295. if (Bseek(b->bp, len, 1) == -1)
  296. sysfatal("seek failed - %r\n");
  297. b->len -= len;
  298. }
  299. void
  300. gmem(Biff *b, void *p, int n)
  301. {
  302. if (b->len < n)
  303. sysfatal("short record %d < %d\n", b->len, n);
  304. if (Bread(b->bp, p, n) != n)
  305. sysfatal("unexpected EOF - %r\n");
  306. b->len -= n;
  307. }
  308. double
  309. grk(Biff *b)
  310. {
  311. int f;
  312. uvlong n;
  313. double d;
  314. n = gint(b, 4);
  315. f = n & 3;
  316. n &= ~3LL;
  317. if (f & 2){
  318. d = n / 4.0;
  319. }
  320. else{
  321. n <<= 32;
  322. memcpy(&d, &n, sizeof(d));
  323. }
  324. if (f & 1)
  325. d /= 100.0;
  326. return d;
  327. }
  328. double
  329. gdoub(Biff *b)
  330. {
  331. double d;
  332. uvlong n = gint(b, 8);
  333. memcpy(&d, &n, sizeof(n));
  334. return d;
  335. }
  336. char *
  337. gstr(Biff *b, int len_width)
  338. {
  339. Rune r;
  340. int len, opt;
  341. char *buf, *p;
  342. if (b->len == 0){
  343. if (getrec(b) == -1)
  344. sysfatal("expected CONTINUE, got EOF\n");
  345. if (b->op != 0x03c)
  346. sysfatal("expected CONTINUE, got op=0x%x\n", b->op);
  347. }
  348. switch(len_width){
  349. case 16: len = gint(b, 2); break;
  350. case 8: len = gint(b, 1); break;
  351. default: sysfatal("can't happen error\n"); SET(len); break;
  352. }
  353. if (Biffver != Ver8){
  354. if ((buf = calloc(len+1, sizeof(char))) == nil)
  355. sysfatal("no memory\n");
  356. gmem(b, buf, len);
  357. return buf;
  358. }
  359. if ((buf = calloc(len+1, sizeof(char)*UTFmax)) == nil)
  360. sysfatal("no memory\n");
  361. p = buf;
  362. if (len == 0)
  363. return buf;
  364. opt = gint(b, 1);
  365. if (opt & 0x0c)
  366. sysfatal("Can't parse rich or Asian phonetic text - no documentation!\n");
  367. while(len--){
  368. r = gint(b, (opt & 1)? sizeof(Rune): sizeof(char));
  369. p += runetochar(p, &r);
  370. }
  371. return buf;
  372. }
  373. void
  374. xd(Biff *b)
  375. {
  376. uchar buf[16];
  377. int addr, n, i, j;
  378. addr = 0;
  379. while (b->len){
  380. n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
  381. gmem(b, buf, n);
  382. Bprint(bo, " %6d ", addr);
  383. addr += n;
  384. for (i = 0; i < n; i++)
  385. Bprint(bo, "%02x ", buf[i]);
  386. for (j = i; j < 16; j++)
  387. Bprint(bo, " ");
  388. Bprint(bo, " ");
  389. for (i = 0; i < n; i++)
  390. Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
  391. Bprint(bo, "\n");
  392. }
  393. }
  394. void
  395. sst(Biff *b)
  396. {
  397. int n;
  398. skip(b, 4); // total # strings
  399. Nstrtab = gint(b, 4); // # unique strings
  400. if ((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
  401. sysfatal("no memory\n");
  402. for (n = 0; n < Nstrtab; n++)
  403. Strtab[n] = gstr(b, 16);
  404. }
  405. void
  406. boolerr(Biff *b)
  407. {
  408. int r = gint(b, 2); // row
  409. int c = gint(b, 2); // col
  410. int f = gint(b, 2); // formatting ref
  411. int v = gint(b, 1); // bool value / err code
  412. int t = gint(b, 1); // type
  413. cell(r, c, f, (t)? Terror: Tbool, &v);
  414. }
  415. void
  416. rk(Biff *b)
  417. {
  418. int r = gint(b, 2); // row
  419. int c = gint(b, 2); // col
  420. int f = gint(b, 2); // formatting ref
  421. double v = grk(b); // value
  422. cell(r, c, f, Tnumber, &v);
  423. }
  424. void
  425. mulrk(Biff *b)
  426. {
  427. int r = gint(b, 2); // row
  428. int c = gint(b, 2); // first col
  429. while (b->len >= 6){
  430. int f = gint(b, 2); // formatting ref
  431. double v = grk(b); // value
  432. cell(r, c++, f, Tnumber, &v);
  433. }
  434. }
  435. void
  436. number(Biff *b)
  437. {
  438. int r = gint(b, 2); // row
  439. int c = gint(b, 2); // col
  440. int f = gint(b, 2); // formatting ref
  441. double v = gdoub(b); // double
  442. cell(r, c, f, Tnumber, &v);
  443. }
  444. void
  445. label(Biff *b)
  446. {
  447. int r = gint(b, 2); // row
  448. int c = gint(b, 2); // col
  449. int f = gint(b, 2); // formatting ref
  450. char *s = gstr(b, 16); // byte string
  451. cell(r, c, f, Tlabel, s);
  452. }
  453. void
  454. labelsst(Biff *b)
  455. {
  456. int r = gint(b, 2); // row
  457. int c = gint(b, 2); // col
  458. int f = gint(b, 2); // formatting ref
  459. int i = gint(b, 2); // sst string ref
  460. cell(r, c, f, Tindex, &i);
  461. }
  462. void
  463. bof(Biff *b)
  464. {
  465. Biffver = gint(b, 2);
  466. Content = gint(b, 2);
  467. }
  468. void
  469. defcolwidth(Biff *b)
  470. {
  471. Defwidth = gint(b, 2);
  472. }
  473. void
  474. datemode(Biff *b)
  475. {
  476. Datemode = gint(b, 2);
  477. }
  478. void
  479. eof(Biff *b)
  480. {
  481. int i;
  482. struct {
  483. int n;
  484. char *s;
  485. } names[] = {
  486. 0x005, "Workbook globals",
  487. 0x006, "Visual Basic module",
  488. 0x010, "Worksheet",
  489. 0x020, "Chart",
  490. 0x040, "Macro sheet",
  491. 0x100, "Workspace file",
  492. };
  493. if (Ncols != -1){
  494. if (All){
  495. for (i = 0; i < nelem(names); i++)
  496. if (names[i].n == Content){
  497. Bprint(bo, "\n# contents %s\n", names[i].s);
  498. dump();
  499. }
  500. }
  501. else
  502. if (Content == 0x10)
  503. dump();
  504. }
  505. release();
  506. USED(b);
  507. }
  508. void
  509. colinfo(Biff *b)
  510. {
  511. int c;
  512. int c1 = gint(b, 2);
  513. int c2 = gint(b, 2);
  514. int w = gint(b, 2);
  515. if (c2 >= Nwidths){
  516. Nwidths = c2+20;
  517. if ((Width = realloc(Width, Nwidths*sizeof(int))) == nil)
  518. sysfatal("no memory\n");
  519. }
  520. w /= 256;
  521. if (w > 100)
  522. w = 100;
  523. if (w < 0)
  524. w = 0;
  525. for (c = c1; c <= c2; c++)
  526. Width[c] = w;
  527. }
  528. void
  529. xf(Biff *b)
  530. {
  531. int fmt;
  532. static int nalloc = 0;
  533. skip(b, 2);
  534. fmt = gint(b, 2);
  535. if (nalloc >= Nxf){
  536. nalloc += 20;
  537. if ((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
  538. sysfatal("no memory\n");
  539. }
  540. Xf[Nxf++] = fmt;
  541. }
  542. void
  543. writeaccess(Biff *b)
  544. {
  545. Bprint(bo, "# author %s\n", gstr(b, 16));
  546. }
  547. void
  548. codepage(Biff *b)
  549. {
  550. int codepage = gint(b, 2);
  551. if (codepage != 1200) // 1200 == UTF-16
  552. Bprint(bo, "# codepage %d\n", codepage);
  553. }
  554. void
  555. xls2csv(Biobuf *bp)
  556. {
  557. int i;
  558. Biff biff, *b;
  559. struct {
  560. int op;
  561. void (*func)(Biff *);
  562. } dispatch[] = {
  563. 0x00a, eof,
  564. 0x022, datemode,
  565. 0x042, codepage,
  566. 0x055, defcolwidth,
  567. 0x05c, writeaccess,
  568. 0x07d, colinfo,
  569. 0x0bd, mulrk,
  570. 0x0fc, sst,
  571. 0x0fd, labelsst,
  572. 0x203, number,
  573. 0x204, label,
  574. 0x205, boolerr,
  575. 0x27e, rk,
  576. 0x809, bof,
  577. 0x0e0, xf,
  578. };
  579. b = &biff;
  580. b->bp = bp;
  581. while(getrec(b) != -1){
  582. for (i = 0; i < nelem(dispatch); i++)
  583. if (b->op == dispatch[i].op)
  584. (*dispatch[i].func)(b);
  585. if (Debug && i >= nelem(dispatch)){
  586. Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
  587. xd(b);
  588. }
  589. skip(b, b->len);
  590. }
  591. }
  592. void
  593. usage(void)
  594. {
  595. fprint(2, "usage: %s [-nta] [-d delim] file.xls\n", argv0);
  596. exits("usage");
  597. }
  598. void
  599. main(int argc, char *argv[])
  600. {
  601. int i;
  602. Biobuf bin, bout, *bp;
  603. ARGBEGIN{
  604. case 'n':
  605. Nopad = 1;
  606. break;
  607. case 't':
  608. Trunc = 1;
  609. break;
  610. case 'a':
  611. All = 1;
  612. break;
  613. case 'd':
  614. Delim = EARGF(usage());
  615. break;
  616. case 'D':
  617. Debug = 1;
  618. break;
  619. default:
  620. usage();
  621. break;
  622. }ARGEND;
  623. if (argc != 1)
  624. usage();
  625. bo = &bout;
  626. quotefmtinstall();
  627. Binit(bo, OWRITE, 1);
  628. if(argc > 0) {
  629. for(i = 0; i < argc; i++){
  630. if ((bp = Bopen(argv[i], OREAD)) == nil)
  631. sysfatal("%s cannot open - %r\n", argv[i]);
  632. xls2csv(bp);
  633. Bterm(bp);
  634. }
  635. } else {
  636. Binit(&bin, 0, OREAD);
  637. xls2csv(&bin);
  638. }
  639. exits(0);
  640. }