msexceltables.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757
  1. /* msexceltables.c Steve Simon 5-Jan-2005 */
  2. #include <u.h>
  3. #include <libc.h>
  4. #include <bio.h>
  5. #include <ctype.h>
  6. enum {
  7. Tillegal = 0,
  8. Tnumber, // cell types
  9. Tlabel,
  10. Tindex,
  11. Tbool,
  12. Terror,
  13. Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
  14. Nwidths = 4096,
  15. };
  16. typedef struct Biff Biff;
  17. typedef struct Col Col;
  18. typedef struct Row Row;
  19. struct Row {
  20. Row *next; // next row
  21. int r; // row number
  22. Col *col; // list of cols in row
  23. };
  24. struct Col {
  25. Col *next; // next col in row
  26. int c; // col number
  27. int f; // index into formating table (Xf)
  28. int type; // type of value for union below
  29. union { // value
  30. int index; // index into string table (Strtab)
  31. int error;
  32. int bool;
  33. char *label;
  34. double number;
  35. };
  36. };
  37. struct Biff {
  38. Biobuf *bp; // input file
  39. int op; // current record type
  40. int len; // length of current record
  41. };
  42. // options
  43. static int Nopad = 0; // disable padding cells to colum width
  44. static int Trunc = 0; // truncate cells to colum width
  45. static int All = 0; // dump all sheet types, Worksheets only by default
  46. static char *Delim = " "; // field delimiter
  47. static int Debug = 0;
  48. // file scope
  49. static int Defwidth = 10; // default colum width if non given
  50. static int Biffver; // file vesion
  51. static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1
  52. static char **Strtab = nil; // label contents heap
  53. static int Nstrtab = 0; // # of above
  54. static int *Xf; // array of extended format indices
  55. static int Nxf = 0; // # of above
  56. static Biobuf *bo; // stdout (sic)
  57. // table scope
  58. static int Width[Nwidths]; // array of colum widths
  59. static int Ncols = -1; // max colums in table used
  60. static int Content = 0; // type code for contents of sheet
  61. static Row *Root = nil; // one worksheet's worth of cells
  62. static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  63. "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
  64. static char *Errmsgs[] = {
  65. [0x0] "#NULL!", // intersection of two cell ranges is empty
  66. [0x7] "#DIV/0!", // division by zero
  67. [0xf] "#VALUE!", // wrong type of operand
  68. [0x17] "#REF!", // illegal or deleted cell reference
  69. [0x1d] "#NAME?", // wrong function or range name
  70. [0x24] "#NUM!", // value range overflow
  71. [0x2a] "#N/A!", // argument of function not available
  72. };
  73. void
  74. cell(int r, int c, int f, int type, void *val)
  75. {
  76. Row *row, *nrow;
  77. Col *col, *ncol;
  78. if (c > Ncols)
  79. Ncols = c;
  80. if ((ncol = malloc(sizeof(Col))) == nil)
  81. sysfatal("no memory\n");
  82. ncol->c = c;
  83. ncol->f = f;
  84. ncol->type = type;
  85. ncol->next = nil;
  86. switch(type){
  87. case Tnumber: ncol->number = *(double *)val; break;
  88. case Tlabel: ncol->label = (char *)val; break;
  89. case Tindex: ncol->index = *(int *)val; break;
  90. case Tbool: ncol->bool = *(int *)val; break;
  91. case Terror: ncol->error = *(int *)val; break;
  92. default: sysfatal("can't happen error\n");
  93. }
  94. if (Root == nil || Root->r > r){
  95. if ((nrow = malloc(sizeof(Row))) == nil)
  96. sysfatal("no memory\n");
  97. nrow->col = ncol;
  98. ncol->next = nil;
  99. nrow->r = r;
  100. nrow->next = Root;
  101. Root = nrow;
  102. return;
  103. }
  104. for (row = Root; row; row = row->next){
  105. if (row->r == r){
  106. if (row->col->c > c){
  107. ncol->next = row->col;
  108. row->col = ncol;
  109. return;
  110. }
  111. else{
  112. for (col = row->col; col; col = col->next)
  113. if (col->next == nil || col->next->c > c){
  114. ncol->next = col->next;
  115. col->next = ncol;
  116. return;
  117. }
  118. }
  119. }
  120. if (row->next == nil || row->next->r > r){
  121. if ((nrow = malloc(sizeof(Row))) == nil)
  122. sysfatal("no memory\n");
  123. nrow->col = ncol;
  124. nrow->r = r;
  125. nrow->next = row->next;
  126. row->next = nrow;
  127. return;
  128. }
  129. }
  130. sysfatal("cannot happen error\n");
  131. }
  132. struct Tm *
  133. bifftime(double num)
  134. {
  135. long long t = num;
  136. /* Beware - These epochs are wrong, this
  137. * is due to Excel still remaining compatible
  138. * with Lotus-123, which incorrectly believed 1900
  139. * was a leap year
  140. */
  141. if (Datemode)
  142. t -= 24107; // epoch = 1/1/1904
  143. else
  144. t -= 25569; // epoch = 31/12/1899
  145. t *= 60*60*24;
  146. return localtime((long)t);
  147. }
  148. void
  149. numfmt(int fmt, int min, int max, double num)
  150. {
  151. char buf[1024];
  152. struct Tm *tm;
  153. if (fmt == 9)
  154. snprint(buf, sizeof(buf),"%.0f%%", num);
  155. else
  156. if (fmt == 10)
  157. snprint(buf, sizeof(buf),"%f%%", num);
  158. else
  159. if (fmt == 11 || fmt == 48)
  160. snprint(buf, sizeof(buf),"%e", num);
  161. else
  162. if (fmt >= 14 && fmt <= 17){
  163. tm = bifftime(num);
  164. snprint(buf, sizeof(buf),"%d-%s-%d",
  165. tm->mday, Months[tm->mon], tm->year+1900);
  166. }
  167. else
  168. if ((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
  169. tm = bifftime(num);
  170. snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
  171. }
  172. else
  173. if (fmt == 22){
  174. tm = bifftime(num);
  175. snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
  176. tm->hour, tm->min, tm->sec,
  177. tm->mday, Months[tm->mon], tm->year+1900);
  178. }else
  179. snprint(buf, sizeof(buf),"%g", num);
  180. Bprint(bo, "%-*.*q", min, max, buf);
  181. }
  182. void
  183. dump(void)
  184. {
  185. Row *r;
  186. Col *c;
  187. int i, min, max;
  188. for (r = Root; r; r = r->next){
  189. for (c = r->col; c; c = c->next){
  190. if (c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
  191. min = Defwidth;
  192. if ((c->next && c->c == c->next->c) || Nopad)
  193. min = 0;
  194. max = -1;
  195. if (Trunc && min > 2)
  196. max = min -2; // FIXME: -2 because of bug %q format ?
  197. switch(c->type){
  198. case Tnumber:
  199. if (Xf[c->f] == 0)
  200. Bprint(bo, "%-*.*g", min, max, c->number);
  201. else
  202. numfmt(Xf[c->f], min, max, c->number);
  203. break;
  204. case Tlabel:
  205. Bprint(bo, "%-*.*q", min, max, c->label);
  206. break;
  207. case Tbool:
  208. Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False");
  209. break;
  210. case Tindex:
  211. if (c->index < 0 || c->index >= Nstrtab)
  212. sysfatal("SST string out of range - corrupt file?\n");
  213. Bprint(bo, "%-*.*q", min, max, Strtab[c->index]);
  214. break;
  215. case Terror:
  216. if (c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
  217. Bprint(bo, "#ERR=%d", c->index);
  218. else
  219. Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]);
  220. break;
  221. default:
  222. sysfatal("cannot happen error\n");
  223. break;
  224. }
  225. if (c->next){
  226. if (c->next->c == c->c) // bar charts
  227. Bprint(bo, "=");
  228. else{
  229. Bprint(bo, "%s", Delim);
  230. for (i = c->c; c->next && i < c->next->c -1; i++)
  231. Bprint(bo, "%-*.*s%s", min, max, "", Delim);
  232. }
  233. }
  234. }
  235. if (r->next)
  236. for (i = r->r; i < r->next->r; i++)
  237. Bprint(bo, "\n");
  238. }
  239. Bprint(bo, "\n");
  240. }
  241. void
  242. release(void)
  243. {
  244. Row *r, *or;
  245. Col *c, *oc;
  246. r = Root;
  247. while(r){
  248. c = r->col;
  249. while(c){
  250. if (c->type == Tlabel)
  251. free(c->label);
  252. oc = c;
  253. c = c->next;
  254. free(oc);
  255. }
  256. or = r;
  257. r = r->next;
  258. free(or);
  259. }
  260. Root = nil;
  261. memset(Width, 0, sizeof(Width));
  262. Ncols = -1;
  263. }
  264. void
  265. skip(Biff *b, int len)
  266. {
  267. if (Bseek(b->bp, len, 1) == -1)
  268. sysfatal("seek failed - %r\n");
  269. b->len -= len;
  270. }
  271. void
  272. gmem(Biff *b, void *p, int n)
  273. {
  274. if (b->len < n)
  275. sysfatal("short record %d < %d\n", b->len, n);
  276. if (Bread(b->bp, p, n) != n)
  277. sysfatal("unexpected EOF - %r\n");
  278. b->len -= n;
  279. }
  280. void
  281. xd(Biff *b)
  282. {
  283. uvlong off;
  284. uchar buf[16];
  285. int addr, got, n, i, j;
  286. addr = 0;
  287. off = Boffset(b->bp);
  288. while (addr < b->len){
  289. n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
  290. got = Bread(b->bp, buf, n);
  291. Bprint(bo, " %6d ", addr);
  292. addr += n;
  293. for (i = 0; i < got; i++)
  294. Bprint(bo, "%02x ", buf[i]);
  295. for (j = i; j < 16; j++)
  296. Bprint(bo, " ");
  297. Bprint(bo, " ");
  298. for (i = 0; i < got; i++)
  299. Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
  300. Bprint(bo, "\n");
  301. }
  302. Bseek(b->bp, off, 0);
  303. }
  304. static int
  305. getrec(Biff *b)
  306. {
  307. int c;
  308. if ((c = Bgetc(b->bp)) == -1)
  309. return -1; // real EOF
  310. b->op = c;
  311. if ((c = Bgetc(b->bp)) == -1)
  312. sysfatal("unexpected EOF - %r\n");
  313. b->op |= c << 8;
  314. if ((c = Bgetc(b->bp)) == -1)
  315. sysfatal("unexpected EOF - %r\n");
  316. b->len = c;
  317. if ((c = Bgetc(b->bp)) == -1)
  318. sysfatal("unexpected EOF - %r\n");
  319. b->len |= c << 8;
  320. if (b->op == 0 && b->len == 0)
  321. return -1;
  322. if (Debug){
  323. Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
  324. xd(b);
  325. }
  326. return 0;
  327. }
  328. static uvlong
  329. gint(Biff *b, int n)
  330. {
  331. int i, c;
  332. uvlong vl, rc;
  333. if (b->len < n)
  334. return -1;
  335. rc = 0;
  336. for (i = 0; i < n; i++){
  337. if ((c = Bgetc(b->bp)) == -1)
  338. sysfatal("unexpected EOF - %r\n");
  339. b->len--;
  340. vl = c;
  341. rc |= vl << (8*i);
  342. }
  343. return rc;
  344. }
  345. double
  346. grk(Biff *b)
  347. {
  348. int f;
  349. uvlong n;
  350. double d;
  351. n = gint(b, 4);
  352. f = n & 3;
  353. n &= ~3LL;
  354. if (f & 2){
  355. d = n / 4.0;
  356. }
  357. else{
  358. n <<= 32;
  359. memcpy(&d, &n, sizeof(d));
  360. }
  361. if (f & 1)
  362. d /= 100.0;
  363. return d;
  364. }
  365. double
  366. gdoub(Biff *b)
  367. {
  368. double d;
  369. uvlong n = gint(b, 8);
  370. memcpy(&d, &n, sizeof(n));
  371. return d;
  372. }
  373. char *
  374. gstr(Biff *b, int len_width)
  375. {
  376. Rune r;
  377. char *buf, *p;
  378. int nch, w, sz, ln, rt, opt;
  379. if (b->len < len_width){
  380. if (getrec(b) == -1)
  381. sysfatal("expected CONTINUE, got EOF\n");
  382. if (b->op != 0x03c)
  383. sysfatal("expected CONTINUE, got op=0x%x\n", b->op);
  384. }
  385. ln = gint(b, len_width);
  386. if (Biffver != Ver8){
  387. if ((buf = calloc(ln+1, sizeof(char))) == nil)
  388. sysfatal("no memory\n");
  389. gmem(b, buf, ln);
  390. return buf;
  391. }
  392. if ((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
  393. sysfatal("no memory\n");
  394. p = buf;
  395. if (ln == 0)
  396. return buf;
  397. nch = 0;
  398. *buf = 0;
  399. while (1){
  400. opt = gint(b, 1);
  401. w = (opt & 1)? sizeof(Rune): sizeof(char);
  402. if(opt & 4)
  403. sz = gint(b,4);
  404. else
  405. sz = 0;
  406. if(opt & 8)
  407. rt = gint(b, 2);
  408. else
  409. rt = 0;
  410. while(b->len > 0){
  411. r = gint(b, w);
  412. p += runetochar(p, &r);
  413. if (++nch >= ln){
  414. if (opt & 4)
  415. skip(b, sz);
  416. if (opt & 8)
  417. skip(b, rt*4);
  418. return buf;
  419. }
  420. }
  421. if (getrec(b) == -1)
  422. sysfatal("expected CONTINUE, got EOF\n");
  423. if (b->op != 0x03c)
  424. sysfatal("expected CONTINUE, got op=0x%x\n", b->op);
  425. }
  426. sysfatal("cannot ever happen error\n");
  427. return nil; // shut up 8c
  428. }
  429. void
  430. sst(Biff *b)
  431. {
  432. int n;
  433. skip(b, 4); // total # strings
  434. Nstrtab = gint(b, 4); // # unique strings
  435. if ((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
  436. sysfatal("no memory\n");
  437. for (n = 0; n < Nstrtab; n++)
  438. Strtab[n] = gstr(b, 2);
  439. }
  440. void
  441. boolerr(Biff *b)
  442. {
  443. int r = gint(b, 2); // row
  444. int c = gint(b, 2); // col
  445. int f = gint(b, 2); // formatting ref
  446. int v = gint(b, 1); // bool value / err code
  447. int t = gint(b, 1); // type
  448. cell(r, c, f, (t)? Terror: Tbool, &v);
  449. }
  450. void
  451. rk(Biff *b)
  452. {
  453. int r = gint(b, 2); // row
  454. int c = gint(b, 2); // col
  455. int f = gint(b, 2); // formatting ref
  456. double v = grk(b); // value
  457. cell(r, c, f, Tnumber, &v);
  458. }
  459. void
  460. mulrk(Biff *b)
  461. {
  462. int r = gint(b, 2); // row
  463. int c = gint(b, 2); // first col
  464. while (b->len >= 6){
  465. int f = gint(b, 2); // formatting ref
  466. double v = grk(b); // value
  467. cell(r, c++, f, Tnumber, &v);
  468. }
  469. }
  470. void
  471. number(Biff *b)
  472. {
  473. int r = gint(b, 2); // row
  474. int c = gint(b, 2); // col
  475. int f = gint(b, 2); // formatting ref
  476. double v = gdoub(b); // double
  477. cell(r, c, f, Tnumber, &v);
  478. }
  479. void
  480. label(Biff *b)
  481. {
  482. int r = gint(b, 2); // row
  483. int c = gint(b, 2); // col
  484. int f = gint(b, 2); // formatting ref
  485. char *s = gstr(b, 2); // byte string
  486. cell(r, c, f, Tlabel, s);
  487. }
  488. void
  489. labelsst(Biff *b)
  490. {
  491. int r = gint(b, 2); // row
  492. int c = gint(b, 2); // col
  493. int f = gint(b, 2); // formatting ref
  494. int i = gint(b, 2); // sst string ref
  495. cell(r, c, f, Tindex, &i);
  496. }
  497. void
  498. bof(Biff *b)
  499. {
  500. Biffver = gint(b, 2);
  501. Content = gint(b, 2);
  502. }
  503. void
  504. defcolwidth(Biff *b)
  505. {
  506. Defwidth = gint(b, 2);
  507. }
  508. void
  509. datemode(Biff *b)
  510. {
  511. Datemode = gint(b, 2);
  512. }
  513. void
  514. eof(Biff *b)
  515. {
  516. int i;
  517. struct {
  518. int n;
  519. char *s;
  520. } names[] = {
  521. 0x005, "Workbook globals",
  522. 0x006, "Visual Basic module",
  523. 0x010, "Worksheet",
  524. 0x020, "Chart",
  525. 0x040, "Macro sheet",
  526. 0x100, "Workspace file",
  527. };
  528. if (Ncols != -1){
  529. if (All){
  530. for (i = 0; i < nelem(names); i++)
  531. if (names[i].n == Content){
  532. Bprint(bo, "\n# contents %s\n", names[i].s);
  533. dump();
  534. }
  535. }
  536. else
  537. if (Content == 0x10)
  538. dump();
  539. }
  540. release();
  541. USED(b);
  542. }
  543. void
  544. colinfo(Biff *b)
  545. {
  546. int c;
  547. int c1 = gint(b, 2);
  548. int c2 = gint(b, 2);
  549. int w = gint(b, 2);
  550. if (c1 < 0)
  551. sysfatal("negative column number (%d)\n", c1);
  552. if (c2 >= Nwidths)
  553. sysfatal("too many columns (%d > %d)\n", c2, Nwidths);
  554. w /= 256;
  555. if (w > 100)
  556. w = 100;
  557. if (w < 0)
  558. w = 0;
  559. for (c = c1; c <= c2; c++)
  560. Width[c] = w;
  561. }
  562. void
  563. xf(Biff *b)
  564. {
  565. int fmt;
  566. static int nalloc = 0;
  567. skip(b, 2);
  568. fmt = gint(b, 2);
  569. if (nalloc >= Nxf){
  570. nalloc += 20;
  571. if ((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
  572. sysfatal("no memory\n");
  573. }
  574. Xf[Nxf++] = fmt;
  575. }
  576. void
  577. writeaccess(Biff *b)
  578. {
  579. Bprint(bo, "# author %s\n", gstr(b, 2));
  580. }
  581. void
  582. codepage(Biff *b)
  583. {
  584. int codepage = gint(b, 2);
  585. if (codepage != 1200) // 1200 == UTF-16
  586. Bprint(bo, "# codepage %d\n", codepage);
  587. }
  588. void
  589. xls2csv(Biobuf *bp)
  590. {
  591. int i;
  592. Biff biff, *b;
  593. struct {
  594. int op;
  595. void (*func)(Biff *);
  596. } dispatch[] = {
  597. 0x000a, eof,
  598. 0x0022, datemode,
  599. 0x0042, codepage,
  600. 0x0055, defcolwidth,
  601. 0x005c, writeaccess,
  602. 0x007d, colinfo,
  603. 0x00bd, mulrk,
  604. 0x00fc, sst,
  605. 0x00fd, labelsst,
  606. 0x0203, number,
  607. 0x0204, label,
  608. 0x0205, boolerr,
  609. 0x027e, rk,
  610. 0x0809, bof,
  611. 0x00e0, xf,
  612. };
  613. b = &biff;
  614. b->bp = bp;
  615. while(getrec(b) != -1){
  616. for (i = 0; i < nelem(dispatch); i++)
  617. if (b->op == dispatch[i].op)
  618. (*dispatch[i].func)(b);
  619. skip(b, b->len);
  620. }
  621. }
  622. void
  623. usage(void)
  624. {
  625. fprint(2, "usage: %s [-aDnt] [-d delim] /mnt/doc/Workbook\n", argv0);
  626. exits("usage");
  627. }
  628. void
  629. main(int argc, char *argv[])
  630. {
  631. int i;
  632. Biobuf bin, bout, *bp;
  633. ARGBEGIN{
  634. case 'n':
  635. Nopad = 1;
  636. break;
  637. case 't':
  638. Trunc = 1;
  639. break;
  640. case 'a':
  641. All = 1;
  642. break;
  643. case 'd':
  644. Delim = EARGF(usage());
  645. break;
  646. case 'D':
  647. Debug = 1;
  648. break;
  649. default:
  650. usage();
  651. break;
  652. }ARGEND;
  653. if (argc != 1)
  654. usage();
  655. bo = &bout;
  656. quotefmtinstall();
  657. Binit(bo, OWRITE, 1);
  658. if(argc > 0) {
  659. for(i = 0; i < argc; i++){
  660. if ((bp = Bopen(argv[i], OREAD)) == nil)
  661. sysfatal("%s cannot open - %r\n", argv[i]);
  662. xls2csv(bp);
  663. Bterm(bp);
  664. }
  665. } else {
  666. Binit(&bin, 0, OREAD);
  667. xls2csv(&bin);
  668. }
  669. exits(0);
  670. }