123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757 |
- /* msexceltables.c Steve Simon 5-Jan-2005 */
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- #include <ctype.h>
- enum {
- Tillegal = 0,
- Tnumber, // cell types
- Tlabel,
- Tindex,
- Tbool,
- Terror,
- Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
- Nwidths = 4096,
- };
-
-
- typedef struct Biff Biff;
- typedef struct Col Col;
- typedef struct Row Row;
- struct Row {
- Row *next; // next row
- int r; // row number
- Col *col; // list of cols in row
- };
- struct Col {
- Col *next; // next col in row
- int c; // col number
- int f; // index into formating table (Xf)
- int type; // type of value for union below
- union { // value
- int index; // index into string table (Strtab)
- int error;
- int bool;
- char *label;
- double number;
- };
- };
- struct Biff {
- Biobuf *bp; // input file
- int op; // current record type
- int len; // length of current record
- };
- // options
- static int Nopad = 0; // disable padding cells to colum width
- static int Trunc = 0; // truncate cells to colum width
- static int All = 0; // dump all sheet types, Worksheets only by default
- static char *Delim = " "; // field delimiter
- static int Debug = 0;
- // file scope
- static int Defwidth = 10; // default colum width if non given
- static int Biffver; // file vesion
- static int Datemode; // date ref: 1899-Dec-31 or 1904-jan-1
- static char **Strtab = nil; // label contents heap
- static int Nstrtab = 0; // # of above
- static int *Xf; // array of extended format indices
- static int Nxf = 0; // # of above
- static Biobuf *bo; // stdout (sic)
- // table scope
- static int Width[Nwidths]; // array of colum widths
- static int Ncols = -1; // max colums in table used
- static int Content = 0; // type code for contents of sheet
- static Row *Root = nil; // one worksheet's worth of cells
-
- static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
- static char *Errmsgs[] = {
- [0x0] "#NULL!", // intersection of two cell ranges is empty
- [0x7] "#DIV/0!", // division by zero
- [0xf] "#VALUE!", // wrong type of operand
- [0x17] "#REF!", // illegal or deleted cell reference
- [0x1d] "#NAME?", // wrong function or range name
- [0x24] "#NUM!", // value range overflow
- [0x2a] "#N/A!", // argument of function not available
- };
-
- void
- cell(int r, int c, int f, int type, void *val)
- {
- Row *row, *nrow;
- Col *col, *ncol;
- if (c > Ncols)
- Ncols = c;
- if ((ncol = malloc(sizeof(Col))) == nil)
- sysfatal("no memory\n");
- ncol->c = c;
- ncol->f = f;
- ncol->type = type;
- ncol->next = nil;
- switch(type){
- case Tnumber: ncol->number = *(double *)val; break;
- case Tlabel: ncol->label = (char *)val; break;
- case Tindex: ncol->index = *(int *)val; break;
- case Tbool: ncol->bool = *(int *)val; break;
- case Terror: ncol->error = *(int *)val; break;
- default: sysfatal("can't happen error\n");
- }
- if (Root == nil || Root->r > r){
- if ((nrow = malloc(sizeof(Row))) == nil)
- sysfatal("no memory\n");
- nrow->col = ncol;
- ncol->next = nil;
- nrow->r = r;
- nrow->next = Root;
- Root = nrow;
- return;
- }
- for (row = Root; row; row = row->next){
- if (row->r == r){
- if (row->col->c > c){
- ncol->next = row->col;
- row->col = ncol;
- return;
- }
- else{
- for (col = row->col; col; col = col->next)
- if (col->next == nil || col->next->c > c){
- ncol->next = col->next;
- col->next = ncol;
- return;
- }
- }
- }
- if (row->next == nil || row->next->r > r){
- if ((nrow = malloc(sizeof(Row))) == nil)
- sysfatal("no memory\n");
- nrow->col = ncol;
- nrow->r = r;
- nrow->next = row->next;
- row->next = nrow;
- return;
- }
- }
- sysfatal("cannot happen error\n");
- }
- struct Tm *
- bifftime(double num)
- {
- long long t = num;
- /* Beware - These epochs are wrong, this
- * is due to Excel still remaining compatible
- * with Lotus-123, which incorrectly believed 1900
- * was a leap year
- */
- if (Datemode)
- t -= 24107; // epoch = 1/1/1904
- else
- t -= 25569; // epoch = 31/12/1899
- t *= 60*60*24;
- return localtime((long)t);
- }
- void
- numfmt(int fmt, int min, int max, double num)
- {
- char buf[1024];
- struct Tm *tm;
- if (fmt == 9)
- snprint(buf, sizeof(buf),"%.0f%%", num);
- else
- if (fmt == 10)
- snprint(buf, sizeof(buf),"%f%%", num);
- else
- if (fmt == 11 || fmt == 48)
- snprint(buf, sizeof(buf),"%e", num);
- else
- if (fmt >= 14 && fmt <= 17){
- tm = bifftime(num);
- snprint(buf, sizeof(buf),"%d-%s-%d",
- tm->mday, Months[tm->mon], tm->year+1900);
- }
- else
- if ((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
- tm = bifftime(num);
- snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
- }
- else
- if (fmt == 22){
- tm = bifftime(num);
- snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
- tm->hour, tm->min, tm->sec,
- tm->mday, Months[tm->mon], tm->year+1900);
- }else
- snprint(buf, sizeof(buf),"%g", num);
- Bprint(bo, "%-*.*q", min, max, buf);
- }
- void
- dump(void)
- {
- Row *r;
- Col *c;
- int i, min, max;
- for (r = Root; r; r = r->next){
- for (c = r->col; c; c = c->next){
- if (c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
- min = Defwidth;
- if ((c->next && c->c == c->next->c) || Nopad)
- min = 0;
- max = -1;
- if (Trunc && min > 2)
- max = min -2; // FIXME: -2 because of bug %q format ?
- switch(c->type){
- case Tnumber:
- if (Xf[c->f] == 0)
- Bprint(bo, "%-*.*g", min, max, c->number);
- else
- numfmt(Xf[c->f], min, max, c->number);
- break;
- case Tlabel:
- Bprint(bo, "%-*.*q", min, max, c->label);
- break;
- case Tbool:
- Bprint(bo, "%-*.*s", min, max, (c->bool)? "True": "False");
- break;
- case Tindex:
- if (c->index < 0 || c->index >= Nstrtab)
- sysfatal("SST string out of range - corrupt file?\n");
- Bprint(bo, "%-*.*q", min, max, Strtab[c->index]);
- break;
- case Terror:
- if (c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
- Bprint(bo, "#ERR=%d", c->index);
- else
- Bprint(bo, "%-*.*q", min, max, Errmsgs[c->error]);
- break;
- default:
- sysfatal("cannot happen error\n");
- break;
- }
- if (c->next){
- if (c->next->c == c->c) // bar charts
- Bprint(bo, "=");
- else{
- Bprint(bo, "%s", Delim);
- for (i = c->c; c->next && i < c->next->c -1; i++)
- Bprint(bo, "%-*.*s%s", min, max, "", Delim);
- }
- }
- }
- if (r->next)
- for (i = r->r; i < r->next->r; i++)
- Bprint(bo, "\n");
- }
- Bprint(bo, "\n");
- }
- void
- release(void)
- {
- Row *r, *or;
- Col *c, *oc;
- r = Root;
- while(r){
- c = r->col;
- while(c){
- if (c->type == Tlabel)
- free(c->label);
- oc = c;
- c = c->next;
- free(oc);
- }
- or = r;
- r = r->next;
- free(or);
- }
- Root = nil;
- memset(Width, 0, sizeof(Width));
- Ncols = -1;
- }
- void
- skip(Biff *b, int len)
- {
- if (Bseek(b->bp, len, 1) == -1)
- sysfatal("seek failed - %r\n");
- b->len -= len;
- }
- void
- gmem(Biff *b, void *p, int n)
- {
- if (b->len < n)
- sysfatal("short record %d < %d\n", b->len, n);
- if (Bread(b->bp, p, n) != n)
- sysfatal("unexpected EOF - %r\n");
- b->len -= n;
- }
- void
- xd(Biff *b)
- {
- uvlong off;
- uchar buf[16];
- int addr, got, n, i, j;
- addr = 0;
- off = Boffset(b->bp);
- while (addr < b->len){
- n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
- got = Bread(b->bp, buf, n);
- Bprint(bo, " %6d ", addr);
- addr += n;
- for (i = 0; i < got; i++)
- Bprint(bo, "%02x ", buf[i]);
- for (j = i; j < 16; j++)
- Bprint(bo, " ");
- Bprint(bo, " ");
- for (i = 0; i < got; i++)
- Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
- Bprint(bo, "\n");
- }
- Bseek(b->bp, off, 0);
- }
- static int
- getrec(Biff *b)
- {
- int c;
- if ((c = Bgetc(b->bp)) == -1)
- return -1; // real EOF
- b->op = c;
- if ((c = Bgetc(b->bp)) == -1)
- sysfatal("unexpected EOF - %r\n");
- b->op |= c << 8;
- if ((c = Bgetc(b->bp)) == -1)
- sysfatal("unexpected EOF - %r\n");
- b->len = c;
- if ((c = Bgetc(b->bp)) == -1)
- sysfatal("unexpected EOF - %r\n");
- b->len |= c << 8;
- if (b->op == 0 && b->len == 0)
- return -1;
- if (Debug){
- Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
- xd(b);
- }
- return 0;
- }
- static uvlong
- gint(Biff *b, int n)
- {
- int i, c;
- uvlong vl, rc;
- if (b->len < n)
- return -1;
- rc = 0;
- for (i = 0; i < n; i++){
- if ((c = Bgetc(b->bp)) == -1)
- sysfatal("unexpected EOF - %r\n");
- b->len--;
- vl = c;
- rc |= vl << (8*i);
- }
- return rc;
- }
- double
- grk(Biff *b)
- {
- int f;
- uvlong n;
- double d;
- n = gint(b, 4);
- f = n & 3;
- n &= ~3LL;
- if (f & 2){
- d = n / 4.0;
- }
- else{
- n <<= 32;
- memcpy(&d, &n, sizeof(d));
- }
- if (f & 1)
- d /= 100.0;
- return d;
- }
- double
- gdoub(Biff *b)
- {
- double d;
- uvlong n = gint(b, 8);
- memcpy(&d, &n, sizeof(n));
- return d;
- }
- char *
- gstr(Biff *b, int len_width)
- {
- Rune r;
- char *buf, *p;
- int nch, w, sz, ln, rt, opt;
- if (b->len < len_width){
- if (getrec(b) == -1)
- sysfatal("expected CONTINUE, got EOF\n");
- if (b->op != 0x03c)
- sysfatal("expected CONTINUE, got op=0x%x\n", b->op);
- }
- ln = gint(b, len_width);
- if (Biffver != Ver8){
- if ((buf = calloc(ln+1, sizeof(char))) == nil)
- sysfatal("no memory\n");
- gmem(b, buf, ln);
- return buf;
- }
- if ((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
- sysfatal("no memory\n");
- p = buf;
- if (ln == 0)
- return buf;
- nch = 0;
- *buf = 0;
- while (1){
- opt = gint(b, 1);
- w = (opt & 1)? sizeof(Rune): sizeof(char);
- if(opt & 4)
- sz = gint(b,4);
- else
- sz = 0;
- if(opt & 8)
- rt = gint(b, 2);
- else
- rt = 0;
- while(b->len > 0){
- r = gint(b, w);
- p += runetochar(p, &r);
- if (++nch >= ln){
- if (opt & 4)
- skip(b, sz);
- if (opt & 8)
- skip(b, rt*4);
- return buf;
- }
- }
- if (getrec(b) == -1)
- sysfatal("expected CONTINUE, got EOF\n");
- if (b->op != 0x03c)
- sysfatal("expected CONTINUE, got op=0x%x\n", b->op);
- }
- sysfatal("cannot ever happen error\n");
- return nil; // shut up 8c
- }
- void
- sst(Biff *b)
- {
- int n;
-
- skip(b, 4); // total # strings
- Nstrtab = gint(b, 4); // # unique strings
- if ((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
- sysfatal("no memory\n");
- for (n = 0; n < Nstrtab; n++)
- Strtab[n] = gstr(b, 2);
- }
- void
- boolerr(Biff *b)
- {
- int r = gint(b, 2); // row
- int c = gint(b, 2); // col
- int f = gint(b, 2); // formatting ref
- int v = gint(b, 1); // bool value / err code
- int t = gint(b, 1); // type
- cell(r, c, f, (t)? Terror: Tbool, &v);
- }
- void
- rk(Biff *b)
- {
- int r = gint(b, 2); // row
- int c = gint(b, 2); // col
- int f = gint(b, 2); // formatting ref
- double v = grk(b); // value
- cell(r, c, f, Tnumber, &v);
- }
- void
- mulrk(Biff *b)
- {
- int r = gint(b, 2); // row
- int c = gint(b, 2); // first col
- while (b->len >= 6){
- int f = gint(b, 2); // formatting ref
- double v = grk(b); // value
- cell(r, c++, f, Tnumber, &v);
- }
- }
- void
- number(Biff *b)
- {
- int r = gint(b, 2); // row
- int c = gint(b, 2); // col
- int f = gint(b, 2); // formatting ref
- double v = gdoub(b); // double
- cell(r, c, f, Tnumber, &v);
- }
- void
- label(Biff *b)
- {
- int r = gint(b, 2); // row
- int c = gint(b, 2); // col
- int f = gint(b, 2); // formatting ref
- char *s = gstr(b, 2); // byte string
- cell(r, c, f, Tlabel, s);
- }
- void
- labelsst(Biff *b)
- {
- int r = gint(b, 2); // row
- int c = gint(b, 2); // col
- int f = gint(b, 2); // formatting ref
- int i = gint(b, 2); // sst string ref
- cell(r, c, f, Tindex, &i);
- }
- void
- bof(Biff *b)
- {
- Biffver = gint(b, 2);
- Content = gint(b, 2);
- }
- void
- defcolwidth(Biff *b)
- {
- Defwidth = gint(b, 2);
- }
- void
- datemode(Biff *b)
- {
- Datemode = gint(b, 2);
- }
- void
- eof(Biff *b)
- {
- int i;
- struct {
- int n;
- char *s;
- } names[] = {
- 0x005, "Workbook globals",
- 0x006, "Visual Basic module",
- 0x010, "Worksheet",
- 0x020, "Chart",
- 0x040, "Macro sheet",
- 0x100, "Workspace file",
- };
-
- if (Ncols != -1){
- if (All){
- for (i = 0; i < nelem(names); i++)
- if (names[i].n == Content){
- Bprint(bo, "\n# contents %s\n", names[i].s);
- dump();
- }
- }
- else
- if (Content == 0x10)
- dump();
- }
- release();
- USED(b);
- }
- void
- colinfo(Biff *b)
- {
- int c;
- int c1 = gint(b, 2);
- int c2 = gint(b, 2);
- int w = gint(b, 2);
- if (c1 < 0)
- sysfatal("negative column number (%d)\n", c1);
- if (c2 >= Nwidths)
- sysfatal("too many columns (%d > %d)\n", c2, Nwidths);
- w /= 256;
- if (w > 100)
- w = 100;
- if (w < 0)
- w = 0;
- for (c = c1; c <= c2; c++)
- Width[c] = w;
- }
- void
- xf(Biff *b)
- {
- int fmt;
- static int nalloc = 0;
- skip(b, 2);
- fmt = gint(b, 2);
- if (nalloc >= Nxf){
- nalloc += 20;
- if ((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
- sysfatal("no memory\n");
- }
- Xf[Nxf++] = fmt;
- }
- void
- writeaccess(Biff *b)
- {
- Bprint(bo, "# author %s\n", gstr(b, 2));
- }
- void
- codepage(Biff *b)
- {
- int codepage = gint(b, 2);
- if (codepage != 1200) // 1200 == UTF-16
- Bprint(bo, "# codepage %d\n", codepage);
- }
- void
- xls2csv(Biobuf *bp)
- {
- int i;
- Biff biff, *b;
- struct {
- int op;
- void (*func)(Biff *);
- } dispatch[] = {
- 0x000a, eof,
- 0x0022, datemode,
- 0x0042, codepage,
- 0x0055, defcolwidth,
- 0x005c, writeaccess,
- 0x007d, colinfo,
- 0x00bd, mulrk,
- 0x00fc, sst,
- 0x00fd, labelsst,
- 0x0203, number,
- 0x0204, label,
- 0x0205, boolerr,
- 0x027e, rk,
- 0x0809, bof,
- 0x00e0, xf,
- };
-
- b = &biff;
- b->bp = bp;
- while(getrec(b) != -1){
- for (i = 0; i < nelem(dispatch); i++)
- if (b->op == dispatch[i].op)
- (*dispatch[i].func)(b);
- skip(b, b->len);
- }
- }
- void
- usage(void)
- {
- fprint(2, "usage: %s [-aDnt] [-d delim] /mnt/doc/Workbook\n", argv0);
- exits("usage");
- }
- void
- main(int argc, char *argv[])
- {
- int i;
- Biobuf bin, bout, *bp;
-
- ARGBEGIN{
- case 'n':
- Nopad = 1;
- break;
- case 't':
- Trunc = 1;
- break;
- case 'a':
- All = 1;
- break;
- case 'd':
- Delim = EARGF(usage());
- break;
- case 'D':
- Debug = 1;
- break;
- default:
- usage();
- break;
- }ARGEND;
- if (argc != 1)
- usage();
- bo = &bout;
- quotefmtinstall();
- Binit(bo, OWRITE, 1);
- if(argc > 0) {
- for(i = 0; i < argc; i++){
- if ((bp = Bopen(argv[i], OREAD)) == nil)
- sysfatal("%s cannot open - %r\n", argv[i]);
- xls2csv(bp);
- Bterm(bp);
- }
- } else {
- Binit(&bin, 0, OREAD);
- xls2csv(&bin);
- }
- exits(0);
- }
|