123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634 |
- #pragma lib "libhtml.a"
- #pragma src "/sys/src/libhtml"
- /* UTILS: converters between Rune strings and byte buffers (direction inferred from the signatures; confirm in the library source) */
- extern uchar* fromStr(Rune* buf, int n, int chset); /* Runes in, bytes out; n is presumably the length of buf, chset presumably a charset enum value (UTF_8, etc.) */
- extern Rune* toStr(uchar* buf, int n, int chset); /* bytes in, Runes out; same parameter assumptions as fromStr -- TODO confirm who owns the result */
- /* Common LEX and BUILD enums */
- /* Media types (Docinfo's mediatype and scripttype fields hold one of these) */
- enum
- {
- ApplMsword,
- ApplOctets,
- ApplPdf,
- ApplPostscript,
- ApplRtf,
- ApplFramemaker,
- ApplMsexcel,
- ApplMspowerpoint,
- UnknownType,
- Audio32kadpcm,
- AudioBasic,
- ImageCgm,
- ImageG3fax,
- ImageGif,
- ImageIef,
- ImageJpeg,
- ImagePng,
- ImageTiff,
- ImageXBit,
- ImageXBit2,
- ImageXBitmulti,
- ImageXXBitmap,
- ModelVrml,
- MultiDigest,
- MultiMixed,
- TextCss,
- TextEnriched,
- TextHtml,
- TextJavascript,
- TextPlain,
- TextRichtext,
- TextSgml,
- TextTabSeparatedValues,
- TextXml,
- VideoMpeg,
- VideoQuicktime,
- NMEDIATYPES /* count of the media types above; must stay last */
- };
- /* HTTP methods (Form's method field holds one of these) */
- enum
- {
- HGet,
- HPost
- };
- /* Charsets (Docinfo's chset field holds one of these) */
- enum
- {
- UnknownCharset,
- US_Ascii,
- ISO_8859_1,
- UTF_8,
- Unicode,
- NCHARSETS /* count of the charsets above; must stay last */
- };
- /* Frame Target IDs (presumably the predefined values of the integer target ids stored in the various target fields and returned by targetid(); confirm in the library source) */
- enum {
- FTtop,
- FTself,
- FTparent,
- FTblank
- };
- /* LEX: lexer types; only the names are introduced here, no struct bodies appear in the visible part of this header */
- typedef struct Token Token; /* referenced through a pointer by Table's tabletok field */
- typedef struct Attr Attr;
- #pragma incomplete Token
- /* BUILD: forward typedefs for every structure produced by the builder, so the struct bodies below can refer to each other by short name */
- typedef struct Item Item;
- typedef struct Itext Itext;
- typedef struct Irule Irule;
- typedef struct Iimage Iimage;
- typedef struct Iformfield Iformfield;
- typedef struct Itable Itable;
- typedef struct Ifloat Ifloat;
- typedef struct Ispacer Ispacer;
- typedef struct Genattr Genattr;
- typedef struct SEvent SEvent;
- typedef struct Formfield Formfield;
- typedef struct Option Option;
- typedef struct Form Form;
- typedef struct Table Table;
- typedef struct Tablecol Tablecol;
- typedef struct Tablerow Tablerow;
- typedef struct Tablecell Tablecell;
- typedef struct Align Align;
- typedef struct Dimen Dimen;
- typedef struct Anchor Anchor;
- typedef struct DestAnchor DestAnchor;
- typedef struct Map Map;
- typedef struct Area Area;
- typedef struct Background Background;
- typedef struct Kidinfo Kidinfo;
- typedef struct Docinfo Docinfo;
- typedef struct Stack Stack;
- typedef struct Pstate Pstate;
- typedef struct ItemSource ItemSource;
- typedef struct Lay Lay; /* defined in Layout module; only used through pointers in this header */
- #pragma incomplete Lay
- /* Alignment types (one shared numbering for both horizontal and vertical alignment; ALnone is 0) */
- enum {
- ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
- ALchar, ALtop, ALmiddle, ALbottom, ALbaseline,
- };
- struct Align /* a horizontal/vertical alignment pair, each stored in one byte */
- {
- uchar halign; /* one of ALnone, ALleft, etc. */
- uchar valign; /* one of ALnone, ALtop, etc. */
- };
- /*
- * A Dimen holds a dimension specification, especially for those
- * cases when a number can be followed by a % or a * to indicate
- * percentage of total or relative weight.
- * Dnone means no dimension was specified
- */
- /*
- * To fit in a word, use top bits to identify kind, rest for value.
- * The kind occupies bits 29-30 (Dkindmask); every other bit of the
- * word belongs to the value (Dspecmask). dimenkind() and dimenspec(),
- * declared later in this header, presumably extract the two parts.
- */
- enum {
- Dnone = 0,
- Dpixels = (1<<29),
- Dpercent = (2<<29),
- Drelative = (3<<29),
- Dkindmask = (3<<29),
- Dspecmask = (~Dkindmask)
- };
- struct Dimen
- {
- int kindspec; /* kind | spec */
- };
- /*
- * Background is either an image or a color.
- * If both are set, the image has precedence.
- * Used for the document, tables, table rows and table cells.
- */
- struct Background
- {
- Rune* image; /* url */
- int color; /* color value; the encoding is not specified in this header */
- };
- /*
- * There are about a half dozen Item variants.
- * They all look like this at the start (using Plan 9 C's
- * anonymous structure member mechanism),
- * and then the tag field dictates what extra fields there are.
- * The variants are Itext, Irule, Iimage, Iformfield, Itable,
- * Ifloat and Ispacer, each of which embeds an Item as its first member.
- */
- struct Item
- {
- Item* next; /* successor in list of items */
- int width; /* width in pixels (0 for floating items) */
- int height; /* height in pixels */
- int ascent; /* ascent (from top to baseline) in pixels */
- int anchorid; /* if nonzero, which anchor we're in */
- int state; /* flags and values (the IF* constants: IFbrk, IFindentmask, etc.) */
- Genattr*genattr; /* generic attributes and events */
- int tag; /* variant discriminator: Itexttag, etc. */
- };
- /* Item variant tags (values of Item's tag field; one per variant struct) */
- enum {
- Itexttag,
- Iruletag,
- Iimagetag,
- Iformfieldtag,
- Itabletag,
- Ifloattag,
- Ispacertag
- };
- struct Itext /* Item variant holding a run of text */
- {
- Item; /* embedded common header (with tag ==Itexttag) */
- Rune* s; /* the characters */
- int fnt; /* style*NumSize+size (see font stuff, below) */
- int fg; /* Pixel (color) for text */
- uchar voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down); the bias lets an unsigned byte carry a signed offset */
- uchar ul; /* ULnone, ULunder, or ULmid */
- };
- struct Irule /* Item variant holding a rule */
- {
- Item; /* embedded common header (with tag ==Iruletag) */
- uchar align; /* alignment spec (presumably an AL* value) */
- uchar noshade; /* if true, don't shade */
- int size; /* size attr (rule height) */
- int color; /* color attr */
- Dimen wspec; /* width spec */
- };
- struct Iimage /* Item variant holding an image */
- {
- Item; /* embedded common header (with tag ==Iimagetag) */
- Rune* imsrc; /* image src url */
- int imwidth; /* spec width (actual, if no spec) */
- int imheight; /* spec height (actual, if no spec) */
- Rune* altrep; /* alternate representation, in absence of image */
- Map* map; /* if non-nil, client side map */
- int ctlid; /* if animated */
- uchar align; /* vertical alignment */
- uchar hspace; /* in pixels; buffer space on each side */
- uchar vspace; /* in pixels; buffer space on top and bottom */
- uchar border; /* in pixels: border width to draw around image */
- Iimage* nextimage; /* next in list of document's images (list head is Docinfo's images field) */
- void* aux; /* untyped pointer; its use is not shown in this header (presumably reserved for the client) */
- };
- struct Iformfield /* Item variant holding a form field */
- {
- Item; /* embedded common header (with tag ==Iformfieldtag) */
- Formfield*formfield; /* description of the field */
- void* aux; /* untyped pointer; its use is not shown in this header (presumably reserved for the client) */
- };
- struct Itable /* Item variant holding a table */
- {
- Item; /* embedded common header (with tag ==Itabletag) */
- Table* table; /* description of the table */
- };
- struct Ifloat /* Item variant wrapping an item that floats to a margin */
- {
- Item; /* embedded common header (with tag ==Ifloattag) */
- Item* item; /* table or image item that floats */
- int x; /* x coord of top (from right, if ALright) */
- int y; /* y coord of top */
- uchar side; /* margin it floats to: ALleft or ALright */
- uchar infloats; /* true if this has been added to a lay.floats */
- Ifloat* nextfloat; /* in list of floats */
- };
- struct Ispacer /* Item variant holding spacing of one of the ISP* kinds */
- {
- Item; /* embedded common header (with tag ==Ispacertag) */
- int spkind; /* ISPnull, etc. */
- };
- /*
- * Item state flags and value fields (packed into Item's state field).
- * Bits 22-31 are single-bit flags, bits 8-15 hold the indent and
- * bits 0-7 hold the hang.
- * NOTE(review): IFbrk is 0x80000000, which is outside the positive
- * range of a 32-bit int while state is declared int; this relies on
- * the compiler accepting that enumerator value -- confirm before porting.
- */
- enum {
- IFbrk = 0x80000000, /* forced break before this item */
- IFbrksp = 0x40000000, /* add 1 line space to break (IFbrk set too) */
- IFnobrk = 0x20000000, /* break not allowed before this item */
- IFcleft = 0x10000000, /* clear left floats (IFbrk set too) */
- IFcright= 0x08000000, /* clear right floats (IFbrk set too) */
- IFwrap = 0x04000000, /* in a wrapping (non-pre) line */
- IFhang = 0x02000000, /* in a hanging (into left indent) item */
- IFrjust = 0x01000000, /* right justify current line */
- IFcjust = 0x00800000, /* center justify current line */
- IFsmap = 0x00400000, /* image is server-side map */
- IFindentshift = 8, /* shift count that positions the indent value */
- IFindentmask = (255<<IFindentshift), /* current indent, in tab stops */
- IFhangmask = 255 /* current hang into left indent, in 1/10th tabstops */
- };
- /* Bias added to Itext's voff field (voff is an unsigned byte, so a stored value of 128 means zero offset) */
- enum { Voffbias = 128 };
- /* Spacer kinds (values of Ispacer's spkind field) */
- enum {
- ISPnull, /* 0 height and width */
- ISPvline, /* height and ascent of current font */
- ISPhspace, /* width of space in current font */
- ISPgeneral /* other purposes (e.g., between markers and list) */
- };
- /* Generic attributes and events (not many elements will have any of these set); reached through Item's genattr pointer */
- struct Genattr
- {
- Rune* id; /* presumably the id attr */
- Rune* class; /* presumably the class attr */
- Rune* style; /* presumably the style attr */
- Rune* title; /* presumably the title attr */
- SEvent* events; /* list of script events, linked through SEvent's next field */
- };
- struct SEvent /* one script event handler attached to an element */
- {
- SEvent* next; /* in list of events */
- int type; /* SEonblur, etc. */
- Rune* script; /* presumably the script text for this event */
- };
- enum { /* script event types (values of SEvent's type field) */
- SEonblur, SEonchange, SEonclick, SEondblclick,
- SEonfocus, SEonkeypress, SEonkeyup, SEonload,
- SEonmousedown, SEonmousemove, SEonmouseout,
- SEonmouseover, SEonmouseup, SEonreset, SEonselect,
- SEonsubmit, SEonunload,
- Numscriptev /* count of the event types above; must stay last */
- };
- /* Form field types (values of Formfield's ftype field) */
- enum {
- Ftext,
- Fpassword,
- Fcheckbox,
- Fradio,
- Fsubmit,
- Fhidden,
- Fimage,
- Freset,
- Ffile,
- Fbutton,
- Fselect,
- Ftextarea
- };
- /* Information about a field in a form; fields of one form are chained through next, starting at Form's fields */
- struct Formfield
- {
- Formfield*next; /* in list of fields for a form */
- int ftype; /* Ftext, Fpassword, etc. */
- int fieldid; /* serial no. of field within its form */
- Form* form; /* containing form */
- Rune* name; /* name attr */
- Rune* value; /* value attr */
- int size; /* size attr */
- int maxlength; /* maxlength attr */
- int rows; /* rows attr */
- int cols; /* cols attr */
- uchar flags; /* FFchecked, etc. */
- Option* options; /* for Fselect fields */
- Item* image; /* image item, for Fimage fields */
- int ctlid; /* identifies control for this field in layout */
- SEvent* events; /* same as genattr->events of containing item */
- };
- enum { /* bits for Formfield's flags field (a uchar, so only bits 0-7 are available) */
- FFchecked = (1<<7),
- FFmultiple = (1<<6)
- };
- /* Option holds info about an option in a "select" form field; options are chained from Formfield's options */
- struct Option
- {
- Option* next; /* next in list of options for a field */
- int selected; /* true if selected initially */
- Rune* value; /* value attr */
- Rune* display; /* display string */
- };
- /* Form holds info about a form; forms of a document are chained through next, starting at Docinfo's forms */
- struct Form
- {
- Form* next; /* in list of forms for document */
- int formid; /* serial no. of form within its doc */
- Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */
- Rune* action; /* action attr */
- int target; /* target attr as targetid */
- int method; /* HGet or HPost */
- int nfields; /* number of fields */
- Formfield*fields; /* form's fields, in input order */
- };
- /* Flags used in various table structures (the uchar flags fields of Tablerow and Tablecell name these) */
- enum {
- TFparsing = (1<<7),
- TFnowrap = (1<<6),
- TFisth = (1<<5)
- };
- /* Information about a table; tables of a document are chained through next, starting at Docinfo's tables */
- struct Table
- {
- Table* next; /* next in list of document's tables */
- int tableid; /* serial no. of table within its doc */
- Tablerow*rows; /* array of row specs (list during parsing) */
- int nrow; /* total number of rows */
- Tablecol*cols; /* array of column specs */
- int ncol; /* total number of columns */
- Tablecell*cells; /* list of unique cells */
- int ncell; /* total number of cells */
- Tablecell***grid; /* 2-D array of cells (presumably indexed by row then column; confirm) */
- Align align; /* alignment spec for whole table */
- Dimen width; /* width spec for whole table */
- int border; /* border attr */
- int cellspacing; /* cellspacing attr */
- int cellpadding; /* cellpadding attr */
- Background background; /* table background */
- Item* caption; /* linked list of Items, giving caption */
- uchar caption_place; /* ALtop or ALbottom */
- Lay* caption_lay; /* layout of caption */
- int totw; /* total width */
- int toth; /* total height */
- int caph; /* caption height */
- int availw; /* used for previous 3 sizes (totw, toth, caph) */
- Token* tabletok; /* token that started the table */
- uchar flags; /* Lchanged, perhaps; NOTE(review): no Lchanged constant is declared in the visible part of this header */
- };
- struct Tablecol /* per-column information; Table's cols points to an array of these */
- {
- int width;
- Align align;
- Point pos; /* Point is not declared in this header; it must come from a header included earlier */
- };
- struct Tablerow /* per-row information; Table's rows points to these */
- {
- Tablerow*next; /* Next in list of rows, during parsing */
- Tablecell*cells; /* Cells in row, linked through nextinrow */
- int height;
- int ascent;
- Align align;
- Background background;
- Point pos; /* Point is not declared in this header; it must come from a header included earlier */
- uchar flags; /* 0 or TFparsing */
- };
- /*
- * A Tablecell is one cell of a table.
- * It may span multiple rows and multiple columns.
- * Cells are linked on two lists: the list for all the cells of
- * a table (the next pointers), and the list of all the
- * cells that start in a given row (the nextinrow pointers)
- */
- struct Tablecell
- {
- Tablecell*next; /* next in list of table's cells */
- Tablecell*nextinrow; /* next in list of row's cells */
- int cellid; /* serial no. of cell within table */
- Item* content; /* contents before layout */
- Lay* lay; /* layout of cell */
- int rowspan; /* number of rows spanned by this cell */
- int colspan; /* number of cols spanned by this cell */
- Align align; /* alignment spec */
- uchar flags; /* TFparsing, TFnowrap, TFisth */
- Dimen wspec; /* suggested width */
- int hspec; /* suggested height */
- Background background; /* cell background */
- int minw; /* minimum possible width */
- int maxw; /* maximum width */
- int ascent; /* cell's ascent */
- int row; /* row of upper left corner */
- int col; /* col of upper left corner */
- Point pos; /* nw corner of cell contents, in cell */
- };
- /* Anchor is for info about hyperlinks that go somewhere; chained through next, starting at Docinfo's anchors */
- struct Anchor
- {
- Anchor* next; /* next in list of document's anchors */
- int index; /* serial no. of anchor within its doc */
- Rune* name; /* name attr */
- Rune* href; /* href attr */
- int target; /* target attr as targetid */
- };
- /* DestAnchor is for info about hyperlinks that are destinations; chained through next, starting at Docinfo's dests */
- struct DestAnchor
- {
- DestAnchor*next; /* next in list of document's destanchors */
- int index; /* serial no. of anchor within its doc */
- Rune* name; /* name attr */
- Item* item; /* the destination */
- };
- /* Maps (client side); chained through next, starting at Docinfo's maps, and referenced from Iimage's map field */
- struct Map
- {
- Map* next; /* next in list of document's maps */
- Rune* name; /* map name */
- Area* areas; /* list of map areas */
- };
- struct Area /* one region of a client-side Map */
- {
- Area* next; /* next in list of a map's areas */
- int shape; /* SHrect, etc. */
- Rune* href; /* associated hypertext link */
- int target; /* associated target frame */
- Dimen* coords; /* array of coords for shape */
- int ncoords; /* size of coords array */
- };
- /* Area shapes (values of Area's shape field) */
- enum {
- SHrect, SHcircle, SHpoly
- };
- /* Fonts are represented by integers: style*NumSize + size (this is the encoding of Itext's fnt field) */
- /* Font styles */
- enum {
- FntR, /* roman */
- FntI, /* italic */
- FntB, /* bold */
- FntT, /* typewriter */
- NumStyle /* count of the styles above; must stay last */
- };
- /* Font sizes (the size part of a font number) */
- enum {
- Tiny,
- Small,
- Normal,
- Large,
- Verylarge,
- NumSize /* count of the sizes above; must stay last */
- };
- enum { /* derived font constants */
- NumFnt = NumStyle*NumSize, /* number of distinct font numbers (4 styles * 5 sizes = 20) */
- DefFnt = FntR*NumSize+Normal, /* roman style at Normal size (evaluates to 2) */
- };
- /* Lines are needed through some text items, for underlining or strikethrough (values of Itext's ul field) */
- enum {
- ULnone, ULunder, ULmid
- };
- /* Kidinfo flags (independent bits, combined in Kidinfo's flags field) */
- enum {
- FRnoresize = (1<<0),
- FRnoscroll = (1<<1),
- FRhscroll = (1<<2),
- FRvscroll = (1<<3),
- FRhscrollauto = (1<<4),
- FRvscrollauto = (1<<5)
- };
- /* Information about child frame or frameset; isframeset tells which group of fields below applies */
- struct Kidinfo
- {
- Kidinfo*next; /* in list of kidinfos for a frameset */
- int isframeset; /* presumably nonzero for a frameset, zero for a frame */
- /* fields for "frame" */
- Rune* src; /* only nil if a "dummy" frame or this is frameset */
- Rune* name; /* always non-empty if this isn't frameset */
- int marginw;
- int marginh;
- int framebd;
- int flags; /* FRnoresize, etc. */
- /* fields for "frameset" */
- Dimen* rows; /* array of row dimensions */
- int nrows; /* length of rows */
- Dimen* cols; /* array of col dimensions */
- int ncols; /* length of cols */
- Kidinfo*kidinfos; /* presumably the list of children of this frameset, linked through next */
- Kidinfo*nextframeset; /* parsing stack */
- };
- /* Document info (global information about HTML page); parsehtml hands one back through its pdi argument and freedocinfo takes one */
- struct Docinfo
- {
- /* stuff from HTTP headers, doc head, and body tag */
- Rune* src; /* original source of doc */
- Rune* base; /* base URL of doc */
- Rune* doctitle; /* from <title> element */
- Background background; /* background specification */
- Iimage* backgrounditem; /* Image Item for doc background image, or nil */
- int text; /* doc foreground (text) color */
- int link; /* unvisited hyperlink color */
- int vlink; /* visited hyperlink color */
- int alink; /* highlighting hyperlink color */
- int target; /* target frame default */
- int chset; /* ISO_8859_1, etc. */
- int mediatype; /* TextHtml, etc. */
- int scripttype; /* TextJavascript, etc. */
- int hasscripts; /* true if scripts used */
- Rune* refresh; /* content of <meta http-equiv=Refresh ...> */
- Kidinfo*kidinfo; /* if a frameset */
- int frameid; /* id of document frame */
- /* info needed to respond to user actions */
- Anchor* anchors; /* list of href anchors */
- DestAnchor*dests; /* list of destination anchors */
- Form* forms; /* list of forms */
- Table* tables; /* list of tables */
- Map* maps; /* list of maps */
- Iimage* images; /* list of image items (through nextimage links) */
- };
- extern int dimenkind(Dimen d); /* presumably the kind part of d.kindspec (Dnone, Dpixels, Dpercent or Drelative) */
- extern int dimenspec(Dimen d); /* presumably the value part of d.kindspec */
- extern void freedocinfo(Docinfo* d); /* presumably releases d and what it owns; confirm exact ownership in the library source */
- extern void freeitems(Item* ithead); /* presumably releases the item list starting at ithead */
- extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); /* parses datalen bytes at data into a list of Items; mtype and chset are presumably media type and charset enum values, and *pdi presumably receives the document's Docinfo */
- extern void printitems(Item* items, char* msg); /* presumably a debugging dump of an item list, labelled with msg */
- extern int targetid(Rune* s); /* maps a target name to its integer target id */
- extern Rune* targetname(int targid); /* maps an integer target id back to its name */
- extern int validitems(Item* i); /* presumably a consistency check of an item list; meaning of the result is not shown here */
- #pragma varargck type "I" Item*
- /* Control print output: global switches defined by the library; the names suggest warnings, lexer debugging and builder debugging respectively (value ranges not shown here) */
- extern int warn;
- extern int dbglex;
- extern int dbgbuild;
- /*
- * To be provided by caller
- * emalloc and erealloc should not return if can't get memory.
- * emalloc should zero its memory.
- * The library only declares these; a program linking against it
- * must define both.
- */
- extern void* emalloc(ulong); /* the unnamed argument is presumably the byte count, like erealloc's size */
- extern void* erealloc(void* p, ulong size);
|