#pragma lib "libhtml.a"
#pragma src "/sys/src/libhtml"
// UTILS
// String conversion helpers taking a buffer, a length n and a chset argument.
// NOTE(review): presumably chset is one of the charset codes (US_Ascii,
// ISO_8859_1, UTF_8, ...) and these convert between Rune strings and encoded
// bytes; direction, units of n and ownership of the result are not visible
// here — confirm against the implementation.
extern uchar* fromStr(Rune* buf, int n, int chset);
extern Rune* toStr(uchar* buf, int n, int chset);
// Common LEX and BUILD enums
// Media types.
// Values are assigned consecutively from 0 in the order listed, so the
// order must not change.  NMEDIATYPES follows the last real entry and
// therefore equals the number of media types.
enum
{
	// Appl* entries
	ApplMsword, ApplOctets, ApplPdf, ApplPostscript,
	ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint,

	UnknownType,

	// Audio* entries
	Audio32kadpcm, AudioBasic,

	// Image* entries
	ImageCgm, ImageG3fax, ImageGif, ImageIef, ImageJpeg, ImagePng,
	ImageTiff, ImageXBit, ImageXBit2, ImageXBitmulti, ImageXXBitmap,

	// Model* and Multi* entries
	ModelVrml,
	MultiDigest, MultiMixed,

	// Text* entries
	TextCss, TextEnriched, TextHtml, TextJavascript, TextPlain,
	TextRichtext, TextSgml, TextTabSeparatedValues, TextXml,

	// Video* entries
	VideoMpeg, VideoQuicktime,

	NMEDIATYPES
};
// HTTP methods (stored in Form.method)
enum
{
	HGet = 0,
	HPost = 1
};
// Charsets.
// NCHARSETS comes after the last real entry and so counts the entries
// above it, UnknownCharset included.
enum
{
	UnknownCharset = 0,
	US_Ascii = 1,
	ISO_8859_1 = 2,
	UTF_8 = 3,
	Unicode = 4,
	NCHARSETS = 5
};
// Frame Target IDs
enum {
	FTtop = 0,
	FTself = 1,
	FTparent = 2,
	FTblank = 3
};
// Forward type names.
// Each typedef lets the structures declared later in this header refer to
// one another by pointer before their bodies appear.
// LEX
// (Token and Attr have no struct body in the visible part of this header.)
typedef struct Token Token;
typedef struct Attr Attr;
// BUILD
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
// Stack, Pstate and ItemSource are only named here; no struct body for them
// appears in the visible part of this header.
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay; // defined in Layout module
// Alignment types.
// Used for both the horizontal and the vertical member of an Align,
// and for the single-byte align/side fields of several items.
enum {
	ALnone = 0,
	ALleft = 1,
	ALcenter = 2,
	ALright = 3,
	ALjustify = 4,
	ALchar = 5,
	ALtop = 6,
	ALmiddle = 7,
	ALbottom = 8,
	ALbaseline = 9
};
// Align pairs a horizontal and a vertical alignment code, each stored
// in one byte and taken from the AL* alignment types.
struct Align
{
uchar halign; // one of ALnone, ALleft, etc.
uchar valign; // one of ALnone, ALtop, etc.
};
// A Dimen is a dimension specification packed into one int.
// A number in the source may be followed by % (percentage of total)
// or * (relative weight); the kind is kept in bits 29-30 and the
// numeric value in the remaining bits.
// Dnone means that no dimension was specified.
enum {
	Dnone = 0,
	Dpixels = 0x20000000,	// 1<<29
	Dpercent = 0x40000000,	// 2<<29
	Drelative = 0x60000000,	// 3<<29
	Dkindmask = 0x60000000,	// selects the kind bits
	Dspecmask = ~0x60000000	// selects everything except the kind bits
};
struct Dimen
{
	int kindspec;	// kind | spec
};
// Background is either an image or a color.
// If both are set, the image has precedence.
// Embedded by value in Table, Tablerow, Tablecell and Docinfo.
struct Background
{
Rune* image; // url
int color;
};
// There are about a half dozen Item variants.
// They all look like this at the start (using Plan 9 C's
// anonymous structure member mechanism),
// and then the tag field dictates what extra fields there are.
// A variant structure begins with an unnamed Item member, so these
// common fields are accessed directly on the variant.
struct Item
{
Item* next; // successor in list of items
int width; // width in pixels (0 for floating items)
int height; // height in pixels
int ascent; // ascent (from top to baseline) in pixels
int anchorid; // if nonzero, which anchor we're in
int state; // flags and values (see the IF* item state constants)
Genattr* genattr; // generic attributes and events
int tag; // variant discriminator: Itexttag, etc.
};
// Item variant tags: the value of Item.tag names the structure
// (Itext, Irule, ...) that the item really is.
enum {
	Itexttag = 0,
	Iruletag = 1,
	Iimagetag = 2,
	Iformfieldtag = 3,
	Itabletag = 4,
	Ifloattag = 5,
	Ispacertag = 6
};
// Text item: a run of characters with one font, color and line style.
struct Itext
{
Item; // (with tag ==Itexttag)
Rune* s; // the characters
int fnt; // style*NumSize+size (see the Fnt* styles and font sizes)
int fg; // Pixel (color) for text
uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
uchar ul; // ULnone, ULunder, or ULmid
};
// Rule item: carries alignment, shading, size and width attributes.
struct Irule
{
Item; // (with tag ==Iruletag)
uchar align; // alignment spec
uchar noshade; // if true, don't shade
int size; // size attr (rule height)
Dimen wspec; // width spec
};
// Image item.  Image items are also chained together through
// nextimage (Docinfo.images is a list linked that way).
struct Iimage
{
Item; // (with tag ==Iimagetag)
Rune* imsrc; // image src url
int imwidth; // spec width (actual, if no spec)
int imheight; // spec height (actual, if no spec)
Rune* altrep; // alternate representation, in absence of image
Map* map; // if non-nil, client side map
int ctlid; // if animated
uchar align; // vertical alignment
uchar hspace; // in pixels; buffer space on each side
uchar vspace; // in pixels; buffer space on top and bottom
uchar border; // in pixels: border width to draw around image
Iimage* nextimage; // next in list of document's images
};
// Form field item: an Item that points at the Formfield it stands for.
struct Iformfield
{
Item; // (with tag ==Iformfieldtag)
Formfield* formfield;
};
// Table item: an Item that points at the Table it stands for.
struct Itable
{
Item; // (with tag ==Itabletag)
Table* table;
};
// Floating item: wraps a table or image item that floats to a margin.
struct Ifloat
{
Item; // (with tag ==Ifloattag)
Item* item; // table or image item that floats
int x; // x coord of top (from right, if ALright)
int y; // y coord of top
uchar side; // margin it floats to: ALleft or ALright
uchar infloats; // true if this has been added to a lay.floats
Ifloat* nextfloat; // in list of floats
};
// Spacer item; spkind selects which kind of spacer it is.
// NOTE(review): the ISP* constants named below are not defined in the
// visible part of this header.
struct Ispacer
{
Item; // (with tag ==Ispacertag)
int spkind; // ISPnull, etc.
};
// Item state flags and value fields (stored in Item.state).
// The IF* constants down to IFsmap are single-bit flags.
// IFindentshift/IFindentmask describe a multi-bit value field packed
// into the same word: the field is (state & IFindentmask) >> IFindentshift.
//
// NOTE(review): this enum was damaged in the file.  The line defining
// IFindentmask read "(255<events of containing item": everything between
// a '<' and a later '>' had been dropped, apparently by markup stripping.
// IFindentmask is restored as 255<<IFindentshift, the reading that matches
// the adjacent shift constant.  The dropped span presumably also held
// definitions this header refers to but never defines: Voffbias
// (Itext.voff), ISPnull etc. (Ispacer.spkind), and the bodies of
// struct Genattr, struct SEvent and struct Formfield, possibly with
// further IF* constants.  TODO: restore them from the upstream header.
enum {
	IFbrk = 0x80000000,	// forced break before this item
	IFbrksp = 0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk = 0x20000000,	// break not allowed before this item
	IFcleft = 0x10000000,	// clear left floats (IFbrk set too)
	IFcright = 0x08000000,	// clear right floats (IFbrk set too)
	IFwrap = 0x04000000,	// in a wrapping (non-pre) line
	IFhang = 0x02000000,	// in a hanging (into left indent) item
	IFrjust = 0x01000000,	// right justify current line
	IFcjust = 0x00800000,	// center justify current line
	IFsmap = 0x00400000,	// image is server-side map
	IFindentshift = 8,
	IFindentmask = (255<<IFindentshift)
};
// FF* flag bits.
// NOTE(review): presumably these belong to a form field's flags byte;
// the structure that would hold them is not defined in the visible
// part of this header — confirm.
enum {
	FFchecked = 0x80,	// bit 7
	FFmultiple = 0x40	// bit 6
};
// Option holds info about an option in a "select" form field.
// Options of one field form a singly linked list through next.
struct Option
{
Option* next; // next in list of options for a field
int selected; // true if selected initially
Rune* value; // value attr
Rune* display; // display string
};
// Form holds info about a form.
// Forms of one document form a singly linked list through next
// (Docinfo.forms is the head).
struct Form
{
Form* next; // in list of forms for document
int formid; // serial no. of form within its doc
Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
Rune* action; // action attr
int target; // target attr as targetid
int method; // HGet or HPost
int nfields; // number of fields
Formfield* fields; // form's fields, in input order
};
// Flags used in various table structures
// (the flags bytes of Tablerow and Tablecell name them).
enum {
	TFparsing = 0x80,	// bit 7
	TFnowrap = 0x40,	// bit 6
	TFisth = 0x20	// bit 5
};
// Information about a table.
// Tables of one document form a singly linked list through next
// (Docinfo.tables is the head).
struct Table
{
Table* next; // next in list of document's tables
int tableid; // serial no. of table within its doc
Tablerow* rows; // array of row specs (list during parsing)
int nrow; // total number of rows
Tablecol* cols; // array of column specs
int ncol; // total number of columns
Tablecell* cells; // list of unique cells
int ncell; // total number of cells
Tablecell*** grid; // 2-D array of cells
Align align; // alignment spec for whole table
Dimen width; // width spec for whole table
int border; // border attr
int cellspacing; // cellspacing attr
int cellpadding; // cellpadding attr
Background background; // table background
Item* caption; // linked list of Items, giving caption
uchar caption_place; // ALtop or ALbottom
Lay* caption_lay; // layout of caption
int totw; // total width
int toth; // total height
int caph; // caption height
int availw; // used for previous 3 sizes
Token* tabletok; // token that started the table
uchar flags; // Lchanged, perhaps
};
// Per-column information for a Table (Table.cols is an array of these).
// NOTE(review): Point is not declared in this header; presumably it
// comes from a header included before this one — confirm.
struct Tablecol
{
int width;
Align align;
Point pos;
};
// Per-row information for a Table.
// Table.rows holds these as a list (through next) during parsing and
// as an array afterwards.
struct Tablerow
{
Tablerow* next; // Next in list of rows, during parsing
Tablecell* cells; // Cells in row, linked through nextinrow
int height;
int ascent;
Align align;
Background background;
Point pos;
uchar flags; // 0 or TFparsing
};
// A Tablecell is one cell of a table.
// It may span multiple rows and multiple columns.
// Cells are linked on two lists: the list of all the cells of
// a table (the next pointers), and the list of all the
// cells that start in a given row (the nextinrow pointers)
struct Tablecell
{
Tablecell* next; // next in list of table's cells
Tablecell* nextinrow; // next in list of row's cells
int cellid; // serial no. of cell within table
Item* content; // contents before layout
Lay* lay; // layout of cell
int rowspan; // number of rows spanned by this cell
int colspan; // number of cols spanned by this cell
Align align; // alignment spec
uchar flags; // TFparsing, TFnowrap, TFisth
Dimen wspec; // suggested width
int hspec; // suggested height
Background background; // cell background
int minw; // minimum possible width
int maxw; // maximum width
int ascent; // cell's ascent
int row; // row of upper left corner
int col; // col of upper left corner
Point pos; // nw corner of cell contents, in cell
};
// Anchor is for info about hyperlinks that go somewhere.
// Anchors of one document form a singly linked list through next
// (Docinfo.anchors is the head).
struct Anchor
{
Anchor* next; // next in list of document's anchors
int index; // serial no. of anchor within its doc
Rune* name; // name attr
Rune* href; // href attr
int target; // target attr as targetid
};
// DestAnchor is for info about hyperlinks that are destinations.
// Destination anchors of one document form a singly linked list
// through next (Docinfo.dests is the head).
struct DestAnchor
{
DestAnchor* next; // next in list of document's destanchors
int index; // serial no. of anchor within its doc
Rune* name; // name attr
Item* item; // the destination
};
// Maps (client side).
// A Map is a named list of Areas; maps of one document are linked
// through next (Docinfo.maps is the head).
struct Map
{
Map* next; // next in list of document's maps
Rune* name; // map name
Area* areas; // list of map areas
};
// One area of a client-side Map: a shape, its coordinates and the
// link and target frame associated with it.
struct Area
{
Area* next; // next in list of a map's areas
int shape; // SHrect, etc.
Rune* href; // associated hypertext link
int target; // associated target frame
Dimen* coords; // array of coords for shape
int ncoords; // size of coords array
};
// Area shapes (values for Area.shape)
enum {
	SHrect = 0,
	SHcircle = 1,
	SHpoly = 2
};
// Fonts are represented by integers: style*NumSize + size

// Font styles; NumStyle counts them.
enum {
	FntR = 0,	// roman
	FntI = 1,	// italic
	FntB = 2,	// bold
	FntT = 3,	// typewriter
	NumStyle = 4
};

// Font sizes; NumSize counts them.
enum {
	Tiny = 0,
	Small = 1,
	Normal = 2,
	Large = 3,
	Verylarge = 4,
	NumSize = 5
};

// Derived values: the number of distinct font integers, and the
// integer for the default font (roman style, Normal size).
enum {
	NumFnt = NumStyle * NumSize,
	DefFnt = FntR * NumSize + Normal
};
// Lines are needed through some text items, for underlining or strikethrough
// (values for Itext.ul)
enum {
	ULnone = 0,
	ULunder = 1,
	ULmid = 2
};
// Kidinfo flags: independent bits, one per constant
enum {
	FRnoresize = 0x01,	// bit 0
	FRnoscroll = 0x02,	// bit 1
	FRhscroll = 0x04,	// bit 2
	FRvscroll = 0x08,	// bit 3
	FRhscrollauto = 0x10,	// bit 4
	FRvscrollauto = 0x20	// bit 5
};
// Information about child frame or frameset.
// One structure serves both cases: isframeset tells which group of
// fields below is meaningful.
struct Kidinfo
{
Kidinfo* next; // in list of kidinfos for a frameset
int isframeset;
// fields for "frame"
Rune* src; // only nil if a "dummy" frame or this is frameset
Rune* name; // always non-empty if this isn't frameset
int marginw;
int marginh;
int framebd;
int flags; // presumably FR* bits (the "Kidinfo flags") — confirm
// fields for "frameset"
Dimen* rows; // array of row dimensions
int nrows; // length of rows
Dimen* cols; // array of col dimensions
int ncols; // length of cols
Kidinfo* kidinfos;
Kidinfo* nextframeset; // parsing stack
};
// Document info (global information about HTML page).
// parsehtml hands one of these back through its Docinfo** argument;
// freedocinfo takes a Docinfo*.
struct Docinfo
{
// stuff from HTTP headers, doc head, and body tag
Rune* src; // original source of doc
Rune* base; // base URL of doc
Rune* doctitle; // from <title> element
Background background; // background specification
Iimage* backgrounditem; // Image Item for doc background image, or nil
int text; // doc foreground (text) color
int link; // unvisited hyperlink color
int vlink; // visited hyperlink color
int alink; // highlighting hyperlink color
int target; // target frame default
int chset; // ISO_8859_1, etc.
int mediatype; // TextHtml, etc.
int scripttype; // TextJavascript, etc.
int hasscripts; // true if scripts used
Rune* refresh; // content of the document's refresh directive (NOTE(review): original comment was cut off after "content of", markup lost; presumably a <meta http-equiv=Refresh ...> element — confirm)
Kidinfo* kidinfo; // if a frameset
int frameid; // id of document frame
// info needed to respond to user actions
Anchor* anchors; // list of href anchors
DestAnchor* dests; // list of destination anchors
Form* forms; // list of forms
Table* tables; // list of tables
Map* maps; // list of maps
Iimage* images; // list of image items (through nextimage links)
};
// Library entry points.
// NOTE(review): the per-function notes below are inferred from names and
// signatures only; the implementations are not in this header — confirm.
// dimenkind/dimenspec: presumably the kind bits (d.kindspec & Dkindmask)
// and the value bits (d.kindspec & Dspecmask) of a Dimen.
extern int dimenkind(Dimen d);
extern int dimenspec(Dimen d);
// freedocinfo/freeitems: presumably release a Docinfo and an Item list.
extern void freedocinfo(Docinfo* d);
extern void freeitems(Item* ithead);
// parsehtml: takes datalen bytes at data plus a source name, a media type
// and a charset; returns an Item list and stores a Docinfo* through pdi.
extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
extern void printitems(Item* items, char* msg);
// targetid/targetname: map between a target name and its integer id
// (the "target attr as targetid" fields of Form, Anchor and Area).
extern int targetid(Rune* s);
extern Rune* targetname(int targid);
extern int validitems(Item* i);
// Tells the Plan 9 compiler's print-format checker that the "I" verb
// takes an Item* argument.
#pragma varargck type "I" Item*
// Control print output.
// These are declarations only; the variables are defined elsewhere.
extern int warn;
extern int dbglex;
extern int dbgbuild;
// To be provided by caller
// (the program using this header must define both functions).
// emalloc and erealloc should not return if can't get memory.
// emalloc should zero its memory.
extern void* emalloc(ulong);
extern void* erealloc(void* p, ulong size);