// html.h -- declarations for libhtml (see the #pragma lib / #pragma src lines below)
  1. #pragma lib "libhtml.a"
  2. #pragma src "/sys/src/libhtml"
  3. // UTILS
  4. extern uchar* fromStr(Rune* buf, int n, int chset);
  5. extern Rune* toStr(uchar* buf, int n, int chset);
  // Common LEX and BUILD enums

  // Media types.
  // Values are consecutive starting at 0; NMEDIATYPES is the count.
  enum
  {
      // application/...
      ApplMsword = 0,
      ApplOctets,
      ApplPdf,
      ApplPostscript,
      ApplRtf,
      ApplFramemaker,
      ApplMsexcel,
      ApplMspowerpoint,

      UnknownType,

      // audio/...
      Audio32kadpcm,
      AudioBasic,

      // image/...
      ImageCgm,
      ImageG3fax,
      ImageGif,
      ImageIef,
      ImageJpeg,
      ImagePng,
      ImageTiff,
      ImageXBit,
      ImageXBit2,
      ImageXBitmulti,
      ImageXXBitmap,

      // model/...
      ModelVrml,

      // multipart/...
      MultiDigest,
      MultiMixed,

      // text/...
      TextCss,
      TextEnriched,
      TextHtml,
      TextJavascript,
      TextPlain,
      TextRichtext,
      TextSgml,
      TextTabSeparatedValues,
      TextXml,

      // video/...
      VideoMpeg,
      VideoQuicktime,

      NMEDIATYPES     // number of media types above
  };
  // HTTP methods
  enum { HGet = 0, HPost };
  // Charsets.
  // Values are consecutive starting at 0; NCHARSETS is the count.
  enum
  {
      UnknownCharset = 0,
      US_Ascii,
      ISO_8859_1,
      UTF_8,
      Unicode,
      NCHARSETS       // number of charsets above
  };
  // Frame Target IDs
  enum { FTtop = 0, FTself, FTparent, FTblank };
  // LEX
  // Forward declarations of the lexer's structure types.
  typedef struct Attr Attr;
  typedef struct Token Token;
  74. #pragma incomplete Token
  // BUILD
  // Forward declarations of the builder's structure types.

  // Items and their variants
  typedef struct Item Item;
  typedef struct Itext Itext;
  typedef struct Irule Irule;
  typedef struct Iimage Iimage;
  typedef struct Iformfield Iformfield;
  typedef struct Itable Itable;
  typedef struct Ifloat Ifloat;
  typedef struct Ispacer Ispacer;

  // Generic attributes and script events
  typedef struct Genattr Genattr;
  typedef struct SEvent SEvent;

  // Forms
  typedef struct Formfield Formfield;
  typedef struct Option Option;
  typedef struct Form Form;

  // Tables
  typedef struct Table Table;
  typedef struct Tablecol Tablecol;
  typedef struct Tablerow Tablerow;
  typedef struct Tablecell Tablecell;

  // Small value types
  typedef struct Align Align;
  typedef struct Dimen Dimen;
  typedef struct Background Background;

  // Anchors and client-side maps
  typedef struct Anchor Anchor;
  typedef struct DestAnchor DestAnchor;
  typedef struct Map Map;
  typedef struct Area Area;

  // Frames and whole-document information
  typedef struct Kidinfo Kidinfo;
  typedef struct Docinfo Docinfo;

  // Declared here only by name
  typedef struct Stack Stack;
  typedef struct Pstate Pstate;
  typedef struct ItemSource ItemSource;
  typedef struct Lay Lay;     // defined in Layout module
  106. #pragma incomplete Lay
  // Alignment types (used for both horizontal and vertical alignment fields)
  enum {
      ALnone = 0,
      ALleft,
      ALcenter,
      ALright,
      ALjustify,
      ALchar,
      ALtop,
      ALmiddle,
      ALbottom,
      ALbaseline
  };
  112. struct Align
  113. {
  114. uchar halign; // one of ALnone, ALleft, etc.
  115. uchar valign; // one of ALnone, ALtop, etc.
  116. };
  // A Dimen holds a dimension specification, in particular for the
  // cases where a number may be followed by a % or a * to indicate a
  // percentage of the total or a relative weight.
  // Dnone means that no dimension was specified.
  // To fit in one word, the kind lives in the bits selected by
  // Dkindmask (bits 29 and 30) and the value in the remaining bits.
  enum {
      Dnone     = 0,
      Dpixels   = 1 << 29,
      Dpercent  = 2 << 29,
      Drelative = 3 << 29,
      Dkindmask = 3 << 29,
      Dspecmask = ~Dkindmask
  };
  // One-word dimension: a kind (Dnone, Dpixels, ...) or'ed with its value.
  struct Dimen
  {
      int kindspec;   // kind | spec
  };
  134. // Background is either an image or a color.
  135. // If both are set, the image has precedence.
  136. struct Background
  137. {
  138. Rune* image; // url
  139. int color;
  140. };
  141. // There are about a half dozen Item variants.
  142. // The all look like this at the start (using Plan 9 C's
  143. // anonymous structure member mechanism),
  144. // and then the tag field dictates what extra fields there are.
  145. struct Item
  146. {
  147. Item* next; // successor in list of items
  148. int width; // width in pixels (0 for floating items)
  149. int height; // height in pixels
  150. int ascent; // ascent (from top to baseline) in pixels
  151. int anchorid; // if nonzero, which anchor we're in
  152. int state; // flags and values (see below)
  153. Genattr* genattr; // generic attributes and events
  154. int tag; // variant discriminator: Itexttag, etc.
  155. };
  // Item variant tags (values for the Item tag field)
  enum {
      Itexttag = 0,
      Iruletag,
      Iimagetag,
      Iformfieldtag,
      Itabletag,
      Ifloattag,
      Ispacertag
  };
  166. struct Itext
  167. {
  168. Item; // (with tag ==Itexttag)
  169. Rune* s; // the characters
  170. int fnt; // style*NumSize+size (see font stuff, below)
  171. int fg; // Pixel (color) for text
  172. uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
  173. uchar ul; // ULnone, ULunder, or ULmid
  174. };
  175. struct Irule
  176. {
  177. Item; // (with tag ==Iruletag)
  178. uchar align; // alignment spec
  179. uchar noshade; // if true, don't shade
  180. int size; // size attr (rule height)
  181. Dimen wspec; // width spec
  182. };
  183. struct Iimage
  184. {
  185. Item; // (with tag ==Iimagetag)
  186. Rune* imsrc; // image src url
  187. int imwidth; // spec width (actual, if no spec)
  188. int imheight; // spec height (actual, if no spec)
  189. Rune* altrep; // alternate representation, in absence of image
  190. Map* map; // if non-nil, client side map
  191. int ctlid; // if animated
  192. uchar align; // vertical alignment
  193. uchar hspace; // in pixels; buffer space on each side
  194. uchar vspace; // in pixels; buffer space on top and bottom
  195. uchar border; // in pixels: border width to draw around image
  196. Iimage* nextimage; // next in list of document's images
  197. void* aux;
  198. };
  199. struct Iformfield
  200. {
  201. Item; // (with tag ==Iformfieldtag)
  202. Formfield* formfield;
  203. void* aux;
  204. };
  205. struct Itable
  206. {
  207. Item; // (with tag ==Itabletag)
  208. Table* table;
  209. };
  210. struct Ifloat
  211. {
  212. Item; // (with tag ==Ifloattag)
  213. Item* item; // table or image item that floats
  214. int x; // x coord of top (from right, if ALright)
  215. int y; // y coord of top
  216. uchar side; // margin it floats to: ALleft or ALright
  217. uchar infloats; // true if this has been added to a lay.floats
  218. Ifloat* nextfloat; // in list of floats
  219. };
  220. struct Ispacer
  221. {
  222. Item; // (with tag ==Ispacertag)
  223. int spkind; // ISPnull, etc.
  224. };
  // Item state flags and value fields (stored in the Item state field).
  // The high bits are single-bit flags; the low 16 bits hold two
  // 8-bit values, extracted with IFindentmask/IFindentshift and IFhangmask.
  enum {
      IFbrk         = 0x80000000,     // forced break before this item
      IFbrksp       = 0x40000000,     // add 1 line space to break (IFbrk set too)
      IFnobrk       = 0x20000000,     // break not allowed before this item
      IFcleft       = 0x10000000,     // clear left floats (IFbrk set too)
      IFcright      = 0x08000000,     // clear right floats (IFbrk set too)
      IFwrap        = 0x04000000,     // in a wrapping (non-pre) line
      IFhang        = 0x02000000,     // in a hanging (into left indent) item
      IFrjust       = 0x01000000,     // right justify current line
      IFcjust       = 0x00800000,     // center justify current line
      IFsmap        = 0x00400000,     // image is server-side map
      IFindentshift = 8,
      IFindentmask  = (255 << IFindentshift),   // current indent, in tab stops
      IFhangmask    = 255             // current hang into left indent, in 1/10th tabstops
  };
  // Bias added to Itext's voff field
  enum {
      Voffbias = 128
  };
  // Spacer kinds (values for Ispacer's spkind field)
  enum {
      ISPnull = 0,    // 0 height and width
      ISPvline,       // height and ascent of current font
      ISPhspace,      // width of space in current font
      ISPgeneral      // other purposes (e.g., between markers and list)
  };
  250. // Generic attributes and events (not many elements will have any of these set)
  251. struct Genattr
  252. {
  253. Rune* id;
  254. Rune* class;
  255. Rune* style;
  256. Rune* title;
  257. SEvent* events;
  258. };
  259. struct SEvent
  260. {
  261. SEvent* next; // in list of events
  262. int type; // SEonblur, etc.
  263. Rune* script;
  264. };
  // Script event types (values for SEvent's type field).
  // Values are consecutive starting at 0; Numscriptev is the count.
  enum {
      SEonblur = 0,
      SEonchange,
      SEonclick,
      SEondblclick,
      SEonfocus,
      SEonkeypress,
      SEonkeyup,
      SEonload,
      SEonmousedown,
      SEonmousemove,
      SEonmouseout,
      SEonmouseover,
      SEonmouseup,
      SEonreset,
      SEonselect,
      SEonsubmit,
      SEonunload,
      Numscriptev     // number of event types above
  };
  // Form field types (values for Formfield's ftype field)
  enum {
      Ftext = 0,
      Fpassword,
      Fcheckbox,
      Fradio,
      Fsubmit,
      Fhidden,
      Fimage,
      Freset,
      Ffile,
      Fbutton,
      Fselect,
      Ftextarea
  };
  288. // Information about a field in a form
  289. struct Formfield
  290. {
  291. Formfield* next; // in list of fields for a form
  292. int ftype; // Ftext, Fpassword, etc.
  293. int fieldid; // serial no. of field within its form
  294. Form* form; // containing form
  295. Rune* name; // name attr
  296. Rune* value; // value attr
  297. int size; // size attr
  298. int maxlength; // maxlength attr
  299. int rows; // rows attr
  300. int cols; // cols attr
  301. uchar flags; // FFchecked, etc.
  302. Option* options; // for Fselect fields
  303. Item* image; // image item, for Fimage fields
  304. int ctlid; // identifies control for this field in layout
  305. SEvent* events; // same as genattr->events of containing item
  306. };
  // Bits for Formfield's flags field
  enum {
      FFchecked  = 1 << 7,
      FFmultiple = 1 << 6
  };
  311. // Option holds info about an option in a "select" form field
  312. struct Option
  313. {
  314. Option* next; // next in list of options for a field
  315. int selected; // true if selected initially
  316. Rune* value; // value attr
  317. Rune* display; // display string
  318. };
  319. // Form holds info about a form
  320. struct Form
  321. {
  322. Form* next; // in list of forms for document
  323. int formid; // serial no. of form within its doc
  324. Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
  325. Rune* action; // action attr
  326. int target; // target attr as targetid
  327. int method; // HGet or HPost
  328. int nfields; // number of fields
  329. Formfield* fields; // field's forms, in input order
  330. };
  // Flags used in various table structures
  enum {
      TFparsing = 1 << 7,
      TFnowrap  = 1 << 6,
      TFisth    = 1 << 5
  };
  337. // Information about a table
  338. struct Table
  339. {
  340. Table* next; // next in list of document's tables
  341. int tableid; // serial no. of table within its doc
  342. Tablerow* rows; // array of row specs (list during parsing)
  343. int nrow; // total number of rows
  344. Tablecol* cols; // array of column specs
  345. int ncol; // total number of columns
  346. Tablecell* cells; // list of unique cells
  347. int ncell; // total number of cells
  348. Tablecell*** grid; // 2-D array of cells
  349. Align align; // alignment spec for whole table
  350. Dimen width; // width spec for whole table
  351. int border; // border attr
  352. int cellspacing; // cellspacing attr
  353. int cellpadding; // cellpadding attr
  354. Background background; // table background
  355. Item* caption; // linked list of Items, giving caption
  356. uchar caption_place; // ALtop or ALbottom
  357. Lay* caption_lay; // layout of caption
  358. int totw; // total width
  359. int toth; // total height
  360. int caph; // caption height
  361. int availw; // used for previous 3 sizes
  362. Token* tabletok; // token that started the table
  363. uchar flags; // Lchanged, perhaps
  364. };
  365. struct Tablecol
  366. {
  367. int width;
  368. Align align;
  369. Point pos;
  370. };
  371. struct Tablerow
  372. {
  373. Tablerow* next; // Next in list of rows, during parsing
  374. Tablecell* cells; // Cells in row, linked through nextinrow
  375. int height;
  376. int ascent;
  377. Align align;
  378. Background background;
  379. Point pos;
  380. uchar flags; // 0 or TFparsing
  381. };
  382. // A Tablecell is one cell of a table.
  383. // It may span multiple rows and multiple columns.
  384. // Cells are linked on two lists: the list for all the cells of
  385. // a document (the next pointers), and the list of all the
  386. // cells that start in a given row (the nextinrow pointers)
  387. struct Tablecell
  388. {
  389. Tablecell* next; // next in list of table's cells
  390. Tablecell* nextinrow; // next in list of row's cells
  391. int cellid; // serial no. of cell within table
  392. Item* content; // contents before layout
  393. Lay* lay; // layout of cell
  394. int rowspan; // number of rows spanned by this cell
  395. int colspan; // number of cols spanned by this cell
  396. Align align; // alignment spec
  397. uchar flags; // TFparsing, TFnowrap, TFisth
  398. Dimen wspec; // suggested width
  399. int hspec; // suggested height
  400. Background background; // cell background
  401. int minw; // minimum possible width
  402. int maxw; // maximum width
  403. int ascent; // cell's ascent
  404. int row; // row of upper left corner
  405. int col; // col of upper left corner
  406. Point pos; // nw corner of cell contents, in cell
  407. };
  408. // Anchor is for info about hyperlinks that go somewhere
  409. struct Anchor
  410. {
  411. Anchor* next; // next in list of document's anchors
  412. int index; // serial no. of anchor within its doc
  413. Rune* name; // name attr
  414. Rune* href; // href attr
  415. int target; // target attr as targetid
  416. };
  417. // DestAnchor is for info about hyperlinks that are destinations
  418. struct DestAnchor
  419. {
  420. DestAnchor* next; // next in list of document's destanchors
  421. int index; // serial no. of anchor within its doc
  422. Rune* name; // name attr
  423. Item* item; // the destination
  424. };
  425. // Maps (client side)
  426. struct Map
  427. {
  428. Map* next; // next in list of document's maps
  429. Rune* name; // map name
  430. Area* areas; // list of map areas
  431. };
  432. struct Area
  433. {
  434. Area* next; // next in list of a map's areas
  435. int shape; // SHrect, etc.
  436. Rune* href; // associated hypertext link
  437. int target; // associated target frame
  438. Dimen* coords; // array of coords for shape
  439. int ncoords; // size of coords array
  440. };
  // Area shapes (values for Area's shape field)
  enum {
      SHrect = 0,
      SHcircle,
      SHpoly
  };
  // Fonts are represented by integers: style*NumSize + size

  // Font styles; NumStyle is the count.
  enum {
      FntR = 0,   // roman
      FntI,       // italic
      FntB,       // bold
      FntT,       // typewriter
      NumStyle
  };

  // Font sizes; NumSize is the count.
  enum {
      Tiny = 0,
      Small,
      Normal,
      Large,
      Verylarge,
      NumSize
  };

  // Derived constants: the number of distinct font codes, and the
  // default font (roman style, Normal size).
  enum {
      NumFnt = NumStyle * NumSize,
      DefFnt = FntR * NumSize + Normal
  };
  // Lines are needed through some text items, for underlining or strikethrough
  // (values for Itext's ul field)
  enum {
      ULnone = 0,
      ULunder,
      ULmid
  };
  // Kidinfo flags (one bit each)
  enum {
      FRnoresize    = 1 << 0,
      FRnoscroll    = 1 << 1,
      FRhscroll     = 1 << 2,
      FRvscroll     = 1 << 3,
      FRhscrollauto = 1 << 4,
      FRvscrollauto = 1 << 5
  };
  480. // Information about child frame or frameset
  481. struct Kidinfo
  482. {
  483. Kidinfo* next; // in list of kidinfos for a frameset
  484. int isframeset;
  485. // fields for "frame"
  486. Rune* src; // only nil if a "dummy" frame or this is frameset
  487. Rune* name; // always non-empty if this isn't frameset
  488. int marginw;
  489. int marginh;
  490. int framebd;
  491. int flags;
  492. // fields for "frameset"
  493. Dimen* rows; // array of row dimensions
  494. int nrows; // length of rows
  495. Dimen* cols; // array of col dimensions
  496. int ncols; // length of cols
  497. Kidinfo* kidinfos;
  498. Kidinfo* nextframeset; // parsing stack
  499. };
  500. // Document info (global information about HTML page)
  501. struct Docinfo
  502. {
  503. // stuff from HTTP headers, doc head, and body tag
  504. Rune* src; // original source of doc
  505. Rune* base; // base URL of doc
  506. Rune* doctitle; // from <title> element
  507. Background background; // background specification
  508. Iimage* backgrounditem; // Image Item for doc background image, or nil
  509. int text; // doc foreground (text) color
  510. int link; // unvisited hyperlink color
  511. int vlink; // visited hyperlink color
  512. int alink; // highlighting hyperlink color
  513. int target; // target frame default
  514. int chset; // ISO_8859, etc.
  515. int mediatype; // TextHtml, etc.
  516. int scripttype; // TextJavascript, etc.
  517. int hasscripts; // true if scripts used
  518. Rune* refresh; // content of <http-equiv=Refresh ...>
  519. Kidinfo* kidinfo; // if a frameset
  520. int frameid; // id of document frame
  521. // info needed to respond to user actions
  522. Anchor* anchors; // list of href anchors
  523. DestAnchor* dests; // list of destination anchors
  524. Form* forms; // list of forms
  525. Table* tables; // list of tables
  526. Map* maps; // list of maps
  527. Iimage* images; // list of image items (through nextimage links)
  528. };
  529. extern int dimenkind(Dimen d);
  530. extern int dimenspec(Dimen d);
  531. extern void freedocinfo(Docinfo* d);
  532. extern void freeitems(Item* ithead);
  533. extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
  534. extern void printitems(Item* items, char* msg);
  535. extern int targetid(Rune* s);
  536. extern Rune* targetname(int targid);
  537. extern int validitems(Item* i);
  538. #pragma varargck type "I" Item*
  // Control print output
  extern int warn, dbglex, dbgbuild;
  543. // To be provided by caller
  544. // emalloc and erealloc should not return if can't get memory.
  545. // emalloc should zero its memory.
  546. extern void* emalloc(ulong);
  547. extern void* erealloc(void* p, ulong size);