1
0

html.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #pragma lib "libhtml.a"
  10. #pragma src "/sys/src/libhtml"
  11. /* UTILS */
  12. extern uint8_t* fromStr(Rune* buf, int n, int chset);
  13. extern Rune* toStr(uint8_t* buf, int n, int chset);
  /* Common LEX and BUILD enums */

  /*
   * Media types, numbered consecutively from 0 in the order listed.
   * NMEDIATYPES is the number of entries before it and must stay last.
   */
  enum
  {
      /* application types */
      ApplMsword, ApplOctets, ApplPdf, ApplPostscript,
      ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint,

      /* type not recognised */
      UnknownType,

      /* audio types */
      Audio32kadpcm, AudioBasic,

      /* image types */
      ImageCgm, ImageG3fax, ImageGif, ImageIef, ImageJpeg, ImagePng,
      ImageTiff, ImageXBit, ImageXBit2, ImageXBitmulti, ImageXXBitmap,

      /* model and multipart types */
      ModelVrml, MultiDigest, MultiMixed,

      /* text types */
      TextCss, TextEnriched, TextHtml, TextJavascript, TextPlain,
      TextRichtext, TextSgml, TextTabSeparatedValues, TextXml,

      /* video types */
      VideoMpeg, VideoQuicktime,

      NMEDIATYPES
  };
  /* HTTP request methods (stored, for example, in Form.method) */
  enum
  {
      HGet = 0,
      HPost = 1
  };
  /*
   * Character sets, numbered from 0.
   * NCHARSETS counts the entries before it and must stay last.
   */
  enum
  {
      UnknownCharset = 0,
      US_Ascii,
      ISO_8859_1,
      UTF_8,
      Unicode,

      NCHARSETS
  };
  /* Frame target IDs */
  enum {
      FTtop = 0,
      FTself = 1,
      FTparent = 2,
      FTblank = 3
  };
  /*
   * LEX: forward declarations for the lexer's types.
   * Token is flagged incomplete: this header never defines it.
   */
  typedef struct Attr Attr;
  typedef struct Token Token;
  #pragma incomplete Token
  /* BUILD: forward declarations, grouped by role */

  /* items and their variants */
  typedef struct Item Item;
  typedef struct Itext Itext;
  typedef struct Irule Irule;
  typedef struct Iimage Iimage;
  typedef struct Iformfield Iformfield;
  typedef struct Itable Itable;
  typedef struct Ifloat Ifloat;
  typedef struct Ispacer Ispacer;

  /* generic attributes and scripted events */
  typedef struct Genattr Genattr;
  typedef struct SEvent SEvent;

  /* forms */
  typedef struct Formfield Formfield;
  typedef struct Option Option;
  typedef struct Form Form;

  /* tables */
  typedef struct Table Table;
  typedef struct Tablecol Tablecol;
  typedef struct Tablerow Tablerow;
  typedef struct Tablecell Tablecell;

  /* small value types */
  typedef struct Align Align;
  typedef struct Dimen Dimen;

  /* anchors and client-side maps */
  typedef struct Anchor Anchor;
  typedef struct DestAnchor DestAnchor;
  typedef struct Map Map;
  typedef struct Area Area;

  /* document-level information */
  typedef struct Background Background;
  typedef struct Kidinfo Kidinfo;
  typedef struct Docinfo Docinfo;

  /* names declared here but not defined in this header */
  typedef struct Stack Stack;
  typedef struct Pstate Pstate;
  typedef struct ItemSource ItemSource;

  /* Lay is defined in the Layout module, so it stays incomplete here */
  typedef struct Lay Lay;
  #pragma incomplete Lay
  /*
   * Alignment types.  Align.halign and Align.valign each hold one
   * of these values.
   */
  enum {
      ALnone = 0,
      ALleft,
      ALcenter,
      ALright,
      ALjustify,
      ALchar,
      ALtop,
      ALmiddle,
      ALbottom,
      ALbaseline,
  };
  /* Horizontal and vertical alignment, one byte each */
  struct Align
  {
      uint8_t halign;     /* ALnone, ALleft, etc. */
      uint8_t valign;     /* ALnone, ALtop, etc. */
  };
  /*
   * A Dimen holds a dimension specification, in particular for the
   * cases where a number may be followed by a % or a * to mean a
   * percentage of the total or a relative weight.
   * Dnone means that no dimension was specified.
   *
   * Kind and value share one word: bits 29-30 hold the kind
   * (Dkindmask) and every other bit belongs to the value (Dspecmask).
   */
  enum {
      Dnone     = 0,
      Dpixels   = 0x20000000,     /* kind 1 */
      Dpercent  = 0x40000000,     /* kind 2 */
      Drelative = 0x60000000,     /* kind 3 */
      Dkindmask = 0x60000000,
      Dspecmask = (~Dkindmask)
  };
  /* One dimension: a kind (Dpixels, Dpercent, ...) or'ed with its value */
  struct Dimen
  {
      int kindspec;       /* kind | spec */
  };
  144. /*
  145. * Background is either an image or a color.
  146. * If both are set, the image has precedence.
  147. */
  148. struct Background
  149. {
  150. Rune* image; /* url */
  151. int color;
  152. };
  153. /*
  154. * There are about a half dozen Item variants.
  155. * The all look like this at the start (using Plan 9 C's
  156. * anonymous structure member mechanism),
  157. * and then the tag field dictates what extra fields there are.
  158. */
  159. struct Item
  160. {
  161. Item* next; /* successor in list of items */
  162. int width; /* width in pixels (0 for floating items) */
  163. int height; /* height in pixels */
  164. int ascent; /* ascent (from top to baseline) in pixels */
  165. int anchorid; /* if nonzero, which anchor we're in */
  166. int state; /* flags and values (see below) */
  167. Genattr*genattr; /* generic attributes and events */
  168. int tag; /* variant discriminator: Itexttag, etc. */
  169. };
  /* Values for Item.tag, one per Item variant */
  enum {
      Itexttag = 0,
      Iruletag = 1,
      Iimagetag = 2,
      Iformfieldtag = 3,
      Itabletag = 4,
      Ifloattag = 5,
      Ispacertag = 6
  };
  180. struct Itext
  181. {
  182. Item; /* (with tag ==Itexttag) */
  183. Rune* s; /* the characters */
  184. int fnt; /* style*NumSize+size (see font stuff, below) */
  185. int fg; /* Pixel (color) for text */
  186. uint8_t voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
  187. uint8_t ul; /* ULnone, ULunder, or ULmid */
  188. };
  189. struct Irule
  190. {
  191. Item; /* (with tag ==Iruletag) */
  192. uint8_t align; /* alignment spec */
  193. uint8_t noshade; /* if true, don't shade */
  194. int size; /* size attr (rule height) */
  195. int color; /* color attr */
  196. Dimen wspec; /* width spec */
  197. };
  198. struct Iimage
  199. {
  200. Item; /* (with tag ==Iimagetag) */
  201. Rune* imsrc; /* image src url */
  202. int imwidth; /* spec width (actual, if no spec) */
  203. int imheight; /* spec height (actual, if no spec) */
  204. Rune* altrep; /* alternate representation, in absence of image */
  205. Map* map; /* if non-nil, client side map */
  206. int ctlid; /* if animated */
  207. uint8_t align; /* vertical alignment */
  208. uint8_t hspace; /* in pixels; buffer space on each side */
  209. uint8_t vspace; /* in pixels; buffer space on top and bottom */
  210. uint8_t border; /* in pixels: border width to draw around image */
  211. Iimage* nextimage; /* next in list of document's images */
  212. void* aux;
  213. };
  214. struct Iformfield
  215. {
  216. Item; /* (with tag ==Iformfieldtag) */
  217. Formfield*formfield;
  218. void* aux;
  219. };
  220. struct Itable
  221. {
  222. Item; /* (with tag ==Itabletag) */
  223. Table* table;
  224. };
  225. struct Ifloat
  226. {
  227. Item; /* (with tag ==Ifloattag) */
  228. Item* item; /* table or image item that floats */
  229. int x; /* x coord of top (from right, if ALright) */
  230. int y; /* y coord of top */
  231. uint8_t side; /* margin it floats to: ALleft or ALright */
  232. uint8_t infloats; /* true if this has been added to a lay.floats */
  233. Ifloat* nextfloat; /* in list of floats */
  234. };
  235. struct Ispacer
  236. {
  237. Item; /* (with tag ==Ispacertag) */
  238. int spkind; /* ISPnull, etc. */
  239. };
  /*
   * Flag bits and value fields packed into Item.state.
   * Bits 22-31 are single-bit flags; the low 16 bits hold two values.
   *
   * NOTE(review): IFbrk (bit 31) does not fit in a signed 32-bit int, so
   * strictly conforming pre-C23 compilers may warn about the enumerator.
   * Its value is left as is because Item.state users depend on it.
   */
  enum {
      IFbrk    = (1U<<31),    /* forced break before this item */
      IFbrksp  = (1U<<30),    /* add 1 line space to break (IFbrk set too) */
      IFnobrk  = (1U<<29),    /* break not allowed before this item */
      IFcleft  = (1U<<28),    /* clear left floats (IFbrk set too) */
      IFcright = (1U<<27),    /* clear right floats (IFbrk set too) */
      IFwrap   = (1U<<26),    /* in a wrapping (non-pre) line */
      IFhang   = (1U<<25),    /* in a hanging (into left indent) item */
      IFrjust  = (1U<<24),    /* right justify current line */
      IFcjust  = (1U<<23),    /* center justify current line */
      IFsmap   = (1U<<22),    /* image is server-side map */

      IFindentshift = 8,
      IFindentmask  = (0xFF<<IFindentshift),  /* current indent, in tab stops */
      IFhangmask    = 0xFF    /* current hang into left indent, in 1/10th tabstops */
  };
  /* Itext.voff stores the vertical offset plus this bias */
  enum {
      Voffbias = 128
  };
  /* Spacer kinds (values of Ispacer.spkind) */
  enum {
      ISPnull = 0,        /* 0 height and width */
      ISPvline = 1,       /* height and ascent of current font */
      ISPhspace = 2,      /* width of a space in current font */
      ISPgeneral = 3      /* other purposes (e.g., between markers and list) */
  };
  265. /* Generic attributes and events (not many elements will have any of these set) */
  266. struct Genattr
  267. {
  268. Rune* id;
  269. Rune* class;
  270. Rune* style;
  271. Rune* title;
  272. SEvent* events;
  273. };
  274. struct SEvent
  275. {
  276. SEvent* next; /* in list of events */
  277. int type; /* SEonblur, etc. */
  278. Rune* script;
  279. };
  /*
   * Script event types (values of SEvent.type), numbered from 0.
   * Numscriptev counts the entries before it and must stay last.
   */
  enum {
      SEonblur,
      SEonchange,
      SEonclick,
      SEondblclick,
      SEonfocus,
      SEonkeypress,
      SEonkeyup,
      SEonload,
      SEonmousedown,
      SEonmousemove,
      SEonmouseout,
      SEonmouseover,
      SEonmouseup,
      SEonreset,
      SEonselect,
      SEonsubmit,
      SEonunload,

      Numscriptev
  };
  /* Form field types (values of Formfield.ftype), numbered from 0 */
  enum {
      Ftext, Fpassword, Fcheckbox, Fradio,
      Fsubmit, Fhidden, Fimage, Freset,
      Ffile, Fbutton, Fselect, Ftextarea
  };
  303. /* Information about a field in a form */
  304. struct Formfield
  305. {
  306. Formfield*next; /* in list of fields for a form */
  307. int ftype; /* Ftext, Fpassword, etc. */
  308. int fieldid; /* serial no. of field within its form */
  309. Form* form; /* containing form */
  310. Rune* name; /* name attr */
  311. Rune* value; /* value attr */
  312. int size; /* size attr */
  313. int maxlength; /* maxlength attr */
  314. int rows; /* rows attr */
  315. int cols; /* cols attr */
  316. uint8_t flags; /* FFchecked, etc. */
  317. Option* options; /* for Fselect fields */
  318. Item* image; /* image item, for Fimage fields */
  319. int ctlid; /* identifies control for this field in layout */
  320. SEvent* events; /* same as genattr->events of containing item */
  321. };
  /* Bits for Formfield.flags */
  enum {
      FFchecked  = 0x80,
      FFmultiple = 0x40
  };
  326. /* Option holds info about an option in a "select" form field */
  327. struct Option
  328. {
  329. Option* next; /* next in list of options for a field */
  330. int selected; /* true if selected initially */
  331. Rune* value; /* value attr */
  332. Rune* display; /* display string */
  333. };
  334. /* Form holds info about a form */
  335. struct Form
  336. {
  337. Form* next; /* in list of forms for document */
  338. int formid; /* serial no. of form within its doc */
  339. Rune* name; /* name or id attr (netscape uses name, HTML 4.0 uses id) */
  340. Rune* action; /* action attr */
  341. int target; /* target attr as targetid */
  342. int method; /* HGet or HPost */
  343. int nfields; /* number of fields */
  344. Formfield*fields; /* field's forms, in input order */
  345. };
  /* Flag bits shared by the table structures (Tablerow.flags, Tablecell.flags) */
  enum {
      TFparsing = 0x80,
      TFnowrap  = 0x40,
      TFisth    = 0x20
  };
  352. /* Information about a table */
  353. struct Table
  354. {
  355. Table* next; /* next in list of document's tables */
  356. int tableid; /* serial no. of table within its doc */
  357. Tablerow*rows; /* array of row specs (list during parsing) */
  358. int nrow; /* total number of rows */
  359. Tablecol*cols; /* array of column specs */
  360. int ncol; /* total number of columns */
  361. Tablecell*cells; /* list of unique cells */
  362. int ncell; /* total number of cells */
  363. Tablecell***grid; /* 2-D array of cells */
  364. Align align; /* alignment spec for whole table */
  365. Dimen width; /* width spec for whole table */
  366. int border; /* border attr */
  367. int cellspacing; /* cellspacing attr */
  368. int cellpadding; /* cellpadding attr */
  369. Background background; /* table background */
  370. Item* caption; /* linked list of Items, giving caption */
  371. uint8_t caption_place; /* ALtop or ALbottom */
  372. Lay* caption_lay; /* layout of caption */
  373. int totw; /* total width */
  374. int toth; /* total height */
  375. int caph; /* caption height */
  376. int availw; /* used for previous 3 sizes */
  377. Token* tabletok; /* token that started the table */
  378. uint8_t flags; /* Lchanged, perhaps */
  379. };
  380. struct Tablecol
  381. {
  382. int width;
  383. Align align;
  384. Point pos;
  385. };
  386. struct Tablerow
  387. {
  388. Tablerow*next; /* Next in list of rows, during parsing */
  389. Tablecell*cells; /* Cells in row, linked through nextinrow */
  390. int height;
  391. int ascent;
  392. Align align;
  393. Background background;
  394. Point pos;
  395. uint8_t flags; /* 0 or TFparsing */
  396. };
  397. /*
  398. * A Tablecell is one cell of a table.
  399. * It may span multiple rows and multiple columns.
  400. * Cells are linked on two lists: the list for all the cells of
  401. * a document (the next pointers), and the list of all the
  402. * cells that start in a given row (the nextinrow pointers)
  403. */
  404. struct Tablecell
  405. {
  406. Tablecell*next; /* next in list of table's cells */
  407. Tablecell*nextinrow; /* next in list of row's cells */
  408. int cellid; /* serial no. of cell within table */
  409. Item* content; /* contents before layout */
  410. Lay* lay; /* layout of cell */
  411. int rowspan; /* number of rows spanned by this cell */
  412. int colspan; /* number of cols spanned by this cell */
  413. Align align; /* alignment spec */
  414. uint8_t flags; /* TFparsing, TFnowrap, TFisth */
  415. Dimen wspec; /* suggested width */
  416. int hspec; /* suggested height */
  417. Background background; /* cell background */
  418. int minw; /* minimum possible width */
  419. int maxw; /* maximum width */
  420. int ascent; /* cell's ascent */
  421. int row; /* row of upper left corner */
  422. int col; /* col of upper left corner */
  423. Point pos; /* nw corner of cell contents, in cell */
  424. };
  425. /* Anchor is for info about hyperlinks that go somewhere */
  426. struct Anchor
  427. {
  428. Anchor* next; /* next in list of document's anchors */
  429. int index; /* serial no. of anchor within its doc */
  430. Rune* name; /* name attr */
  431. Rune* href; /* href attr */
  432. int target; /* target attr as targetid */
  433. };
  434. /* DestAnchor is for info about hyperlinks that are destinations */
  435. struct DestAnchor
  436. {
  437. DestAnchor*next; /* next in list of document's destanchors */
  438. int index; /* serial no. of anchor within its doc */
  439. Rune* name; /* name attr */
  440. Item* item; /* the destination */
  441. };
  442. /* Maps (client side) */
  443. struct Map
  444. {
  445. Map* next; /* next in list of document's maps */
  446. Rune* name; /* map name */
  447. Area* areas; /* list of map areas */
  448. };
  449. struct Area
  450. {
  451. Area* next; /* next in list of a map's areas */
  452. int shape; /* SHrect, etc. */
  453. Rune* href; /* associated hypertext link */
  454. int target; /* associated target frame */
  455. Dimen* coords; /* array of coords for shape */
  456. int ncoords; /* size of coords array */
  457. };
  /* Area shapes (values of Area.shape) */
  enum {
      SHrect = 0,
      SHcircle = 1,
      SHpoly = 2
  };
  /* A font is represented by one integer: style*NumSize + size */

  /* Font styles; NumStyle counts them and must stay last */
  enum {
      FntR = 0,   /* roman */
      FntI,       /* italic */
      FntB,       /* bold */
      FntT,       /* typewriter */

      NumStyle
  };

  /* Font sizes; NumSize counts them and must stay last */
  enum {
      Tiny = 0,
      Small,
      Normal,
      Large,
      Verylarge,

      NumSize
  };

  /* Derived values: how many fonts there are, and the default (roman, Normal) */
  enum {
      NumFnt = NumStyle*NumSize,
      DefFnt = FntR*NumSize+Normal,
  };
  /*
   * Some text items need a line drawn through them, for underlining
   * or strikethrough.  These are the values of Itext.ul.
   */
  enum {
      ULnone = 0,
      ULunder = 1,
      ULmid = 2
  };
  /* Bits for Kidinfo.flags */
  enum {
      FRnoresize    = 0x01,
      FRnoscroll    = 0x02,
      FRhscroll     = 0x04,
      FRvscroll     = 0x08,
      FRhscrollauto = 0x10,
      FRvscrollauto = 0x20
  };
  497. /* Information about child frame or frameset */
  498. struct Kidinfo
  499. {
  500. Kidinfo*next; /* in list of kidinfos for a frameset */
  501. int isframeset;
  502. /* fields for "frame" */
  503. Rune* src; /* only nil if a "dummy" frame or this is frameset */
  504. Rune* name; /* always non-empty if this isn't frameset */
  505. int marginw;
  506. int marginh;
  507. int framebd;
  508. int flags;
  509. /* fields for "frameset" */
  510. Dimen* rows; /* array of row dimensions */
  511. int nrows; /* length of rows */
  512. Dimen* cols; /* array of col dimensions */
  513. int ncols; /* length of cols */
  514. Kidinfo*kidinfos;
  515. Kidinfo*nextframeset; /* parsing stack */
  516. };
  517. /* Document info (global information about HTML page) */
  518. struct Docinfo
  519. {
  520. /* stuff from HTTP headers, doc head, and body tag */
  521. Rune* src; /* original source of doc */
  522. Rune* base; /* base URL of doc */
  523. Rune* doctitle; /* from <title> element */
  524. Background background; /* background specification */
  525. Iimage* backgrounditem; /* Image Item for doc background image, or nil */
  526. int text; /* doc foreground (text) color */
  527. int link; /* unvisited hyperlink color */
  528. int vlink; /* visited hyperlink color */
  529. int alink; /* highlighting hyperlink color */
  530. int target; /* target frame default */
  531. int chset; /* ISO_8859, etc. */
  532. int mediatype; /* TextHtml, etc. */
  533. int scripttype; /* TextJavascript, etc. */
  534. int hasscripts; /* true if scripts used */
  535. Rune* refresh; /* content of <http-equiv=Refresh ...> */
  536. Kidinfo*kidinfo; /* if a frameset */
  537. int frameid; /* id of document frame */
  538. /* info needed to respond to user actions */
  539. Anchor* anchors; /* list of href anchors */
  540. DestAnchor*dests; /* list of destination anchors */
  541. Form* forms; /* list of forms */
  542. Table* tables; /* list of tables */
  543. Map* maps; /* list of maps */
  544. Iimage* images; /* list of image items (through nextimage links) */
  545. };
  546. extern int dimenkind(Dimen d);
  547. extern int dimenspec(Dimen d);
  548. extern void freedocinfo(Docinfo* d);
  549. extern void freeitems(Item* ithead);
  550. extern Item* parsehtml(uint8_t* data, int datalen, Rune* src,
  551. int mtype, int chset, Docinfo** pdi);
  552. extern void printitems(Item* items, char* msg);
  553. extern int targetid(Rune* s);
  554. extern Rune* targetname(int targid);
  555. extern int validitems(Item* i);
  556. #pragma varargck type "I" Item*
  /* Flags that control print output; defined elsewhere */
  extern int warn, dbglex, dbgbuild;
  /*
   * To be provided by the caller of the library.
   *
   * emalloc(size) must return zeroed memory.
   * erealloc(p, size) resizes the allocation p.
   * Neither function may return if it cannot get the memory.
   *
   * NOTE(review): the two size parameters have different types
   * (unsigned long vs uint32_t).  They are left unchanged because the
   * definitions live in client code; unify them only together with
   * every definition.
   */
  extern void* emalloc(unsigned long size);
  extern void* erealloc(void* p, uint32_t size);