lex.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <draw.h>
  12. #include <ctype.h>
  13. #include <html.h>
  14. #include "impl.h"
  15. typedef struct TokenSource TokenSource;
  16. struct TokenSource
  17. {
  18. int i; // index of next byte to use
  19. uint8_t* data; // all the data
  20. int edata; // data[0:edata] is valid
  21. int chset; // one of US_Ascii, etc.
  22. int mtype; // TextHtml or TextPlain
  23. };
  24. enum {
  25. EOF = -2,
  26. EOB = -1
  27. };
  28. #define ISNAMCHAR(c) ((c)<256 && (isalpha(c) || isdigit(c) || (c) == '-' || (c) == '.'))
  29. #define SMALLBUFSIZE 240
  30. #define BIGBUFSIZE 2000
  31. // HTML 4.0 tag names.
  32. // Keep sorted, and in correspondence with enum in iparse.h.
  33. Rune* tagnames[] = {
  34. L" ",
  35. L"!",
  36. L"a",
  37. L"abbr",
  38. L"acronym",
  39. L"address",
  40. L"applet",
  41. L"area",
  42. L"b",
  43. L"base",
  44. L"basefont",
  45. L"bdo",
  46. L"big",
  47. L"blink",
  48. L"blockquote",
  49. L"body",
  50. L"bq",
  51. L"br",
  52. L"button",
  53. L"caption",
  54. L"center",
  55. L"cite",
  56. L"code",
  57. L"col",
  58. L"colgroup",
  59. L"dd",
  60. L"del",
  61. L"dfn",
  62. L"dir",
  63. L"div",
  64. L"dl",
  65. L"dt",
  66. L"em",
  67. L"fieldset",
  68. L"font",
  69. L"form",
  70. L"frame",
  71. L"frameset",
  72. L"h1",
  73. L"h2",
  74. L"h3",
  75. L"h4",
  76. L"h5",
  77. L"h6",
  78. L"head",
  79. L"hr",
  80. L"html",
  81. L"i",
  82. L"iframe",
  83. L"img",
  84. L"input",
  85. L"ins",
  86. L"isindex",
  87. L"kbd",
  88. L"label",
  89. L"legend",
  90. L"li",
  91. L"link",
  92. L"map",
  93. L"menu",
  94. L"meta",
  95. L"nobr",
  96. L"noframes",
  97. L"noscript",
  98. L"object",
  99. L"ol",
  100. L"optgroup",
  101. L"option",
  102. L"p",
  103. L"param",
  104. L"pre",
  105. L"q",
  106. L"s",
  107. L"samp",
  108. L"script",
  109. L"select",
  110. L"small",
  111. L"span",
  112. L"strike",
  113. L"strong",
  114. L"style",
  115. L"sub",
  116. L"sup",
  117. L"table",
  118. L"tbody",
  119. L"td",
  120. L"textarea",
  121. L"tfoot",
  122. L"th",
  123. L"thead",
  124. L"title",
  125. L"tr",
  126. L"tt",
  127. L"u",
  128. L"ul",
  129. L"var"
  130. };
  131. // HTML 4.0 attribute names.
  132. // Keep sorted, and in correspondence with enum in impl.h.
  133. Rune* attrnames[] = {
  134. L"abbr",
  135. L"accept-charset",
  136. L"access-key",
  137. L"action",
  138. L"align",
  139. L"alink",
  140. L"alt",
  141. L"archive",
  142. L"axis",
  143. L"background",
  144. L"bgcolor",
  145. L"border",
  146. L"cellpadding",
  147. L"cellspacing",
  148. L"char",
  149. L"charoff",
  150. L"charset",
  151. L"checked",
  152. L"cite",
  153. L"class",
  154. L"classid",
  155. L"clear",
  156. L"code",
  157. L"codebase",
  158. L"codetype",
  159. L"color",
  160. L"cols",
  161. L"colspan",
  162. L"compact",
  163. L"content",
  164. L"coords",
  165. L"data",
  166. L"datetime",
  167. L"declare",
  168. L"defer",
  169. L"dir",
  170. L"disabled",
  171. L"enctype",
  172. L"face",
  173. L"for",
  174. L"frame",
  175. L"frameborder",
  176. L"headers",
  177. L"height",
  178. L"href",
  179. L"hreflang",
  180. L"hspace",
  181. L"http-equiv",
  182. L"id",
  183. L"ismap",
  184. L"label",
  185. L"lang",
  186. L"link",
  187. L"longdesc",
  188. L"marginheight",
  189. L"marginwidth",
  190. L"maxlength",
  191. L"media",
  192. L"method",
  193. L"multiple",
  194. L"name",
  195. L"nohref",
  196. L"noresize",
  197. L"noshade",
  198. L"nowrap",
  199. L"object",
  200. L"onblur",
  201. L"onchange",
  202. L"onclick",
  203. L"ondblclick",
  204. L"onfocus",
  205. L"onkeypress",
  206. L"onkeyup",
  207. L"onload",
  208. L"onmousedown",
  209. L"onmousemove",
  210. L"onmouseout",
  211. L"onmouseover",
  212. L"onmouseup",
  213. L"onreset",
  214. L"onselect",
  215. L"onsubmit",
  216. L"onunload",
  217. L"profile",
  218. L"prompt",
  219. L"readonly",
  220. L"rel",
  221. L"rev",
  222. L"rows",
  223. L"rowspan",
  224. L"rules",
  225. L"scheme",
  226. L"scope",
  227. L"scrolling",
  228. L"selected",
  229. L"shape",
  230. L"size",
  231. L"span",
  232. L"src",
  233. L"standby",
  234. L"start",
  235. L"style",
  236. L"summary",
  237. L"tabindex",
  238. L"target",
  239. L"text",
  240. L"title",
  241. L"type",
  242. L"usemap",
  243. L"valign",
  244. L"value",
  245. L"valuetype",
  246. L"version",
  247. L"vlink",
  248. L"vspace",
  249. L"width"
  250. };
  251. // Character entity to unicode character number map.
  252. // Keep sorted by name.
  253. StringInt chartab[]= {
  254. {L"AElig", 198},
  255. {L"Aacute", 193},
  256. {L"Acirc", 194},
  257. {L"Agrave", 192},
  258. {L"Alpha", 913},
  259. {L"Aring", 197},
  260. {L"Atilde", 195},
  261. {L"Auml", 196},
  262. {L"Beta", 914},
  263. {L"Ccedil", 199},
  264. {L"Chi", 935},
  265. {L"Dagger", 8225},
  266. {L"Delta", 916},
  267. {L"ETH", 208},
  268. {L"Eacute", 201},
  269. {L"Ecirc", 202},
  270. {L"Egrave", 200},
  271. {L"Epsilon", 917},
  272. {L"Eta", 919},
  273. {L"Euml", 203},
  274. {L"Gamma", 915},
  275. {L"Iacute", 205},
  276. {L"Icirc", 206},
  277. {L"Igrave", 204},
  278. {L"Iota", 921},
  279. {L"Iuml", 207},
  280. {L"Kappa", 922},
  281. {L"Lambda", 923},
  282. {L"Mu", 924},
  283. {L"Ntilde", 209},
  284. {L"Nu", 925},
  285. {L"OElig", 338},
  286. {L"Oacute", 211},
  287. {L"Ocirc", 212},
  288. {L"Ograve", 210},
  289. {L"Omega", 937},
  290. {L"Omicron", 927},
  291. {L"Oslash", 216},
  292. {L"Otilde", 213},
  293. {L"Ouml", 214},
  294. {L"Phi", 934},
  295. {L"Pi", 928},
  296. {L"Prime", 8243},
  297. {L"Psi", 936},
  298. {L"Rho", 929},
  299. {L"Scaron", 352},
  300. {L"Sigma", 931},
  301. {L"THORN", 222},
  302. {L"Tau", 932},
  303. {L"Theta", 920},
  304. {L"Uacute", 218},
  305. {L"Ucirc", 219},
  306. {L"Ugrave", 217},
  307. {L"Upsilon", 933},
  308. {L"Uuml", 220},
  309. {L"Xi", 926},
  310. {L"Yacute", 221},
  311. {L"Yuml", 376},
  312. {L"Zeta", 918},
  313. {L"aacute", 225},
  314. {L"acirc", 226},
  315. {L"acute", 180},
  316. {L"aelig", 230},
  317. {L"agrave", 224},
  318. {L"alefsym", 8501},
  319. {L"alpha", 945},
  320. {L"amp", 38},
  321. {L"and", 8743},
  322. {L"ang", 8736},
  323. {L"aring", 229},
  324. {L"asymp", 8776},
  325. {L"atilde", 227},
  326. {L"auml", 228},
  327. {L"bdquo", 8222},
  328. {L"beta", 946},
  329. {L"brvbar", 166},
  330. {L"bull", 8226},
  331. {L"cap", 8745},
  332. {L"ccedil", 231},
  333. {L"cdots", 8943},
  334. {L"cedil", 184},
  335. {L"cent", 162},
  336. {L"chi", 967},
  337. {L"circ", 710},
  338. {L"clubs", 9827},
  339. {L"cong", 8773},
  340. {L"copy", 169},
  341. {L"crarr", 8629},
  342. {L"cup", 8746},
  343. {L"curren", 164},
  344. {L"dArr", 8659},
  345. {L"dagger", 8224},
  346. {L"darr", 8595},
  347. {L"ddots", 8945},
  348. {L"deg", 176},
  349. {L"delta", 948},
  350. {L"diams", 9830},
  351. {L"divide", 247},
  352. {L"eacute", 233},
  353. {L"ecirc", 234},
  354. {L"egrave", 232},
  355. {L"emdash", 8212}, /* non-standard but commonly used */
  356. {L"empty", 8709},
  357. {L"emsp", 8195},
  358. {L"endash", 8211}, /* non-standard but commonly used */
  359. {L"ensp", 8194},
  360. {L"epsilon", 949},
  361. {L"equiv", 8801},
  362. {L"eta", 951},
  363. {L"eth", 240},
  364. {L"euml", 235},
  365. {L"euro", 8364},
  366. {L"exist", 8707},
  367. {L"fnof", 402},
  368. {L"forall", 8704},
  369. {L"frac12", 189},
  370. {L"frac14", 188},
  371. {L"frac34", 190},
  372. {L"frasl", 8260},
  373. {L"gamma", 947},
  374. {L"ge", 8805},
  375. {L"gt", 62},
  376. {L"hArr", 8660},
  377. {L"harr", 8596},
  378. {L"hearts", 9829},
  379. {L"hellip", 8230},
  380. {L"iacute", 237},
  381. {L"icirc", 238},
  382. {L"iexcl", 161},
  383. {L"igrave", 236},
  384. {L"image", 8465},
  385. {L"infin", 8734},
  386. {L"int", 8747},
  387. {L"iota", 953},
  388. {L"iquest", 191},
  389. {L"isin", 8712},
  390. {L"iuml", 239},
  391. {L"kappa", 954},
  392. {L"lArr", 8656},
  393. {L"lambda", 955},
  394. {L"lang", 9001},
  395. {L"laquo", 171},
  396. {L"larr", 8592},
  397. {L"lceil", 8968},
  398. {L"ldots", 8230},
  399. {L"ldquo", 8220},
  400. {L"le", 8804},
  401. {L"lfloor", 8970},
  402. {L"lowast", 8727},
  403. {L"loz", 9674},
  404. {L"lrm", 8206},
  405. {L"lsaquo", 8249},
  406. {L"lsquo", 8216},
  407. {L"lt", 60},
  408. {L"macr", 175},
  409. {L"mdash", 8212},
  410. {L"micro", 181},
  411. {L"middot", 183},
  412. {L"minus", 8722},
  413. {L"mu", 956},
  414. {L"nabla", 8711},
  415. {L"nbsp", 160},
  416. {L"ndash", 8211},
  417. {L"ne", 8800},
  418. {L"ni", 8715},
  419. {L"not", 172},
  420. {L"notin", 8713},
  421. {L"nsub", 8836},
  422. {L"ntilde", 241},
  423. {L"nu", 957},
  424. {L"oacute", 243},
  425. {L"ocirc", 244},
  426. {L"oelig", 339},
  427. {L"ograve", 242},
  428. {L"oline", 8254},
  429. {L"omega", 969},
  430. {L"omicron", 959},
  431. {L"oplus", 8853},
  432. {L"or", 8744},
  433. {L"ordf", 170},
  434. {L"ordm", 186},
  435. {L"oslash", 248},
  436. {L"otilde", 245},
  437. {L"otimes", 8855},
  438. {L"ouml", 246},
  439. {L"para", 182},
  440. {L"part", 8706},
  441. {L"permil", 8240},
  442. {L"perp", 8869},
  443. {L"phi", 966},
  444. {L"pi", 960},
  445. {L"piv", 982},
  446. {L"plusmn", 177},
  447. {L"pound", 163},
  448. {L"prime", 8242},
  449. {L"prod", 8719},
  450. {L"prop", 8733},
  451. {L"psi", 968},
  452. {L"quad", 8193},
  453. {L"quot", 34},
  454. {L"rArr", 8658},
  455. {L"radic", 8730},
  456. {L"rang", 9002},
  457. {L"raquo", 187},
  458. {L"rarr", 8594},
  459. {L"rceil", 8969},
  460. {L"rdquo", 8221},
  461. {L"real", 8476},
  462. {L"reg", 174},
  463. {L"rfloor", 8971},
  464. {L"rho", 961},
  465. {L"rlm", 8207},
  466. {L"rsaquo", 8250},
  467. {L"rsquo", 8217},
  468. {L"sbquo", 8218},
  469. {L"scaron", 353},
  470. {L"sdot", 8901},
  471. {L"sect", 167},
  472. {L"shy", 173},
  473. {L"sigma", 963},
  474. {L"sigmaf", 962},
  475. {L"sim", 8764},
  476. {L"sp", 8194},
  477. {L"spades", 9824},
  478. {L"sub", 8834},
  479. {L"sube", 8838},
  480. {L"sum", 8721},
  481. {L"sup", 8835},
  482. {L"sup1", 185},
  483. {L"sup2", 178},
  484. {L"sup3", 179},
  485. {L"supe", 8839},
  486. {L"szlig", 223},
  487. {L"tau", 964},
  488. {L"there4", 8756},
  489. {L"theta", 952},
  490. {L"thetasym", 977},
  491. {L"thinsp", 8201},
  492. {L"thorn", 254},
  493. {L"tilde", 732},
  494. {L"times", 215},
  495. {L"trade", 8482},
  496. {L"uArr", 8657},
  497. {L"uacute", 250},
  498. {L"uarr", 8593},
  499. {L"ucirc", 251},
  500. {L"ugrave", 249},
  501. {L"uml", 168},
  502. {L"upsih", 978},
  503. {L"upsilon", 965},
  504. {L"uuml", 252},
  505. {L"varepsilon", 8712},
  506. {L"varphi", 981},
  507. {L"varpi", 982},
  508. {L"varrho", 1009},
  509. {L"vdots", 8942},
  510. {L"vsigma", 962},
  511. {L"vtheta", 977},
  512. {L"weierp", 8472},
  513. {L"xi", 958},
  514. {L"yacute", 253},
  515. {L"yen", 165},
  516. {L"yuml", 255},
  517. {L"zeta", 950},
  518. {L"zwj", 8205},
  519. {L"zwnj", 8204}
  520. };
  521. #define NCHARTAB (sizeof(chartab)/sizeof(chartab[0]))
  522. // Characters Winstart..Winend are those that Windows
  523. // uses interpolated into the Latin1 set.
  524. // They aren't supposed to appear in HTML, but they do....
  525. enum {
  526. Winstart = 127,
  527. Winend = 159
  528. };
  529. static int winchars[]= { 8226, // 8226 is a bullet
  530. 8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,
  531. 710, 8240, 352, 8249, 338, 8226, 8226, 8226,
  532. 8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
  533. 732, 8482, 353, 8250, 339, 8226, 8226, 376};
  534. static StringInt* tagtable; // initialized from tagnames
  535. static StringInt* attrtable; // initialized from attrnames
  536. static void lexinit(void);
  537. static int getplaindata(TokenSource* ts, Token* a, int* pai);
  538. static int getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
  539. static int getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai, int findtag);
  540. static int gettag(TokenSource* ts, int starti, Token* a, int* pai);
  541. static Rune* buftostr(Rune* s, Rune* buf, int j);
  542. static int comment(TokenSource* ts);
  543. static int findstr(TokenSource* ts, Rune* s);
  544. static int ampersand(TokenSource* ts);
  545. static int lowerc(int c);
  546. static int getchar(TokenSource* ts);
  547. static void ungetchar(TokenSource* ts, int c);
  548. static void backup(TokenSource* ts, int savei);
  549. static void freeinsidetoken(Token* t);
  550. static void freeattrs(Attr* ahead);
  551. static Attr* newattr(int attid, Rune* value, Attr* link);
  552. static int Tconv(Fmt* f);
  553. int dbglex = 0;
  554. static int lexinited = 0;
  555. static void
  556. lexinit(void)
  557. {
  558. tagtable = _makestrinttab(tagnames, Numtags);
  559. attrtable = _makestrinttab(attrnames, Numattrs);
  560. fmtinstall('T', Tconv);
  561. lexinited = 1;
  562. }
  563. static TokenSource*
  564. newtokensource(uint8_t* data, int edata, int chset, int mtype)
  565. {
  566. TokenSource* ans;
  567. assert(chset == US_Ascii || chset == ISO_8859_1 ||
  568. chset == UTF_8 || chset == Unicode);
  569. ans = (TokenSource*)emalloc(sizeof(TokenSource));
  570. ans->i = 0;
  571. ans->data = data;
  572. ans->edata = edata;
  573. ans->chset = chset;
  574. ans->mtype = mtype;
  575. return ans;
  576. }
  577. enum {
  578. ToksChunk = 500,
  579. };
  580. // Call this to get the tokens.
  581. // The number of returned tokens is returned in *plen.
  582. Token*
  583. _gettoks(uint8_t* data, int datalen, int chset, int mtype, int* plen)
  584. {
  585. TokenSource* ts;
  586. Token* a;
  587. int alen;
  588. int ai;
  589. int starti;
  590. int c;
  591. int tag;
  592. if(!lexinited)
  593. lexinit();
  594. ts = newtokensource(data, datalen, chset, mtype);
  595. if(dbglex)
  596. fprint(2, "_gettoks starts, ts.i=%d, ts.edata=%d\n", ts->i, ts->edata);
  597. alen = 0;
  598. ai = 0;
  599. a = 0;
  600. if(ts->mtype == TextHtml) {
  601. for(;;) {
  602. if(alen - ai < ToksChunk/32) {
  603. alen += ToksChunk;
  604. a = erealloc(a, alen*sizeof *a);
  605. }
  606. starti = ts->i;
  607. c = getchar(ts);
  608. if(c < 0)
  609. break;
  610. if(c == '<') {
  611. tag = gettag(ts, starti, a, &ai);
  612. if(tag == Tscript || tag == Tstyle) {
  613. // special rules for getting Data after....
  614. starti = ts->i;
  615. c = getchar(ts);
  616. tag = getscriptdata(ts, c, starti, a, &ai, tag);
  617. }
  618. }
  619. else
  620. tag = getdata(ts, c, starti, a, &ai);
  621. if(tag == -1)
  622. break;
  623. else if(dbglex > 1 && tag != Comment)
  624. fprint(2, "lex: got token %T\n", &a[ai-1]);
  625. }
  626. }
  627. else {
  628. // plain text (non-html) tokens
  629. for(;;) {
  630. if(alen - ai < ToksChunk/32) {
  631. alen += ToksChunk;
  632. a = erealloc(a, alen*sizeof *a);
  633. }
  634. tag = getplaindata(ts, a, &ai);
  635. if(tag == -1)
  636. break;
  637. if(dbglex > 1)
  638. fprint(2, "lex: got token %T\n", &a[ai]);
  639. }
  640. }
  641. free(ts);
  642. if(dbglex)
  643. fprint(2, "lex: returning %d tokens\n", ai);
  644. *plen = ai;
  645. if(ai == 0){
  646. free(a);
  647. a = 0;
  648. }
  649. return a;
  650. }
  651. // For case where source isn't HTML.
  652. // Just make data tokens, one per line (or partial line,
  653. // at end of buffer), ignoring non-whitespace control
  654. // characters and dumping \r's.
  655. // If find non-empty token, fill in a[*pai], bump *pai, and return Data.
  656. // Otherwise return -1;
  657. static int
  658. getplaindata(TokenSource* ts, Token* a, int* pai)
  659. {
  660. Rune* s;
  661. int j;
  662. int starti;
  663. int c;
  664. Token* tok;
  665. Rune buf[BIGBUFSIZE];
  666. s = nil;
  667. j = 0;
  668. starti = ts->i;
  669. for(c = getchar(ts); c >= 0; c = getchar(ts)) {
  670. if(c < ' ') {
  671. if(isspace(c)) {
  672. if(c == '\r') {
  673. // ignore it unless no following '\n',
  674. // in which case treat it like '\n'
  675. c = getchar(ts);
  676. if(c != '\n') {
  677. if(c >= 0)
  678. ungetchar(ts, c);
  679. c = '\n';
  680. }
  681. }
  682. }
  683. else
  684. c = 0;
  685. }
  686. if(c != 0) {
  687. buf[j++] = c;
  688. if(j == nelem(buf)-1) {
  689. s = buftostr(s, buf, j);
  690. j = 0;
  691. }
  692. }
  693. if(c == '\n')
  694. break;
  695. }
  696. s = buftostr(s, buf, j);
  697. if(s == nil)
  698. return -1;
  699. tok = &a[(*pai)++];
  700. tok->tag = Data;
  701. tok->text = s;
  702. tok->attr = nil;
  703. tok->starti = starti;
  704. return Data;
  705. }
  706. // Return concatenation of s and buf[0:j]
  707. static Rune*
  708. buftostr(Rune* s, Rune* buf, int j)
  709. {
  710. int i;
  711. if(s == nil)
  712. s = _Strndup(buf, j);
  713. else {
  714. i = _Strlen(s);
  715. s = realloc(s, ( i+j+1)*sizeof *s);
  716. memcpy(&s[i], buf, j*sizeof *s);
  717. s[i+j] = 0;
  718. }
  719. return s;
  720. }
  721. // Gather data up to next start-of-tag or end-of-buffer.
  722. // Translate entity references (&amp;).
  723. // Ignore non-whitespace control characters and get rid of \r's.
  724. // If find non-empty token, fill in a[*pai], bump *pai, and return Data.
  725. // Otherwise return -1;
  726. static int
  727. getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
  728. {
  729. Rune* s;
  730. int j;
  731. int c;
  732. Token* tok;
  733. Rune buf[SMALLBUFSIZE];
  734. s = nil;
  735. j = 0;
  736. for(c = firstc; c >= 0; c = getchar(ts)){
  737. if(c == '&') {
  738. c = ampersand(ts);
  739. if(c < 0)
  740. break;
  741. }
  742. else if(c < ' ') {
  743. if(isspace(c)) {
  744. if(c == '\r') {
  745. // ignore it unless no following '\n',
  746. // in which case treat it like '\n'
  747. c = getchar(ts);
  748. if(c != '\n') {
  749. if(c >= 0)
  750. ungetchar(ts, c);
  751. c = '\n';
  752. }
  753. }
  754. }
  755. else {
  756. if(warn)
  757. fprint(2, "warning: non-whitespace control character %d ignored\n", c);
  758. c = 0;
  759. }
  760. }
  761. else if(c == '<') {
  762. ungetchar(ts, c);
  763. break;
  764. }
  765. if(c != 0) {
  766. buf[j++] = c;
  767. if(j == nelem(buf)-1) {
  768. s = buftostr(s, buf, j);
  769. j = 0;
  770. }
  771. }
  772. }
  773. s = buftostr(s, buf, j);
  774. if(s == nil)
  775. return -1;
  776. tok = &a[(*pai)++];
  777. tok->tag = Data;
  778. tok->text = s;
  779. tok->attr = nil;
  780. tok->starti = starti;
  781. return Data;
  782. }
  783. // The rules for lexing scripts are different (ugh).
  784. // Gather up everything until see an "</" tagnames[tok] ">"
  785. static int
  786. getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai, int findtag)
  787. {
  788. Rune* s;
  789. int j;
  790. int tstarti;
  791. int savei;
  792. int c;
  793. int tag;
  794. int done;
  795. Token* tok;
  796. Rune buf[BIGBUFSIZE];
  797. s = nil;
  798. j = 0;
  799. tstarti = starti;
  800. c = firstc;
  801. done = 0;
  802. while(c >= 0) {
  803. if(c == '<') {
  804. // other browsers ignore stuff to end of line after <!
  805. savei = ts->i;
  806. c = getchar(ts);
  807. if(c == '!') {
  808. if(comment(ts) == -1)
  809. break;
  810. if(c == '\r')
  811. c = getchar(ts);
  812. if(c == '\n')
  813. c = getchar(ts);
  814. }
  815. else if(c >= 0) {
  816. backup(ts, savei);
  817. tag = gettag(ts, tstarti, a, pai);
  818. if(tag == -1)
  819. break;
  820. if(tag != Comment)
  821. (*pai)--;
  822. backup(ts, tstarti);
  823. if(tag == findtag + RBRA) {
  824. done = 1;
  825. break;
  826. }
  827. // here tag was not the one we were looking for, so take as regular data
  828. c = getchar(ts);
  829. }
  830. }
  831. if(c < 0)
  832. break;
  833. if(c != 0) {
  834. buf[j++] = c;
  835. if(j == nelem(buf)-1) {
  836. s = buftostr(s, buf, j);
  837. j = 0;
  838. }
  839. }
  840. tstarti = ts->i;
  841. c = getchar(ts);
  842. }
  843. if(done || ts->i == ts->edata) {
  844. s = buftostr(s, buf, j);
  845. tok = &a[(*pai)++];
  846. tok->tag = Data;
  847. tok->text = s;
  848. tok->attr = nil;
  849. tok->starti = starti;
  850. return Data;
  851. }
  852. free(s);
  853. backup(ts, starti);
  854. return -1;
  855. }
  856. // We've just seen a '<'. Gather up stuff to closing '>' (if buffer
  857. // ends before then, return -1).
  858. // If it's a tag, look up the name, gather the attributes, and return
  859. // the appropriate token.
  860. // Else it's either just plain data or some kind of ignorable stuff:
  861. // return Data or Comment as appropriate.
  862. // If it's not a Comment, put it in a[*pai] and bump *pai.
  863. static int
  864. gettag(TokenSource* ts, int starti, Token* a, int* pai)
  865. {
  866. int rbra;
  867. int ans;
  868. Attr* al;
  869. int nexti;
  870. int c;
  871. int ti;
  872. int afnd;
  873. int attid;
  874. int quote;
  875. Rune* val;
  876. int nv;
  877. int i;
  878. int tag;
  879. Token* tok;
  880. Rune buf[BIGBUFSIZE];
  881. rbra = 0;
  882. nexti = ts->i;
  883. tok = &a[*pai];
  884. tok->tag = Notfound;
  885. tok->text = nil;
  886. tok->attr = nil;
  887. tok->starti = starti;
  888. c = getchar(ts);
  889. if(c == '/') {
  890. rbra = RBRA;
  891. c = getchar(ts);
  892. }
  893. if(c < 0)
  894. goto eob_done;
  895. if(c >= 256 || !isalpha(c)) {
  896. // not a tag
  897. if(c == '!') {
  898. ans = comment(ts);
  899. if(ans != -1)
  900. return ans;
  901. goto eob_done;
  902. }
  903. else {
  904. backup(ts, nexti);
  905. tok->tag = Data;
  906. tok->text = _Strdup(L"<");
  907. (*pai)++;
  908. return Data;
  909. }
  910. }
  911. // c starts a tagname
  912. buf[0] = c;
  913. i = 1;
  914. while(1) {
  915. c = getchar(ts);
  916. if(c < 0)
  917. goto eob_done;
  918. if(!ISNAMCHAR(c))
  919. break;
  920. // if name is bigger than buf it won't be found anyway...
  921. if(i < BIGBUFSIZE)
  922. buf[i++] = c;
  923. }
  924. if(_lookup(tagtable, Numtags, buf, i, &tag))
  925. tok->tag = tag + rbra;
  926. else
  927. tok->text = _Strndup(buf, i); // for warning print, in build
  928. // attribute gathering loop
  929. al = nil;
  930. while(1) {
  931. // look for "ws name" or "ws name ws = ws val" (ws=whitespace)
  932. // skip whitespace
  933. attrloop_continue:
  934. while(c < 256 && isspace(c)) {
  935. c = getchar(ts);
  936. if(c < 0)
  937. goto eob_done;
  938. }
  939. if(c == '>')
  940. goto attrloop_done;
  941. if(c == '<') {
  942. if(warn)
  943. fprint(2, "warning: unclosed tag\n");
  944. ungetchar(ts, c);
  945. goto attrloop_done;
  946. }
  947. if(c >= 256 || !isalpha(c)) {
  948. if(warn)
  949. fprint(2, "warning: expected attribute name\n");
  950. // skipt to next attribute name
  951. while(1) {
  952. c = getchar(ts);
  953. if(c < 0)
  954. goto eob_done;
  955. if(c < 256 && isalpha(c))
  956. goto attrloop_continue;
  957. if(c == '<') {
  958. if(warn)
  959. fprint(2, "warning: unclosed tag\n");
  960. ungetchar(ts, 60);
  961. goto attrloop_done;
  962. }
  963. if(c == '>')
  964. goto attrloop_done;
  965. }
  966. }
  967. // gather attribute name
  968. buf[0] = c;
  969. i = 1;
  970. while(1) {
  971. c = getchar(ts);
  972. if(c < 0)
  973. goto eob_done;
  974. if(!ISNAMCHAR(c))
  975. break;
  976. if(i < BIGBUFSIZE-1)
  977. buf[i++] = c;
  978. }
  979. afnd = _lookup(attrtable, Numattrs, buf, i, &attid);
  980. if(warn && !afnd) {
  981. buf[i] = 0;
  982. fprint(2, "warning: unknown attribute name %S\n", buf);
  983. }
  984. // skip whitespace
  985. while(c < 256 && isspace(c)) {
  986. c = getchar(ts);
  987. if(c < 0)
  988. goto eob_done;
  989. }
  990. if(c != '=') {
  991. if(afnd)
  992. al = newattr(attid, nil, al);
  993. goto attrloop_continue;
  994. }
  995. //# c is '=' here; skip whitespace
  996. while(1) {
  997. c = getchar(ts);
  998. if(c < 0)
  999. goto eob_done;
  1000. if(c >= 256 || !isspace(c))
  1001. break;
  1002. }
  1003. quote = 0;
  1004. if(c == '\'' || c == '"') {
  1005. quote = c;
  1006. c = getchar(ts);
  1007. if(c < 0)
  1008. goto eob_done;
  1009. }
  1010. val = nil;
  1011. nv = 0;
  1012. while(1) {
  1013. valloop_continue:
  1014. if(c < 0)
  1015. goto eob_done;
  1016. if(c == '>') {
  1017. if(quote) {
  1018. // c might be part of string (though not good style)
  1019. // but if line ends before close quote, assume
  1020. // there was an unmatched quote
  1021. ti = ts->i;
  1022. while(1) {
  1023. c = getchar(ts);
  1024. if(c < 0)
  1025. goto eob_done;
  1026. if(c == quote) {
  1027. backup(ts, ti);
  1028. buf[nv++] = '>';
  1029. if(nv == BIGBUFSIZE-1) {
  1030. val = buftostr(val, buf, nv);
  1031. nv = 0;
  1032. }
  1033. c = getchar(ts);
  1034. goto valloop_continue;
  1035. }
  1036. if(c == '\n') {
  1037. if(warn)
  1038. fprint(2, "warning: apparent unmatched quote\n");
  1039. backup(ts, ti);
  1040. c = '>';
  1041. goto valloop_done;
  1042. }
  1043. }
  1044. }
  1045. else
  1046. goto valloop_done;
  1047. }
  1048. if(quote) {
  1049. if(c == quote) {
  1050. c = getchar(ts);
  1051. if(c < 0)
  1052. goto eob_done;
  1053. goto valloop_done;
  1054. }
  1055. if(c == '\r') {
  1056. c = getchar(ts);
  1057. goto valloop_continue;
  1058. }
  1059. if(c == '\t' || c == '\n')
  1060. c = ' ';
  1061. }
  1062. else {
  1063. if(c < 256 && isspace(c))
  1064. goto valloop_done;
  1065. }
  1066. if(c == '&') {
  1067. c = ampersand(ts);
  1068. if(c == -1)
  1069. goto eob_done;
  1070. }
  1071. buf[nv++] = c;
  1072. if(nv == BIGBUFSIZE-1) {
  1073. val = buftostr(val, buf, nv);
  1074. nv = 0;
  1075. }
  1076. c = getchar(ts);
  1077. }
  1078. valloop_done:
  1079. if(afnd) {
  1080. val = buftostr(val, buf, nv);
  1081. al = newattr(attid, val, al);
  1082. }
  1083. }
  1084. attrloop_done:
  1085. tok->attr = al;
  1086. (*pai)++;
  1087. return tok->tag;
  1088. eob_done:
  1089. if(warn)
  1090. fprint(2, "warning: incomplete tag at end of page\n");
  1091. backup(ts, nexti);
  1092. tok->tag = Data;
  1093. tok->text = _Strdup(L"<");
  1094. return Data;
  1095. }
  1096. // We've just read a '<!' at position starti,
  1097. // so this may be a comment or other ignored section, or it may
  1098. // be just a literal string if there is no close before end of file
  1099. // (other browsers do that).
  1100. // The accepted practice seems to be (note: contrary to SGML spec!):
  1101. // If see <!--, look for --> to close, or if none, > to close.
  1102. // If see <!(not --), look for > to close.
  1103. // If no close before end of file, leave original characters in as literal data.
  1104. //
  1105. // If we see ignorable stuff, return Comment.
  1106. // Else return nil (caller should back up and try again when more data arrives,
  1107. // unless at end of file, in which case caller should just make '<' a data token).
  1108. static int
  1109. comment(TokenSource* ts)
  1110. {
  1111. int nexti;
  1112. int havecomment;
  1113. int c;
  1114. nexti = ts->i;
  1115. havecomment = 0;
  1116. c = getchar(ts);
  1117. if(c == '-') {
  1118. c = getchar(ts);
  1119. if(c == '-') {
  1120. if(findstr(ts, L"-->"))
  1121. havecomment = 1;
  1122. else
  1123. backup(ts, nexti);
  1124. }
  1125. }
  1126. if(!havecomment) {
  1127. if(c == '>')
  1128. havecomment = 1;
  1129. else if(c >= 0) {
  1130. if(findstr(ts, L">"))
  1131. havecomment = 1;
  1132. }
  1133. }
  1134. if(havecomment)
  1135. return Comment;
  1136. return -1;
  1137. }
  1138. // Look for string s in token source.
  1139. // If found, return 1, with buffer at next char after s,
  1140. // else return 0 (caller should back up).
  1141. static int
  1142. findstr(TokenSource* ts, Rune* s)
  1143. {
  1144. int c0;
  1145. int n;
  1146. int nexti;
  1147. int i;
  1148. int c;
  1149. c0 = s[0];
  1150. n = runestrlen(s);
  1151. while(1) {
  1152. c = getchar(ts);
  1153. if(c < 0)
  1154. break;
  1155. if(c == c0) {
  1156. if(n == 1)
  1157. return 1;
  1158. nexti = ts->i;
  1159. for(i = 1; i < n; i++) {
  1160. c = getchar(ts);
  1161. if(c < 0)
  1162. goto mainloop_done;
  1163. if(c != s[i])
  1164. break;
  1165. }
  1166. if(i == n)
  1167. return 1;
  1168. backup(ts, nexti);
  1169. }
  1170. }
  1171. mainloop_done:
  1172. return 0;
  1173. }
  1174. // We've just read an '&'; look for an entity reference
  1175. // name, and if found, return translated char.
  1176. // if there is a complete entity name but it isn't known,
  1177. // back up to just past the '&' and return '&'.
  1178. // If the entity can't be completed in the current buffer, back up
  1179. // to the '&' and return -1.
  1180. static int
  1181. ampersand(TokenSource* ts)
  1182. {
  1183. int savei;
  1184. int c;
  1185. int fnd;
  1186. int ans;
  1187. int v;
  1188. int k;
  1189. Rune buf[25];
  1190. savei = ts->i;
  1191. c = getchar(ts);
  1192. fnd = 0;
  1193. ans = -1;
  1194. if(c == '#') {
  1195. c = getchar(ts);
  1196. v = 0;
  1197. if(c == 'X' || c == 'x')
  1198. for(c = getchar(ts); c < 256; c = getchar(ts))
  1199. if(c >= '0' && c <= '9')
  1200. v = v*16+c-'0';
  1201. else if(c >= 'A' && c<= 'F')
  1202. v = v*16+c-'A'+10;
  1203. else if(c >= 'a' && c <= 'f')
  1204. v = v*16+c-'a'+10;
  1205. else
  1206. break;
  1207. else
  1208. while(c >= 0) {
  1209. if(!(c < 256 && isdigit(c)))
  1210. break;
  1211. v = v*10 + c - 48;
  1212. c = getchar(ts);
  1213. }
  1214. if(c >= 0) {
  1215. if(!(c == ';' || c == '\n' || c == '\r'))
  1216. ungetchar(ts, c);
  1217. c = v;
  1218. if(c == 160)
  1219. c = 160;
  1220. if(c >= Winstart && c <= Winend) {
  1221. c = winchars[c - Winstart];
  1222. }
  1223. ans = c;
  1224. fnd = 1;
  1225. }
  1226. }
  1227. else if(c < 256 && isalpha(c)) {
  1228. buf[0] = c;
  1229. k = 1;
  1230. while(1) {
  1231. c = getchar(ts);
  1232. if(c < 0)
  1233. break;
  1234. if(c < 256 && (isalpha(c) || isdigit(c))) {
  1235. if(k < nelem(buf)-1)
  1236. buf[k++] = c;
  1237. }
  1238. else {
  1239. if(!(c == ';' || c == '\n' || c == '\r'))
  1240. ungetchar(ts, c);
  1241. break;
  1242. }
  1243. }
  1244. if(c >= 256 || c != '=' && !(isalpha(c) || isdigit(c)))
  1245. fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
  1246. }
  1247. if(!fnd) {
  1248. backup(ts, savei);
  1249. ans = '&';
  1250. }
  1251. return ans;
  1252. }
  1253. // Get next char, obeying ts.chset.
  1254. // Returns -1 if no complete character left before current end of data.
  1255. static int
  1256. getchar(TokenSource* ts)
  1257. {
  1258. uint8_t* buf;
  1259. int c;
  1260. int n;
  1261. int ok;
  1262. Rune r;
  1263. if(ts->i >= ts->edata)
  1264. return -1;
  1265. buf = ts->data;
  1266. c = buf[ts->i];
  1267. switch(ts->chset) {
  1268. case ISO_8859_1:
  1269. if(c >= Winstart && c <= Winend)
  1270. c = winchars[c - Winstart];
  1271. ts->i++;
  1272. break;
  1273. case US_Ascii:
  1274. if(c > 127) {
  1275. if(warn)
  1276. fprint(2, "non-ascii char (%x) when US-ASCII specified\n", c);
  1277. }
  1278. ts->i++;
  1279. break;
  1280. case UTF_8:
  1281. ok = fullrune((char*)(buf+ts->i), ts->edata-ts->i);
  1282. n = chartorune(&r, (char*)(buf+ts->i));
  1283. if(ok) {
  1284. if(warn && c == 0x80)
  1285. fprint(2, "warning: invalid utf-8 sequence (starts with %x)\n", ts->data[ts->i]);
  1286. ts->i += n;
  1287. c = r;
  1288. }
  1289. else {
  1290. // not enough bytes in buf to complete utf-8 char
  1291. ts->i = ts->edata; // mark "all used"
  1292. c = -1;
  1293. }
  1294. break;
  1295. case Unicode:
  1296. if(ts->i < ts->edata - 1) {
  1297. //standards say most-significant byte first
  1298. c = (c << 8)|(buf[ts->i + 1]);
  1299. ts->i += 2;
  1300. }
  1301. else {
  1302. ts->i = ts->edata; // mark "all used"
  1303. c = -1;
  1304. }
  1305. break;
  1306. default:
  1307. return -1;
  1308. }
  1309. return c;
  1310. }
  1311. // Assuming c was the last character returned by getchar, set
  1312. // things up so that next getchar will get that same character
  1313. // followed by the current 'next character', etc.
  1314. static void
  1315. ungetchar(TokenSource* ts, int c)
  1316. {
  1317. int n;
  1318. Rune r;
  1319. char a[UTFmax];
  1320. n = 1;
  1321. switch(ts->chset) {
  1322. case UTF_8:
  1323. if(c >= 128) {
  1324. r = c;
  1325. n = runetochar(a, &r);
  1326. }
  1327. break;
  1328. case Unicode:
  1329. n = 2;
  1330. break;
  1331. }
  1332. ts->i -= n;
  1333. }
  1334. // Restore ts so that it is at the state where the index was savei.
  1335. static void
  1336. backup(TokenSource* ts, int savei)
  1337. {
  1338. if(dbglex)
  1339. fprint(2, "lex: backup; i=%d, savei=%d\n", ts->i, savei);
  1340. ts->i = savei;
  1341. }
  1342. // Look for value associated with attribute attid in token t.
  1343. // If there is one, return 1 and put the value in *pans,
  1344. // else return 0.
  1345. // If xfer is true, transfer ownership of the string to the caller
  1346. // (nil it out here); otherwise, caller must duplicate the answer
  1347. // if it needs to save it.
  1348. // OK to have pans==0, in which case this is just looking
  1349. // to see if token is present.
  1350. int
  1351. _tokaval(Token* t, int attid, Rune** pans, int xfer)
  1352. {
  1353. Attr* attr;
  1354. attr = t->attr;
  1355. while(attr != nil) {
  1356. if(attr->attid == attid) {
  1357. if(pans != nil)
  1358. *pans = attr->value;
  1359. if(xfer)
  1360. attr->value = nil;
  1361. return 1;
  1362. }
  1363. attr = attr->next;
  1364. }
  1365. if(pans != nil)
  1366. *pans = nil;
  1367. return 0;
  1368. }
  1369. static int
  1370. Tconv(Fmt *f)
  1371. {
  1372. Token* t;
  1373. int i;
  1374. int tag;
  1375. char* srbra;
  1376. Rune* aname;
  1377. Rune* tname;
  1378. Attr* a;
  1379. char buf[BIGBUFSIZE];
  1380. t = va_arg(f->args, Token*);
  1381. if(t == nil)
  1382. sprint(buf, "<null>");
  1383. else {
  1384. i = 0;
  1385. if(dbglex > 1)
  1386. i = snprint(buf, sizeof(buf), "[%d]", t->starti);
  1387. tag = t->tag;
  1388. if(tag == Data) {
  1389. i += snprint(buf+i, sizeof(buf)-i-1, "'%S'", t->text);
  1390. }
  1391. else {
  1392. srbra = "";
  1393. if(tag >= RBRA) {
  1394. tag -= RBRA;
  1395. srbra = "/";
  1396. }
  1397. tname = tagnames[tag];
  1398. if(tag == Notfound)
  1399. tname = L"?";
  1400. i += snprint(buf+i, sizeof(buf)-i-1, "<%s%S", srbra, tname);
  1401. for(a = t->attr; a != nil; a = a->next) {
  1402. aname = attrnames[a->attid];
  1403. i += snprint(buf+i, sizeof(buf)-i-1, " %S", aname);
  1404. if(a->value != nil)
  1405. i += snprint(buf+i, sizeof(buf)-i-1, "=%S", a->value);
  1406. }
  1407. i += snprint(buf+i, sizeof(buf)-i-1, ">");
  1408. }
  1409. buf[i] = 0;
  1410. }
  1411. return fmtstrcpy(f, buf);
  1412. }
  1413. // Attrs own their constituent strings, but build may eventually
  1414. // transfer some values to its items and nil them out in the Attr.
  1415. static Attr*
  1416. newattr(int attid, Rune* value, Attr* link)
  1417. {
  1418. Attr* ans;
  1419. ans = (Attr*)emalloc(sizeof(Attr));
  1420. ans->attid = attid;
  1421. ans->value = value;
  1422. ans->next = link;
  1423. return ans;
  1424. }
  1425. // Free list of Attrs linked through next field
  1426. static void
  1427. freeattrs(Attr* ahead)
  1428. {
  1429. Attr* a;
  1430. Attr* nexta;
  1431. a = ahead;
  1432. while(a != nil) {
  1433. nexta = a->next;
  1434. free(a->value);
  1435. free(a);
  1436. a = nexta;
  1437. }
  1438. }
  1439. // Free array of Tokens.
  1440. // Allocated space might have room for more than n tokens,
  1441. // but only n of them are initialized.
  1442. // If caller has transferred ownership of constitutent strings
  1443. // or attributes, it must have nil'd out the pointers in the Tokens.
  1444. void
  1445. _freetokens(Token* tarray, int n)
  1446. {
  1447. int i;
  1448. Token* t;
  1449. if(tarray == nil)
  1450. return;
  1451. for(i = 0; i < n; i++) {
  1452. t = &tarray[i];
  1453. free(t->text);
  1454. freeattrs(t->attr);
  1455. }
  1456. free(tarray);
  1457. }