oed.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. #include "dict.h"
  13. enum {
  14. Buflen=1000,
  15. Maxaux=5,
  16. };
  17. /* Possible tags */
  18. enum {
  19. A, /* author in quote (small caps) */
  20. B, /* bold */
  21. Ba, /* author inside bib */
  22. Bch, /* builtup chem component */
  23. Bib, /* surrounds word 'in' for bibliographic ref */
  24. Bl, /* bold */
  25. Bo, /* bond over */
  26. Bu, /* bond under */
  27. Cb, /* ? block of stuff (indent) */
  28. Cf, /* cross ref to another entry (italics) */
  29. Chem, /* chemistry formula */
  30. Co, /* over (preceding sum, integral, etc.) */
  31. Col, /* column of table (aux just may be r) */
  32. Cu, /* under (preceding sum, integral, etc.) */
  33. Dat, /* date */
  34. Db, /* def block? indent */
  35. Dn, /* denominator of fraction */
  36. E, /* main entry */
  37. Ed, /* editor's comments (in [...]) */
  38. Etym, /* etymology (in [...]) */
  39. Fq, /* frequency count (superscript) */
  40. Form, /* formula */
  41. Fr, /* fraction (contains <nu>, then <dn>) */
  42. Gk, /* greek (transliteration) */
  43. Gr, /* grammar? (e.g., around 'pa.' in 'pa. pple.') */
  44. Hg, /* headword group */
  45. Hm, /* homonym (superscript) */
  46. Hw, /* headword (bold) */
  47. I, /* italics */
  48. Il, /* italic list? */
  49. In, /* inferior (subscript) */
  50. L, /* row of col of table */
  51. La, /* status or usage label (italic) */
  52. Lc, /* chapter/verse sort of thing for works */
  53. N, /* note (smaller type) */
  54. Nu, /* numerator of fraction */
  55. Ov, /* needs overline */
  56. P, /* paragraph (indent) */
  57. Ph, /* pronunciation (transliteration) */
  58. Pi, /* pile (frac without line) */
  59. Pqp, /* subblock of quote */
  60. Pr, /* pronunciation (in (...)) */
  61. Ps, /* position (e.g., adv.) (italic) */
  62. Pt, /* part (in lc) */
  63. Q, /* quote in quote block */
  64. Qd, /* quote date (bold) */
  65. Qig, /* quote number (greek) */
  66. Qla, /* status or usage label in quote (italic) */
  67. Qp, /* quote block (small type, indent) */
  68. Qsn, /* quote number */
  69. Qt, /* quote words */
  70. R, /* roman type style */
  71. Rx, /* relative cross reference (e.g., next) */
  72. S, /* another form? (italic) */
  73. S0, /* sense (sometimes surrounds several sx's) */
  74. S1, /* sense (aux num: indented bold letter) */
  75. S2, /* sense (aux num: indented bold capital rom num) */
  76. S3, /* sense (aux num: indented number of asterisks) */
  77. S4, /* sense (aux num: indented bold number) */
  78. S5, /* sense (aux num: indented number of asterisks) */
  79. S6, /* subsense (aux num: bold letter) */
  80. S7a, /* subsense (aux num: letter) */
  81. S7n, /* subsense (aux num: roman numeral) */
  82. Sc, /* small caps */
  83. Sgk, /* subsense (aux num: transliterated greek) */
  84. Sn, /* sense of subdefinition (aux num: roman letter) */
  85. Ss, /* sans serif */
  86. Ssb, /* sans serif bold */
  87. Ssi, /* sans serif italic */
  88. Su, /* superior (superscript) */
  89. Sub, /* subdefinition */
  90. Table, /* table (aux cols=number of columns) */
  91. Tt, /* title? (italics) */
  92. Vd, /* numeric label for variant form */
  93. Ve, /* variant entry */
  94. Vf, /* variant form (light bold) */
  95. Vfl, /* list of vf's (starts with Also or Forms) */
  96. W, /* work (e.g., Beowulf) (italics) */
  97. X, /* cross reference to main word (small caps) */
  98. Xd, /* cross reference to quotation by date */
  99. Xi, /* internal cross reference ? (italic) */
  100. Xid, /* cross reference identifer, in quote ? */
  101. Xs, /* cross reference sense (lower number) */
  102. Xr, /* list of x's */
  103. Ntag /* end of tags */
  104. };
  105. /* Assoc tables must be sorted on first field */
  106. static Assoc tagtab[] = {
  107. {"a", A},
  108. {"b", B},
  109. {"ba", Ba},
  110. {"bch", Bch},
  111. {"bib", Bib},
  112. {"bl", Bl},
  113. {"bo", Bo},
  114. {"bu", Bu},
  115. {"cb", Cb},
  116. {"cf", Cf},
  117. {"chem", Chem},
  118. {"co", Co},
  119. {"col", Col},
  120. {"cu", Cu},
  121. {"dat", Dat},
  122. {"db", Db},
  123. {"dn", Dn},
  124. {"e", E},
  125. {"ed", Ed},
  126. {"et", Etym},
  127. {"etym", Etym},
  128. {"form", Form},
  129. {"fq", Fq},
  130. {"fr", Fr},
  131. {"frac", Fr},
  132. {"gk", Gk},
  133. {"gr", Gr},
  134. {"hg", Hg},
  135. {"hm", Hm},
  136. {"hw", Hw},
  137. {"i", I},
  138. {"il", Il},
  139. {"in", In},
  140. {"l", L},
  141. {"la", La},
  142. {"lc", Lc},
  143. {"n", N},
  144. {"nu", Nu},
  145. {"ov", Ov},
  146. {"p", P},
  147. {"ph", Ph},
  148. {"pi", Pi},
  149. {"pqp", Pqp},
  150. {"pr", Pr},
  151. {"ps", Ps},
  152. {"pt", Pt},
  153. {"q", Q},
  154. {"qd", Qd},
  155. {"qig", Qig},
  156. {"qla", Qla},
  157. {"qp", Qp},
  158. {"qsn", Qsn},
  159. {"qt", Qt},
  160. {"r", R},
  161. {"rx", Rx},
  162. {"s", S},
  163. {"s0", S0},
  164. {"s1", S1},
  165. {"s2", S2},
  166. {"s3", S3},
  167. {"s4", S4},
  168. {"s5", S5},
  169. {"s6", S6},
  170. {"s7a", S7a},
  171. {"s7n", S7n},
  172. {"sc", Sc},
  173. {"sgk", Sgk},
  174. {"sn", Sn},
  175. {"ss", Ss,},
  176. {"ssb", Ssb},
  177. {"ssi", Ssi},
  178. {"su", Su},
  179. {"sub", Sub},
  180. {"table", Table},
  181. {"tt", Tt},
  182. {"vd", Vd},
  183. {"ve", Ve},
  184. {"vf", Vf},
  185. {"vfl", Vfl},
  186. {"w", W},
  187. {"x", X},
  188. {"xd", Xd},
  189. {"xi", Xi},
  190. {"xid", Xid},
  191. {"xr", Xr},
  192. {"xs", Xs},
  193. };
  194. /* Possible tag auxilliary info */
  195. enum {
  196. Cols, /* number of columns in a table */
  197. Num, /* letter or number, for a sense */
  198. St, /* status (e.g., obs) */
  199. Naux
  200. };
  201. static Assoc auxtab[] = {
  202. {"cols", Cols},
  203. {"num", Num},
  204. {"st", St}
  205. };
  206. static Assoc spectab[] = {
  207. {"3on4", L'¾'},
  208. {"Aacu", L'Á'},
  209. {"Aang", L'Å'},
  210. {"Abarab", L'Ā'},
  211. {"Acirc", L'Â'},
  212. {"Ae", L'Æ'},
  213. {"Agrave", L'À'},
  214. {"Alpha", L'Α'},
  215. {"Amac", L'Ā'},
  216. {"Asg", L'Ʒ'}, /* Unicyle. Cf "Sake" */
  217. {"Auml", L'Ä'},
  218. {"Beta", L'Β'},
  219. {"Cced", L'Ç'},
  220. {"Chacek", L'Č'},
  221. {"Chi", L'Χ'},
  222. {"Chirho", L'☧'}, /* Chi Rho U+2627 */
  223. {"Csigma", L'Ϛ'},
  224. {"Delta", L'Δ'},
  225. {"Eacu", L'É'},
  226. {"Ecirc", L'Ê'},
  227. {"Edh", L'Ð'},
  228. {"Epsilon", L'Ε'},
  229. {"Eta", L'Η'},
  230. {"Gamma", L'Γ'},
  231. {"Iacu", L'Í'},
  232. {"Icirc", L'Î'},
  233. {"Imac", L'Ī'},
  234. {"Integ", L'∫'},
  235. {"Iota", L'Ι'},
  236. {"Kappa", L'Κ'},
  237. {"Koppa", L'Ϟ'},
  238. {"Lambda", L'Λ'},
  239. {"Lbar", L'Ł'},
  240. {"Mu", L'Μ'},
  241. {"Naira", L'N'}, /* should have bar through */
  242. {"Nplus", L'N'}, /* should have plus above */
  243. {"Ntilde", L'Ñ'},
  244. {"Nu", L'Ν'},
  245. {"Oacu", L'Ó'},
  246. {"Obar", L'Ø'},
  247. {"Ocirc", L'Ô'},
  248. {"Oe", L'Œ'},
  249. {"Omega", L'Ω'},
  250. {"Omicron", L'Ο'},
  251. {"Ouml", L'Ö'},
  252. {"Phi", L'Φ'},
  253. {"Pi", L'Π'},
  254. {"Psi", L'Ψ'},
  255. {"Rho", L'Ρ'},
  256. {"Sacu", L'Ś'},
  257. {"Sigma", L'Σ'},
  258. {"Summ", L'∑'},
  259. {"Tau", L'Τ'},
  260. {"Th", L'Þ'},
  261. {"Theta", L'Θ'},
  262. {"Tse", L'Ц'},
  263. {"Uacu", L'Ú'},
  264. {"Ucirc", L'Û'},
  265. {"Upsilon", L'Υ'},
  266. {"Uuml", L'Ü'},
  267. {"Wyn", L'ƿ'}, /* wynn U+01BF */
  268. {"Xi", L'Ξ'},
  269. {"Ygh", L'Ʒ'}, /* Yogh U+01B7 */
  270. {"Zeta", L'Ζ'},
  271. {"Zh", L'Ʒ'}, /* looks like Yogh. Cf "Sake" */
  272. {"a", L'a'}, /* ante */
  273. {"aacu", L'á'},
  274. {"aang", L'å'},
  275. {"aasper", MAAS},
  276. {"abreve", L'ă'},
  277. {"acirc", L'â'},
  278. {"acu", LACU},
  279. {"ae", L'æ'},
  280. {"agrave", L'à'},
  281. {"ahook", L'ą'},
  282. {"alenis", MALN},
  283. {"alpha", L'α'},
  284. {"amac", L'ā'},
  285. {"amp", L'&'},
  286. {"and", MAND},
  287. {"ang", LRNG},
  288. {"angle", L'∠'},
  289. {"ankh", L'☥'}, /* ankh U+2625 */
  290. {"ante", L'a'}, /* before (year) */
  291. {"aonq", MAOQ},
  292. {"appreq", L'≃'},
  293. {"aquar", L'♒'},
  294. {"arDadfull", L'ض'}, /* Dad U+0636 */
  295. {"arHa", L'ح'}, /* haa U+062D */
  296. {"arTa", L'ت'}, /* taa U+062A */
  297. {"arain", L'ع'}, /* ain U+0639 */
  298. {"arainfull", L'ع'}, /* ain U+0639 */
  299. {"aralif", L'ا'}, /* alef U+0627 */
  300. {"arba", L'ب'}, /* baa U+0628 */
  301. {"arha", L'ه'}, /* ha U+0647 */
  302. {"aries", L'♈'},
  303. {"arnun", L'ن'}, /* noon U+0646 */
  304. {"arnunfull", L'ن'}, /* noon U+0646 */
  305. {"arpa", L'ه'}, /* ha U+0647 */
  306. {"arqoph", L'ق'}, /* qaf U+0642 */
  307. {"arshinfull", L'ش'}, /* sheen U+0634 */
  308. {"arta", L'ت'}, /* taa U+062A */
  309. {"artafull", L'ت'}, /* taa U+062A */
  310. {"artha", L'ث'}, /* thaa U+062B */
  311. {"arwaw", L'و'}, /* waw U+0648 */
  312. {"arya", L'ي'}, /* ya U+064A */
  313. {"aryafull", L'ي'}, /* ya U+064A */
  314. {"arzero", L'٠'}, /* indic zero U+0660 */
  315. {"asg", L'ʒ'}, /* unicycle character. Cf "hallow" */
  316. {"asper", LASP},
  317. {"assert", L'⊢'},
  318. {"astm", L'⁂'}, /* asterism: should be upside down */
  319. {"at", L'@'},
  320. {"atilde", L'ã'},
  321. {"auml", L'ä'},
  322. {"ayin", L'ع'}, /* arabic ain U+0639 */
  323. {"b1", L'-'}, /* single bond */
  324. {"b2", L'='}, /* double bond */
  325. {"b3", L'≡'}, /* triple bond */
  326. {"bbar", L'ƀ'}, /* b with bar U+0180 */
  327. {"beta", L'β'},
  328. {"bigobl", L'/'},
  329. {"blC", L'C'}, /* should be black letter */
  330. {"blJ", L'J'}, /* should be black letter */
  331. {"blU", L'U'}, /* should be black letter */
  332. {"blb", L'b'}, /* should be black letter */
  333. {"blozenge", L'◊'}, /* U+25CA; should be black */
  334. {"bly", L'y'}, /* should be black letter */
  335. {"bra", MBRA},
  336. {"brbl", LBRB},
  337. {"breve", LBRV},
  338. {"bslash", L'\\'},
  339. {"bsquare", L'■'}, /* black square U+25A0 */
  340. {"btril", L'◀'}, /* U+25C0 */
  341. {"btrir", L'▶'}, /* U+25B6 */
  342. {"c", L'c'}, /* circa */
  343. {"cab", L'〉'},
  344. {"cacu", L'ć'},
  345. {"canc", L'♋'},
  346. {"capr", L'♑'},
  347. {"caret", L'^'},
  348. {"cb", L'}'},
  349. {"cbigb", L'}'},
  350. {"cbigpren", L')'},
  351. {"cbigsb", L']'},
  352. {"cced", L'ç'},
  353. {"cdil", LCED},
  354. {"cdsb", L'〛'}, /* ]] U+301b */
  355. {"cent", L'¢'},
  356. {"chacek", L'č'},
  357. {"chi", L'χ'},
  358. {"circ", LRNG},
  359. {"circa", L'c'}, /* about (year) */
  360. {"circbl", L'̥'}, /* ring below accent U+0325 */
  361. {"circle", L'○'}, /* U+25CB */
  362. {"circledot", L'⊙'},
  363. {"click", L'ʖ'},
  364. {"club", L'♣'},
  365. {"comtime", L'C'},
  366. {"conj", L'☌'},
  367. {"cprt", L'©'},
  368. {"cq", L'\''},
  369. {"cqq", L'”'},
  370. {"cross", L'✠'}, /* maltese cross U+2720 */
  371. {"crotchet", L'♩'},
  372. {"csb", L']'},
  373. {"ctilde", L'c'}, /* +tilde */
  374. {"ctlig", MLCT},
  375. {"cyra", L'а'},
  376. {"cyre", L'е'},
  377. {"cyrhard", L'ъ'},
  378. {"cyrjat", L'ѣ'},
  379. {"cyrm", L'м'},
  380. {"cyrn", L'н'},
  381. {"cyrr", L'р'},
  382. {"cyrsoft", L'ь'},
  383. {"cyrt", L'т'},
  384. {"cyry", L'ы'},
  385. {"dag", L'†'},
  386. {"dbar", L'đ'},
  387. {"dblar", L'⇋'},
  388. {"dblgt", L'≫'},
  389. {"dbllt", L'≪'},
  390. {"dced", L'd'}, /* +cedilla */
  391. {"dd", MDD},
  392. {"ddag", L'‡'},
  393. {"ddd", MDDD},
  394. {"decr", L'↓'},
  395. {"deg", L'°'},
  396. {"dele", L'd'}, /* should be dele */
  397. {"delta", L'δ'},
  398. {"descnode", L'☋'}, /* descending node U+260B */
  399. {"diamond", L'♢'},
  400. {"digamma", L'ϝ'},
  401. {"div", L'÷'},
  402. {"dlessi", L'ı'},
  403. {"dlessj1", L'j'}, /* should be dotless */
  404. {"dlessj2", L'j'}, /* should be dotless */
  405. {"dlessj3", L'j'}, /* should be dotless */
  406. {"dollar", L'$'},
  407. {"dotab", LDOT},
  408. {"dotbl", LDTB},
  409. {"drachm", L'ʒ'},
  410. {"dubh", L'-'},
  411. {"eacu", L'é'},
  412. {"earth", L'♁'},
  413. {"easper", MEAS},
  414. {"ebreve", L'ĕ'},
  415. {"ecirc", L'ê'},
  416. {"edh", L'ð'},
  417. {"egrave", L'è'},
  418. {"ehacek", L'ě'},
  419. {"ehook", L'ę'},
  420. {"elem", L'∊'},
  421. {"elenis", MELN},
  422. {"em", L'—'},
  423. {"emac", L'ē'},
  424. {"emem", MEMM},
  425. {"en", L'–'},
  426. {"epsilon", L'ε'},
  427. {"equil", L'⇋'},
  428. {"ergo", L'∴'},
  429. {"es", MES},
  430. {"eszett", L'ß'},
  431. {"eta", L'η'},
  432. {"eth", L'ð'},
  433. {"euml", L'ë'},
  434. {"expon", L'↑'},
  435. {"fact", L'!'},
  436. {"fata", L'ɑ'},
  437. {"fatpara", L'¶'}, /* should have fatter, filled in bowl */
  438. {"female", L'♀'},
  439. {"ffilig", MLFFI},
  440. {"fflig", MLFF},
  441. {"ffllig", MLFFL},
  442. {"filig", MLFI},
  443. {"flat", L'♭'},
  444. {"fllig", MLFL},
  445. {"frE", L'E'}, /* should be curly */
  446. {"frL", L'L'}, /* should be curly */
  447. {"frR", L'R'}, /* should be curly */
  448. {"frakB", L'B'}, /* should have fraktur style */
  449. {"frakG", L'G'},
  450. {"frakH", L'H'},
  451. {"frakI", L'I'},
  452. {"frakM", L'M'},
  453. {"frakU", L'U'},
  454. {"frakX", L'X'},
  455. {"frakY", L'Y'},
  456. {"frakh", L'h'},
  457. {"frbl", LFRB},
  458. {"frown", LFRN},
  459. {"fs", L' '},
  460. {"fsigma", L'ς'},
  461. {"gAacu", L'Á'}, /* should be Α+acute */
  462. {"gaacu", L'α'}, /* +acute */
  463. {"gabreve", L'α'}, /* +breve */
  464. {"gafrown", L'α'}, /* +frown */
  465. {"gagrave", L'α'}, /* +grave */
  466. {"gamac", L'α'}, /* +macron */
  467. {"gamma", L'γ'},
  468. {"gauml", L'α'}, /* +umlaut */
  469. {"ge", L'≧'},
  470. {"geacu", L'ε'}, /* +acute */
  471. {"gegrave", L'ε'}, /* +grave */
  472. {"ghacu", L'η'}, /* +acute */
  473. {"ghfrown", L'η'}, /* +frown */
  474. {"ghgrave", L'η'}, /* +grave */
  475. {"ghmac", L'η'}, /* +macron */
  476. {"giacu", L'ι'}, /* +acute */
  477. {"gibreve", L'ι'}, /* +breve */
  478. {"gifrown", L'ι'}, /* +frown */
  479. {"gigrave", L'ι'}, /* +grave */
  480. {"gimac", L'ι'}, /* +macron */
  481. {"giuml", L'ι'}, /* +umlaut */
  482. {"glagjat", L'ѧ'},
  483. {"glots", L'ˀ'},
  484. {"goacu", L'ο'}, /* +acute */
  485. {"gobreve", L'ο'}, /* +breve */
  486. {"grave", LGRV},
  487. {"gt", L'>'},
  488. {"guacu", L'υ'}, /* +acute */
  489. {"gufrown", L'υ'}, /* +frown */
  490. {"gugrave", L'υ'}, /* +grave */
  491. {"gumac", L'υ'}, /* +macron */
  492. {"guuml", L'υ'}, /* +umlaut */
  493. {"gwacu", L'ω'}, /* +acute */
  494. {"gwfrown", L'ω'}, /* +frown */
  495. {"gwgrave", L'ω'}, /* +grave */
  496. {"hacek", LHCK},
  497. {"halft", L'⌈'},
  498. {"hash", L'#'},
  499. {"hasper", MHAS},
  500. {"hatpath", L'ֲ'}, /* hataf patah U+05B2 */
  501. {"hatqam", L'ֳ'}, /* hataf qamats U+05B3 */
  502. {"hatseg", L'ֱ'}, /* hataf segol U+05B1 */
  503. {"hbar", L'ħ'},
  504. {"heart", L'♡'},
  505. {"hebaleph", L'א'}, /* aleph U+05D0 */
  506. {"hebayin", L'ע'}, /* ayin U+05E2 */
  507. {"hebbet", L'ב'}, /* bet U+05D1 */
  508. {"hebbeth", L'ב'}, /* bet U+05D1 */
  509. {"hebcheth", L'ח'}, /* bet U+05D7 */
  510. {"hebdaleth", L'ד'}, /* dalet U+05D3 */
  511. {"hebgimel", L'ג'}, /* gimel U+05D2 */
  512. {"hebhe", L'ה'}, /* he U+05D4 */
  513. {"hebkaph", L'כ'}, /* kaf U+05DB */
  514. {"heblamed", L'ל'}, /* lamed U+05DC */
  515. {"hebmem", L'מ'}, /* mem U+05DE */
  516. {"hebnun", L'נ'}, /* nun U+05E0 */
  517. {"hebnunfin", L'ן'}, /* final nun U+05DF */
  518. {"hebpe", L'פ'}, /* pe U+05E4 */
  519. {"hebpedag", L'ף'}, /* final pe? U+05E3 */
  520. {"hebqoph", L'ק'}, /* qof U+05E7 */
  521. {"hebresh", L'ר'}, /* resh U+05E8 */
  522. {"hebshin", L'ש'}, /* shin U+05E9 */
  523. {"hebtav", L'ת'}, /* tav U+05EA */
  524. {"hebtsade", L'צ'}, /* tsadi U+05E6 */
  525. {"hebwaw", L'ו'}, /* vav? U+05D5 */
  526. {"hebyod", L'י'}, /* yod U+05D9 */
  527. {"hebzayin", L'ז'}, /* zayin U+05D6 */
  528. {"hgz", L'ʒ'}, /* ??? Cf "alet" */
  529. {"hireq", L'ִ'}, /* U+05B4 */
  530. {"hlenis", MHLN},
  531. {"hook", LOGO},
  532. {"horizE", L'E'}, /* should be on side */
  533. {"horizP", L'P'}, /* should be on side */
  534. {"horizS", L'∽'},
  535. {"horizT", L'⊣'},
  536. {"horizb", L'{'}, /* should be underbrace */
  537. {"ia", L'α'},
  538. {"iacu", L'í'},
  539. {"iasper", MIAS},
  540. {"ib", L'β'},
  541. {"ibar", L'ɨ'},
  542. {"ibreve", L'ĭ'},
  543. {"icirc", L'î'},
  544. {"id", L'δ'},
  545. {"ident", L'≡'},
  546. {"ie", L'ε'},
  547. {"ifilig", MLFI},
  548. {"ifflig", MLFF},
  549. {"ig", L'γ'},
  550. {"igrave", L'ì'},
  551. {"ih", L'η'},
  552. {"ii", L'ι'},
  553. {"ik", L'κ'},
  554. {"ilenis", MILN},
  555. {"imac", L'ī'},
  556. {"implies", L'⇒'},
  557. {"index", L'☞'},
  558. {"infin", L'∞'},
  559. {"integ", L'∫'},
  560. {"intsec", L'∩'},
  561. {"invpri", L'ˏ'},
  562. {"iota", L'ι'},
  563. {"iq", L'ψ'},
  564. {"istlig", MLST},
  565. {"isub", L'ϵ'}, /* iota below accent */
  566. {"iuml", L'ï'},
  567. {"iz", L'ζ'},
  568. {"jup", L'♃'},
  569. {"kappa", L'κ'},
  570. {"koppa", L'ϟ'},
  571. {"lambda", L'λ'},
  572. {"lar", L'←'},
  573. {"lbar", L'ł'},
  574. {"le", L'≦'},
  575. {"lenis", LLEN},
  576. {"leo", L'♌'},
  577. {"lhalfbr", L'⌈'},
  578. {"lhshoe", L'⊃'},
  579. {"libra", L'♎'},
  580. {"llswing", MLLS},
  581. {"lm", L'ː'},
  582. {"logicand", L'∧'},
  583. {"logicor", L'∨'},
  584. {"longs", L'ʃ'},
  585. {"lrar", L'↔'},
  586. {"lt", L'<'},
  587. {"ltappr", L'≾'},
  588. {"ltflat", L'∠'},
  589. {"lumlbl", L'l'}, /* +umlaut below */
  590. {"mac", LMAC},
  591. {"male", L'♂'},
  592. {"mc", L'c'}, /* should be raised */
  593. {"merc", L'☿'}, /* mercury U+263F */
  594. {"min", L'−'},
  595. {"moonfq", L'☽'}, /* first quarter moon U+263D */
  596. {"moonlq", L'☾'}, /* last quarter moon U+263E */
  597. {"msylab", L'm'}, /* +sylab (ˌ) */
  598. {"mu", L'μ'},
  599. {"nacu", L'ń'},
  600. {"natural", L'♮'},
  601. {"neq", L'≠'},
  602. {"nfacu", L'′'},
  603. {"nfasper", L'ʽ'},
  604. {"nfbreve", L'˘'},
  605. {"nfced", L'¸'},
  606. {"nfcirc", L'ˆ'},
  607. {"nffrown", L'⌢'},
  608. {"nfgra", L'ˋ'},
  609. {"nfhacek", L'ˇ'},
  610. {"nfmac", L'¯'},
  611. {"nftilde", L'˜'},
  612. {"nfuml", L'¨'},
  613. {"ng", L'ŋ'},
  614. {"not", L'¬'},
  615. {"notelem", L'∉'},
  616. {"ntilde", L'ñ'},
  617. {"nu", L'ν'},
  618. {"oab", L'〈'},
  619. {"oacu", L'ó'},
  620. {"oasper", MOAS},
  621. {"ob", L'{'},
  622. {"obar", L'ø'},
  623. {"obigb", L'{'}, /* should be big */
  624. {"obigpren", L'('},
  625. {"obigsb", L'['}, /* should be big */
  626. {"obreve", L'ŏ'},
  627. {"ocirc", L'ô'},
  628. {"odsb", L'〚'}, /* [[ U+301A */
  629. {"oe", L'œ'},
  630. {"oeamp", L'&'},
  631. {"ograve", L'ò'},
  632. {"ohook", L'o'}, /* +hook */
  633. {"olenis", MOLN},
  634. {"omac", L'ō'},
  635. {"omega", L'ω'},
  636. {"omicron", L'ο'},
  637. {"ope", L'ɛ'},
  638. {"opp", L'☍'},
  639. {"oq", L'`'},
  640. {"oqq", L'“'},
  641. {"or", MOR},
  642. {"osb", L'['},
  643. {"otilde", L'õ'},
  644. {"ouml", L'ö'},
  645. {"ounce", L'℥'}, /* ounce U+2125 */
  646. {"ovparen", L'⌢'}, /* should be sideways ( */
  647. {"p", L'′'},
  648. {"pa", L'∂'},
  649. {"page", L'P'},
  650. {"pall", L'ʎ'},
  651. {"paln", L'ɲ'},
  652. {"par", PAR},
  653. {"para", L'¶'},
  654. {"pbar", L'p'}, /* +bar */
  655. {"per", L'℘'}, /* per U+2118 */
  656. {"phi", L'φ'},
  657. {"phi2", L'ϕ'},
  658. {"pi", L'π'},
  659. {"pisces", L'♓'},
  660. {"planck", L'ħ'},
  661. {"plantinJ", L'J'}, /* should be script */
  662. {"pm", L'±'},
  663. {"pmil", L'‰'},
  664. {"pp", L'″'},
  665. {"ppp", L'‴'},
  666. {"prop", L'∝'},
  667. {"psi", L'ψ'},
  668. {"pstlg", L'£'},
  669. {"q", L'?'}, /* should be raised */
  670. {"qamets", L'ֳ'}, /* U+05B3 */
  671. {"quaver", L'♪'},
  672. {"rar", L'→'},
  673. {"rasper", MRAS},
  674. {"rdot", L'·'},
  675. {"recipe", L'℞'}, /* U+211E */
  676. {"reg", L'®'},
  677. {"revC", L'Ɔ'}, /* open O U+0186 */
  678. {"reva", L'ɒ'},
  679. {"revc", L'ɔ'},
  680. {"revope", L'ɜ'},
  681. {"revr", L'ɹ'},
  682. {"revsc", L'˒'}, /* upside-down semicolon */
  683. {"revv", L'ʌ'},
  684. {"rfa", L'o'}, /* +hook (Cf "goal") */
  685. {"rhacek", L'ř'},
  686. {"rhalfbr", L'⌉'},
  687. {"rho", L'ρ'},
  688. {"rhshoe", L'⊂'},
  689. {"rlenis", MRLN},
  690. {"rsylab", L'r'}, /* +sylab */
  691. {"runash", L'F'}, /* should be runic 'ash' */
  692. {"rvow", L'˔'},
  693. {"sacu", L'ś'},
  694. {"sagit", L'♐'},
  695. {"sampi", L'ϡ'},
  696. {"saturn", L'♄'},
  697. {"sced", L'ş'},
  698. {"schwa", L'ə'},
  699. {"scorpio", L'♏'},
  700. {"scrA", L'A'}, /* should be script */
  701. {"scrC", L'C'},
  702. {"scrE", L'E'},
  703. {"scrF", L'F'},
  704. {"scrI", L'I'},
  705. {"scrJ", L'J'},
  706. {"scrL", L'L'},
  707. {"scrO", L'O'},
  708. {"scrP", L'P'},
  709. {"scrQ", L'Q'},
  710. {"scrS", L'S'},
  711. {"scrT", L'T'},
  712. {"scrb", L'b'},
  713. {"scrd", L'd'},
  714. {"scrh", L'h'},
  715. {"scrl", L'l'},
  716. {"scruple", L'℈'}, /* U+2108 */
  717. {"sdd", L'ː'},
  718. {"sect", L'§'},
  719. {"semE", L'∃'},
  720. {"sh", L'ʃ'},
  721. {"shacek", L'š'},
  722. {"sharp", L'♯'},
  723. {"sheva", L'ְ'}, /* U+05B0 */
  724. {"shti", L'ɪ'},
  725. {"shtsyll", L'∪'},
  726. {"shtu", L'ʊ'},
  727. {"sidetri", L'⊲'},
  728. {"sigma", L'σ'},
  729. {"since", L'∵'},
  730. {"slge", L'≥'}, /* should have slanted line under */
  731. {"slle", L'≤'}, /* should have slanted line under */
  732. {"sm", L'ˈ'},
  733. {"smm", L'ˌ'},
  734. {"spade", L'♠'},
  735. {"sqrt", L'√'},
  736. {"square", L'□'}, /* U+25A1 */
  737. {"ssChi", L'Χ'}, /* should be sans serif */
  738. {"ssIota", L'Ι'},
  739. {"ssOmicron", L'Ο'},
  740. {"ssPi", L'Π'},
  741. {"ssRho", L'Ρ'},
  742. {"ssSigma", L'Σ'},
  743. {"ssTau", L'Τ'},
  744. {"star", L'*'},
  745. {"stlig", MLST},
  746. {"sup2", L'⁲'},
  747. {"supgt", L'˃'},
  748. {"suplt", L'˂'},
  749. {"sur", L'ʳ'},
  750. {"swing", L'∼'},
  751. {"tau", L'τ'},
  752. {"taur", L'♉'},
  753. {"th", L'þ'},
  754. {"thbar", L'þ'}, /* +bar */
  755. {"theta", L'θ'},
  756. {"thinqm", L'?'}, /* should be thinner */
  757. {"tilde", LTIL},
  758. {"times", L'×'},
  759. {"tri", L'∆'},
  760. {"trli", L'‖'},
  761. {"ts", L' '},
  762. {"uacu", L'ú'},
  763. {"uasper", MUAS},
  764. {"ubar", L'u'}, /* +bar */
  765. {"ubreve", L'ŭ'},
  766. {"ucirc", L'û'},
  767. {"udA", L'∀'},
  768. {"udT", L'⊥'},
  769. {"uda", L'ɐ'},
  770. {"udh", L'ɥ'},
  771. {"udqm", L'¿'},
  772. {"udpsi", L'⋔'},
  773. {"udtr", L'∇'},
  774. {"ugrave", L'ù'},
  775. {"ulenis", MULN},
  776. {"umac", L'ū'},
  777. {"uml", LUML},
  778. {"undl", L'ˍ'}, /* underline accent */
  779. {"union", L'∪'},
  780. {"upsilon", L'υ'},
  781. {"uuml", L'ü'},
  782. {"vavpath", L'ו'}, /* vav U+05D5 (+patah) */
  783. {"vavsheva", L'ו'}, /* vav U+05D5 (+sheva) */
  784. {"vb", L'|'},
  785. {"vddd", L'⋮'},
  786. {"versicle2", L'℣'}, /* U+2123 */
  787. {"vinc", L'¯'},
  788. {"virgo", L'♍'},
  789. {"vpal", L'ɟ'},
  790. {"vvf", L'ɣ'},
  791. {"wasper", MWAS},
  792. {"wavyeq", L'≈'},
  793. {"wlenis", MWLN},
  794. {"wyn", L'ƿ'}, /* wynn U+01BF */
  795. {"xi", L'ξ'},
  796. {"yacu", L'ý'},
  797. {"ycirc", L'ŷ'},
  798. {"ygh", L'ʒ'},
  799. {"ymac", L'y'}, /* +macron */
  800. {"yuml", L'ÿ'},
  801. {"zced", L'z'}, /* +cedilla */
  802. {"zeta", L'ζ'},
  803. {"zh", L'ʒ'},
  804. {"zhacek", L'ž'},
  805. };
  806. /*
  807. The following special characters don't have close enough
  808. equivalents in Unicode, so aren't in the above table.
  809. 22n 2^(2^n) Cf Fermat
  810. 2on4 2/4
  811. 3on8 3/8
  812. Bantuo Bantu O. Cf Otshi-herero
  813. Car C with circular arrow on top
  814. albrtime cut-time: C with vertical line
  815. ardal Cf dental
  816. bantuo Bantu o. Cf Otshi-herero
  817. bbc1 single chem bond below
  818. bbc2 double chem bond below
  819. bbl1 chem bond like /
  820. bbl2 chem bond like //
  821. bbr1 chem bond like \
  822. bbr2 chem bond \\
  823. bcop1 copper symbol. Cf copper
  824. bcop2 copper symbol. Cf copper
  825. benchm Cf benchmark
  826. btc1 single chem bond above
  827. btc2 double chem bond above
  828. btl1 chem bond like \
  829. btl2 chem bond like \\
  830. btr1 chem bond like /
  831. btr2 chem bond like //
  832. burman Cf Burman
  833. devph sanskrit letter. Cf ph
  834. devrfls sanskrit letter. Cf cerebral
  835. duplong[12] musical note
  836. egchi early form of chi
  837. eggamma[12] early form of gamma
  838. egiota early form of iota
  839. egkappa early form of kappa
  840. eglambda early form of lambda
  841. egmu[12] early form of mu
  842. egnu[12] early form of nu
  843. egpi[123] early form of pi
  844. egrho[12] early form of rho
  845. egsampi early form of sampi
  846. egsan early form of san
  847. egsigma[12] early form of sigma
  848. egxi[123] early form of xi
  849. elatS early form of S
  850. elatc[12] early form of C
  851. elatg[12] early form of G
  852. glagjeri Slavonic Glagolitic jeri
  853. glagjeru Slavonic Glagolitic jeru
  854. hypolem hypolemisk (line with underdot)
  855. lhrbr lower half }
  856. longmord long mordent
  857. mbwvow backwards stretched C. Cf retract.
  858. mord music symbol. Cf mordent
  859. mostra Cf direct
  860. ohgcirc old form of circumflex
  861. oldbeta old form of β. Cf perturbate
  862. oldsemibr[12] old forms of semibreve. Cf prolation
  863. ormg old form of g. Cf G
  864. para[12345] form of ¶
  865. pauseo musical pause sign
  866. pauseu musical pause sign
  867. pharyng Cf pharyngal
  868. ragr Black letter ragged r
  869. repetn musical repeat. Cf retort
  870. segno musical segno sign
  871. semain[12] semitic ain
  872. semhe semitic he
  873. semheth semitic heth
  874. semkaph semitic kaph
  875. semlamed[12] semitic lamed
  876. semmem semitic mem
  877. semnum semitic nun
  878. sempe semitic pe
  879. semqoph[123] semitic qoph
  880. semresh semitic resh
  881. semtav[1234] semitic tav
  882. semyod semitic yod
  883. semzayin[123] semitic zayin
  884. shtlong[12] U with underbar. Cf glyconic
  885. sigmatau σ,τ combination
  886. squaver sixteenth note
  887. sqbreve square musical breve note
  888. swast swastika
  889. uhrbr upper half of big }
  890. versicle1 Cf versicle
  891. */
  892. static Rune normtab[128] = {
  893. /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
  894. /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  895. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  896. /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  897. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  898. /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', SPCS, L'\'',
  899. L'(', L')', L'*', L'+', L',', L'-', L'.', L'/',
  900. /*30*/ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
  901. L'8', L'9', L':', L';', TAGS, L'=', TAGE, L'?',
  902. /*40*/ L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G',
  903. L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O',
  904. /*50*/ L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W',
  905. L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_',
  906. /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
  907. L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
  908. /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
  909. L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE,
  910. };
  911. static Rune phtab[128] = {
  912. /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
  913. /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  914. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  915. /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  916. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  917. /*20*/ L' ', L'!', L'ˈ', L'#', L'$', L'ˌ', L'æ', L'\'',
  918. L'(', L')', L'*', L'+', L',', L'-', L'.', L'/',
  919. /*30*/ L'0', L'1', L'2', L'ɜ', L'4', L'5', L'6', L'7',
  920. L'8', L'ø', L'ː', L';', TAGS, L'=', TAGE, L'?',
  921. /*40*/ L'ə', L'ɑ', L'B', L'C', L'ð', L'ɛ', L'F', L'G',
  922. L'H', L'ɪ', L'J', L'K', L'L', L'M', L'ŋ', L'ɔ',
  923. /*50*/ L'P', L'ɒ', L'R', L'ʃ', L'θ', L'ʊ', L'ʌ', L'W',
  924. L'X', L'Y', L'ʒ', L'[', L'\\', L']', L'^', L'_',
  925. /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
  926. L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
  927. /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
  928. L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE,
  929. };
  930. static Rune grtab[128] = {
  931. /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
  932. /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  933. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  934. /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  935. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  936. /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', SPCS, L'\'',
  937. L'(', L')', L'*', L'+', L',', L'-', L'.', L'/',
  938. /*30*/ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
  939. L'8', L'9', L':', L';', TAGS, L'=', TAGE, L'?',
  940. /*40*/ L'@', L'Α', L'Β', L'Ξ', L'Δ', L'Ε', L'Φ', L'Γ',
  941. L'Η', L'Ι', L'Ϛ', L'Κ', L'Λ', L'Μ', L'Ν', L'Ο',
  942. /*50*/ L'Π', L'Θ', L'Ρ', L'Σ', L'Τ', L'Υ', L'V', L'Ω',
  943. L'Χ', L'Ψ', L'Ζ', L'[', L'\\', L']', L'^', L'_',
  944. /*60*/ L'`', L'α', L'β', L'ξ', L'δ', L'ε', L'φ', L'γ',
  945. L'η', L'ι', L'ς', L'κ', L'λ', L'μ', L'ν', L'ο',
  946. /*70*/ L'π', L'θ', L'ρ', L'σ', L'τ', L'υ', L'v', L'ω',
  947. L'χ', L'ψ', L'ζ', L'{', L'|', L'}', L'~', NONE,
  948. };
  949. static Rune subtab[128] = {
  950. /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
  951. /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  952. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  953. /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  954. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  955. /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', SPCS, L'\'',
  956. L'₍', L'₎', L'*', L'₊', L',', L'₋', L'.', L'/',
  957. /*30*/ L'₀', L'₁', L'₂', L'₃', L'₄', L'₅', L'₆', L'₇',
  958. L'₈', L'₉', L':', L';', TAGS, L'₌', TAGE, L'?',
  959. /*40*/ L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G',
  960. L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O',
  961. /*50*/ L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W',
  962. L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_',
  963. /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
  964. L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
  965. /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
  966. L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE,
  967. };
  968. static Rune suptab[128] = {
  969. /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
  970. /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  971. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  972. /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  973. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  974. /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', SPCS, L'\'',
  975. L'⁽', L'⁾', L'*', L'⁺', L',', L'⁻', L'.', L'/',
  976. /*30*/ L'⁰', L'ⁱ', L'⁲', L'⁳', L'⁴', L'⁵', L'⁶', L'⁷',
  977. L'⁸', L'⁹', L':', L';', TAGS, L'⁼', TAGE, L'?',
  978. /*40*/ L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G',
  979. L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O',
  980. /*50*/ L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W',
  981. L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_',
  982. /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
  983. L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
  984. /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
  985. L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE,
  986. };
  987. static int tagstarts;
  988. static char tag[Buflen];
  989. static int naux;
  990. static char auxname[Maxaux][Buflen];
  991. static char auxval[Maxaux][Buflen];
  992. static char spec[Buflen];
  993. static char *auxstate[Naux]; /* vals for most recent tag */
  994. static Entry curentry;
  995. #define cursize (curentry.end-curentry.start)
  996. static char *getspec(char *, char *);
  997. static char *gettag(char *, char *);
  998. static void dostatus(void);
  999. /*
  1000. * cmd is one of:
  1001. * 'p': normal print
  1002. * 'h': just print headwords
  1003. * 'P': print raw
  1004. */
  1005. void
  1006. oedprintentry(Entry e, int cmd)
  1007. {
  1008. char *p, *pe;
  1009. int t, a, i;
  1010. int32_t r, rprev, rlig;
  1011. Rune *transtab;
  1012. p = e.start;
  1013. pe = e.end;
  1014. transtab = normtab;
  1015. rprev = NONE;
  1016. changett(0, 0, 0);
  1017. curentry = e;
  1018. if(cmd == 'h')
  1019. outinhibit = 1;
  1020. while(p < pe) {
  1021. if(cmd == 'r') {
  1022. outchar(*p++);
  1023. continue;
  1024. }
  1025. r = transtab[(*p++)&0x7F];
  1026. if(r < NONE) {
  1027. /* Emit the rune, but buffer in case of ligature */
  1028. if(rprev != NONE)
  1029. outrune(rprev);
  1030. rprev = r;
  1031. } else if(r == SPCS) {
  1032. /* Start of special character name */
  1033. p = getspec(p, pe);
  1034. r = lookassoc(spectab, asize(spectab), spec);
  1035. if(r == -1) {
  1036. if(debug)
  1037. err("spec %ld %d %s",
  1038. e.doff, cursize, spec);
  1039. r = L'�';
  1040. }
  1041. if(r >= LIGS && r < LIGE) {
  1042. /* handle possible ligature */
  1043. rlig = liglookup(r, rprev);
  1044. if(rlig != NONE)
  1045. rprev = rlig; /* overwrite rprev */
  1046. else {
  1047. /* could print accent, but let's not */
  1048. if(rprev != NONE) outrune(rprev);
  1049. rprev = NONE;
  1050. }
  1051. } else if(r >= MULTI && r < MULTIE) {
  1052. if(rprev != NONE) {
  1053. outrune(rprev);
  1054. rprev = NONE;
  1055. }
  1056. outrunes(multitab[r-MULTI]);
  1057. } else if(r == PAR) {
  1058. if(rprev != NONE) {
  1059. outrune(rprev);
  1060. rprev = NONE;
  1061. }
  1062. outnl(1);
  1063. } else {
  1064. if(rprev != NONE) outrune(rprev);
  1065. rprev = r;
  1066. }
  1067. } else if(r == TAGS) {
  1068. /* Start of tag name */
  1069. if(rprev != NONE) {
  1070. outrune(rprev);
  1071. rprev = NONE;
  1072. }
  1073. p = gettag(p, pe);
  1074. t = lookassoc(tagtab, asize(tagtab), tag);
  1075. if(t == -1) {
  1076. if(debug)
  1077. err("tag %ld %d %s",
  1078. e.doff, cursize, tag);
  1079. continue;
  1080. }
  1081. for(i = 0; i < Naux; i++)
  1082. auxstate[i] = 0;
  1083. for(i = 0; i < naux; i++) {
  1084. a = lookassoc(auxtab, asize(auxtab), auxname[i]);
  1085. if(a == -1) {
  1086. if(debug)
  1087. err("aux %ld %d %s",
  1088. e.doff, cursize, auxname[i]);
  1089. } else
  1090. auxstate[a] = auxval[i];
  1091. }
  1092. switch(t){
  1093. case E:
  1094. case Ve:
  1095. outnl(0);
  1096. if(tagstarts)
  1097. dostatus();
  1098. break;
  1099. case Ed:
  1100. case Etym:
  1101. outchar(tagstarts? '[' : ']');
  1102. break;
  1103. case Pr:
  1104. outchar(tagstarts? '(' : ')');
  1105. break;
  1106. case In:
  1107. transtab = changett(transtab, subtab, tagstarts);
  1108. break;
  1109. case Hm:
  1110. case Su:
  1111. case Fq:
  1112. transtab = changett(transtab, suptab, tagstarts);
  1113. break;
  1114. case Gk:
  1115. transtab = changett(transtab, grtab, tagstarts);
  1116. break;
  1117. case Ph:
  1118. transtab = changett(transtab, phtab, tagstarts);
  1119. break;
  1120. case Hw:
  1121. if(cmd == 'h') {
  1122. if(!tagstarts)
  1123. outchar(' ');
  1124. outinhibit = !tagstarts;
  1125. }
  1126. break;
  1127. case S0:
  1128. case S1:
  1129. case S2:
  1130. case S3:
  1131. case S4:
  1132. case S5:
  1133. case S6:
  1134. case S7a:
  1135. case S7n:
  1136. case Sn:
  1137. case Sgk:
  1138. if(tagstarts) {
  1139. outnl(2);
  1140. dostatus();
  1141. if(auxstate[Num]) {
  1142. if(t == S3 || t == S5) {
  1143. i = atoi(auxstate[Num]);
  1144. while(i--)
  1145. outchar('*');
  1146. outchars(" ");
  1147. } else if(t == S7a || t == S7n || t == Sn) {
  1148. outchar('(');
  1149. outchars(auxstate[Num]);
  1150. outchars(") ");
  1151. } else if(t == Sgk) {
  1152. i = grtab[auxstate[Num][0]];
  1153. if(i != NONE)
  1154. outrune(i);
  1155. outchars(". ");
  1156. } else {
  1157. outchars(auxstate[Num]);
  1158. outchars(". ");
  1159. }
  1160. }
  1161. }
  1162. break;
  1163. case Cb:
  1164. case Db:
  1165. case Qp:
  1166. case P:
  1167. if(tagstarts)
  1168. outnl(1);
  1169. break;
  1170. case Table:
  1171. /*
  1172. * Todo: gather columns, justify them, etc.
  1173. * For now, just let colums come out as rows
  1174. */
  1175. if(!tagstarts)
  1176. outnl(0);
  1177. break;
  1178. case Col:
  1179. if(tagstarts)
  1180. outnl(0);
  1181. break;
  1182. case Dn:
  1183. if(tagstarts)
  1184. outchar('/');
  1185. break;
  1186. }
  1187. }
  1188. }
  1189. if(cmd == 'h') {
  1190. outinhibit = 0;
  1191. outnl(0);
  1192. }
  1193. }
  1194. /*
  1195. * Return offset into bdict where next oed entry after fromoff starts.
  1196. * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
  1197. */
  1198. int32_t
  1199. oednextoff(int32_t fromoff)
  1200. {
  1201. int32_t a, n;
  1202. int c;
  1203. a = Bseek(bdict, fromoff, 0);
  1204. if(a < 0)
  1205. return -1;
  1206. n = 0;
  1207. for(;;) {
  1208. c = Bgetc(bdict);
  1209. if(c < 0)
  1210. break;
  1211. if(c == '<') {
  1212. c = Bgetc(bdict);
  1213. if(c == 'e') {
  1214. c = Bgetc(bdict);
  1215. if(c == '>' || c == ' ')
  1216. n = 3;
  1217. } else if(c == 'v' && Bgetc(bdict) == 'e') {
  1218. c = Bgetc(bdict);
  1219. if(c == '>' || c == ' ')
  1220. n = 4;
  1221. }
  1222. if(n)
  1223. break;
  1224. }
  1225. }
  1226. return (Boffset(bdict)-n);
  1227. }
  /*
   * Text of the pronunciation key, written out verbatim by oedprintkey.
   * Each source line below is one or more complete output lines; a
   * trailing "\n\n" marks a blank line in the output.
   */
  static char *prkey =
      "KEY TO THE PRONUNCIATION\n\n"
      "I. CONSONANTS\n"
      "b, d, f, k, l, m, n, p, t, v, z: usual English values\n\n"
      "g as in go (gəʊ)\n"
      "h ... ho! (həʊ)\n"
      "r ... run (rʌn), terrier (ˈtɛriə(r))\n"
      "(r)... her (hɜː(r))\n"
      "s ... see (siː), success (səkˈsɜs)\n"
      "w ... wear (wɛə(r))\n"
      "hw ... when (hwɛn)\n"
      "j ... yes (jɛs)\n"
      "θ ... thin (θin), bath (bɑːθ)\n"
      "ð ... then (ðɛn), bathe (beɪð)\n"
      "ʃ ... shop (ʃɒp), dish (dɪʃ)\n"
      "tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n"
      "ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n"
      "dʒ ... judge (dʒʌdʒ)\n"
      "ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n"
      "ŋg ... finger (ˈfiŋgə(r))\n\n"
      "Foreign\n"
      "ʎ as in It. seraglio (serˈraʎo)\n"
      "ɲ ... Fr. cognac (kɔɲak)\n"
      "x ... Ger. ach (ax), Sc. loch (lɒx)\n"
      "ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
      "ɣ ... North Ger. sagen (ˈzaːɣən)\n"
      "c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
      "ɥ ... Fr. cuisine (kɥizin)\n\n"
      "II. VOWELS AND DIPTHONGS\n\n"
      "Short\n"
      "ɪ as in pit (pɪt), -ness (-nɪs)\n"
      "ɛ ... pet (pɛt), Fr. sept (sɛt)\n"
      "æ ... pat (pæt)\n"
      "ʌ ... putt (pʌt)\n"
      "ɒ ... pot (pɒt)\n"
      "ʊ ... put (pʊt)\n"
      "ə ... another (əˈnʌðə(r))\n"
      "(ə)... beaten (ˈbiːt(ə)n)\n"
      "i ... Fr. si (si)\n"
      "e ... Fr. bébé (bebe)\n"
      "a ... Fr. mari (mari)\n"
      "ɑ ... Fr. bâtiment (bɑtimã)\n"
      "ɔ ... Fr. homme (ɔm)\n"
      "o ... Fr. eau (o)\n"
      "ø ... Fr. peu (pø)\n"
      "œ ... Fr. boeuf (bœf), coeur (kœr)\n"
      "u ... Fr. douce (dus)\n"
      "ʏ ... Ger. Müller (ˈmʏlər)\n"
      "y ... Fr. du (dy)\n\n"
      "Long\n"
      "iː as in bean (biːn)\n"
      "ɑː ... barn (bɑːn)\n"
      "ɔː ... born (bɔːn)\n"
      "uː ... boon (buːn)\n"
      "ɜː ... burn (bɜːn)\n"
      "eː ... Ger. Schnee (ʃneː)\n"
      "ɛː ... Ger. Fähre (ˈfɛːrə)\n"
      "aː ... Ger. Tag (taːk)\n"
      "oː ... Ger. Sohn (zoːn)\n"
      "øː ... Ger. Goethe (gøːtə)\n"
      "yː ... Ger. grün (gryːn)\n\n"
      "Nasal\n"
      "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
      "ã ... Fr. franc (frã)\n"
      "ɔ˜ ... Fr. bon (bɔ˜n)\n"
      "œ˜ ... Fr. un (œ˜)\n\n"
      "Dipthongs, etc.\n"
      "eɪ as in bay (beɪ)\n"
      "aɪ ... buy (baɪ)\n"
      "ɔɪ ... boy (bɔɪ)\n"
      "əʊ ... no (nəʊ)\n"
      "aʊ ... now (naʊ)\n"
      "ɪə ... peer (pɪə(r))\n"
      "ɛə ... pair (pɛə(r))\n"
      "ʊə ... tour (tʊə(r))\n"
      "ɔə ... boar (bɔə(r))\n\n"
      "III. STRESS\n\n"
      "Main stress: ˈ preceding stressed syllable\n"
      "Secondary stress: ˌ preceding stressed syllable\n\n"
      "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
  /* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
  1320. void
  1321. oedprintkey(void)
  1322. {
  1323. Bprint(bout, "%s", prkey);
  1324. }
  1325. /*
  1326. * f points just after a '&', fe points at end of entry.
  1327. * Accumulate the special name, starting after the &
  1328. * and continuing until the next '.', in spec[].
  1329. * Return pointer to char after '.'.
  1330. */
  1331. static char *
  1332. getspec(char *f, char *fe)
  1333. {
  1334. char *t;
  1335. int c, i;
  1336. t = spec;
  1337. i = sizeof spec;
  1338. while(--i > 0) {
  1339. c = *f++;
  1340. if(c == '.' || f == fe)
  1341. break;
  1342. *t++ = c;
  1343. }
  1344. *t = 0;
  1345. return f;
  1346. }
  1347. /*
  1348. * f points just after '<'; fe points at end of entry.
  1349. * Expect next characters from bin to match:
  1350. * [/][^ >]+( [^>=]+=[^ >]+)*>
  1351. * tag auxname auxval
  1352. * Accumulate the tag and its auxilliary information in
  1353. * tag[], auxname[][] and auxval[][].
  1354. * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
  1355. * Set naux to the number of aux pairs found.
  1356. * Return pointer to after final '>'.
  1357. */
  1358. static char *
  1359. gettag(char *f, char *fe)
  1360. {
  1361. char *t;
  1362. int c, i;
  1363. t = tag;
  1364. c = *f++;
  1365. if(c == '/')
  1366. tagstarts = 0;
  1367. else {
  1368. tagstarts = 1;
  1369. *t++ = c;
  1370. }
  1371. i = Buflen;
  1372. naux = 0;
  1373. while(--i > 0) {
  1374. c = *f++;
  1375. if(c == '>' || f == fe)
  1376. break;
  1377. if(c == ' ') {
  1378. *t = 0;
  1379. t = auxname[naux];
  1380. i = Buflen;
  1381. if(naux < Maxaux-1)
  1382. naux++;
  1383. } else if(naux && c == '=') {
  1384. *t = 0;
  1385. t = auxval[naux-1];
  1386. i = Buflen;
  1387. } else
  1388. *t++ = c;
  1389. }
  1390. *t = 0;
  1391. return f;
  1392. }
  1393. static void
  1394. dostatus(void)
  1395. {
  1396. char *s;
  1397. s = auxstate[St];
  1398. if(s) {
  1399. if(strcmp(s, "obs") == 0)
  1400. outrune(L'†');
  1401. else if(strcmp(s, "ali") == 0)
  1402. outrune(L'‖');
  1403. else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
  1404. outrune(L'¶');
  1405. else if(strcmp(s, "xref") == 0)
  1406. {/* nothing */}
  1407. else if(debug)
  1408. err("status %ld %d %s", curentry.doff, cursize, s);
  1409. }
  1410. }