pcollins.c 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. #include "dict.h"
/*
 * Routines for handling dictionaries in the "Paperback Collins"
 * format (with tags surrounded by >....<)
 */
  17. enum {
  18. Buflen=1000,
  19. };
  20. /* More special runes */
  21. enum {
  22. B = MULTIE+1, /* bold */
  23. H, /* headword start */
  24. I, /* italics */
  25. Ps, /* pronunciation start */
  26. Pe, /* pronunciation end */
  27. R, /* roman */
  28. X, /* headword end */
  29. };
  30. /* Assoc tables must be sorted on first field */
  31. static Assoc tagtab[] = {
  32. {"AA", L'Å'},
  33. {"AC", LACU},
  34. {"B", B},
  35. {"CE", LCED},
  36. {"CI", LFRN},
  37. {"Di", L'ı'},
  38. {"EL", L'-'},
  39. {"GR", LGRV},
  40. {"H", H},
  41. {"I", I},
  42. {"OE", L'Œ'},
  43. {"R", R},
  44. {"TI", LTIL},
  45. {"UM", LUML},
  46. {"X", X},
  47. {"[", Ps},
  48. {"]", Pe},
  49. {"ac", LACU},
  50. {"ce", LCED},
  51. {"ci", LFRN},
  52. {"gr", LGRV},
  53. {"oe", L'œ'},
  54. {"supe", L'e'}, /* should be raised */
  55. {"supo", L'o'}, /* should be raised */
  56. {"ti", LTIL},
  57. {"um", LUML},
  58. {"{", Ps},
  59. {"~", L'~'},
  60. {"~~", MTT},
  61. };
  62. static Rune normtab[128] = {
  63. /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
  64. /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  65. NONE, NONE, L' ', NONE, NONE, NONE, NONE, NONE,
  66. /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  67. NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
  68. /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', L'&', L'\'',
  69. L'(', L')', L'*', L'+', L',', L'-', L'.', L'/',
  70. /*30*/ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
  71. L'8', L'9', L':', L';', TAGE, L'=', TAGS, L'?',
  72. /*40*/ L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G',
  73. L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O',
  74. /*50*/ L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W',
  75. L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_',
  76. /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
  77. L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
  78. /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
  79. L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE,
  80. };
  81. static char *gettag(char *, char *);
  82. static Entry curentry;
  83. static char tag[Buflen];
  84. #define cursize (curentry.end-curentry.start)
  85. void
  86. pcollprintentry(Entry e, int cmd)
  87. {
  88. char *p, *pe;
  89. int32_t r, rprev, t, rlig;
  90. int saveoi;
  91. Rune *transtab;
  92. p = e.start;
  93. pe = e.end;
  94. transtab = normtab;
  95. rprev = NONE;
  96. changett(0, 0, 0);
  97. curentry = e;
  98. saveoi = 0;
  99. if(cmd == 'h')
  100. outinhibit = 1;
  101. while(p < pe) {
  102. if(cmd == 'r') {
  103. outchar(*p++);
  104. continue;
  105. }
  106. r = transtab[(*p++)&0x7F];
  107. if(r < NONE) {
  108. /* Emit the rune, but buffer in case of ligature */
  109. if(rprev != NONE)
  110. outrune(rprev);
  111. rprev = r;
  112. } else if(r == TAGS) {
  113. p = gettag(p, pe);
  114. t = lookassoc(tagtab, asize(tagtab), tag);
  115. if(t == -1) {
  116. if(debug && !outinhibit)
  117. err("tag %ld %d %s",
  118. e.doff, cursize, tag);
  119. continue;
  120. }
  121. if(t < NONE) {
  122. if(rprev != NONE)
  123. outrune(rprev);
  124. rprev = t;
  125. } else if(t >= LIGS && t < LIGE) {
  126. /* handle possible ligature */
  127. rlig = liglookup(t, rprev);
  128. if(rlig != NONE)
  129. rprev = rlig; /* overwrite rprev */
  130. else {
  131. /* could print accent, but let's not */
  132. if(rprev != NONE) outrune(rprev);
  133. rprev = NONE;
  134. }
  135. } else if(t >= MULTI && t < MULTIE) {
  136. if(rprev != NONE) {
  137. outrune(rprev);
  138. rprev = NONE;
  139. }
  140. outrunes(multitab[t-MULTI]);
  141. } else {
  142. if(rprev != NONE) {
  143. outrune(rprev);
  144. rprev = NONE;
  145. }
  146. switch(t){
  147. case H:
  148. if(cmd == 'h')
  149. outinhibit = 0;
  150. else
  151. outnl(0);
  152. break;
  153. case X:
  154. if(cmd == 'h')
  155. outinhibit = 1;
  156. else
  157. outchars(". ");
  158. break;
  159. case Ps:
  160. /* don't know enough of pron. key yet */
  161. saveoi = outinhibit;
  162. outinhibit = 1;
  163. break;
  164. case Pe:
  165. outinhibit = saveoi;
  166. break;
  167. }
  168. }
  169. }
  170. }
  171. if(cmd == 'h')
  172. outinhibit = 0;
  173. outnl(0);
  174. }
  175. int32_t
  176. pcollnextoff(int32_t fromoff)
  177. {
  178. int32_t a;
  179. char *p;
  180. a = Bseek(bdict, fromoff, 0);
  181. if(a < 0)
  182. return -1;
  183. for(;;) {
  184. p = Brdline(bdict, '\n');
  185. if(!p)
  186. break;
  187. if(p[0] == '>' && p[1] == 'H' && p[2] == '<')
  188. return (Boffset(bdict)-Blinelen(bdict));
  189. }
  190. return -1;
  191. }
  192. void
  193. pcollprintkey(void)
  194. {
  195. Bprint(bout, "No pronunciation key yet\n");
  196. }
  197. /*
  198. * f points just after '>'; fe points at end of entry.
  199. * Expect next characters from bin to match:
  200. * [^ <]+<
  201. * tag
  202. * Accumulate the tag in tag[].
  203. * Return pointer to after final '<'.
  204. */
  205. static char *
  206. gettag(char *f, char *fe)
  207. {
  208. char *t;
  209. int c, i;
  210. t = tag;
  211. i = Buflen;
  212. while(--i > 0) {
  213. c = *f++;
  214. if(c == '<' || f == fe)
  215. break;
  216. *t++ = c;
  217. }
  218. *t = 0;
  219. return f;
  220. }