UilLexAna.c 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these librararies and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * @OSF_COPYRIGHT@
  25. * COPYRIGHT NOTICE
  26. * Copyright (c) 1990, 1991, 1992, 1993 Open Software Foundation, Inc.
  27. * ALL RIGHTS RESERVED (MOTIF). See the file named COPYRIGHT.MOTIF for
  28. * the full copyright text.
  29. */
  30. /*
  31. * HISTORY
  32. */
  33. #ifdef REV_INFO
  34. #ifndef lint
  35. static char rcsid[] = "$TOG: UilLexAna.c /main/14 1997/03/12 15:10:52 dbl $"
  36. #endif
  37. #endif
  38. /*
  39. * (c) Copyright 1989, 1990, DIGITAL EQUIPMENT CORPORATION, MAYNARD, MASS. */
  40. /*
  41. **++
  42. ** FACILITY:
  43. **
  44. ** User Interface Language Compiler (UIL)
  45. **
  46. ** ABSTRACT:
  47. **
** This module holds the routines that build tokens for the UIL
  49. ** compiler.
  50. **
  51. **--
  52. **/
  53. /*
  54. **
  55. ** INCLUDE FILES
  56. **
  57. **/
  58. #include <Xm/Xm.h>
  59. /* I think this one should be public too, it's not the case right now,
  60. and I don't want to include XmP.h here - dd */
  61. extern char *_XmStringGetCurrentCharset ();
  62. #include <Xm/XmosP.h> /* Need this for MB_CUR_MAX */
  63. #include <Mrm/MrmosI.h> /* Need this for _MrmOSSetLocale. */
  64. #include "UilDefI.h"
  65. #include <ctype.h>
  66. double atof();
  67. #include <errno.h> /* needed to support conversion functions */
  68. #ifdef X_NOT_STDC_ENV
  69. extern int errno;
  70. #endif
  71. /*
  72. **
  73. ** TABLE OF CONTENTS
  74. **
  75. */
  76. /*
  77. ** FORWARD DECLARATIONS
  78. */
  79. typedef struct _lex_buffer_type
  80. {
  81. struct _lex_buffer_type *az_next_buffer;
  82. unsigned char c_text[ 1 ];
  83. } lex_buffer_type;
/* Forward declarations of file-local routines.  _ARGUMENTS is not defined
   in the visible part of this file; presumably it is a prototype
   compatibility macro from an included header - confirm. */
static lex_buffer_type *get_lex_buffer _ARGUMENTS(( lex_buffer_type *az_current_lex_buffer ));
#if debug_version
/* Declared only when debug_version evaluates non-zero. */
static void dump_token _ARGUMENTS(( lex_buffer_type *az_current_lex_buffer , int l_lex_pos ));
#endif
  88. /*
  89. **
  90. ** EXTERNAL DEFINITIONS
  91. **
  92. */
/* These values are set by the Lex initialize routine or if the charset */
/* option is specified on the module declaration */
externaldef(uil_comp_glbl) int Uil_lex_l_user_default_charset;
externaldef(uil_comp_glbl) sym_value_entry_type *Uil_lex_az_charset_entry;
externaldef(uil_comp_glbl) int Uil_lex_l_localized;
/* This is the most recent character set recognized by the grammar. It */
/* allows the use of the CHARACTER_SET function to prefix general string */
/* literals. */
externaldef(uil_comp_glbl) int Uil_lex_l_charset_specified;
externaldef(uil_comp_glbl) int Uil_lex_l_literal_charset;
externaldef(uil_comp_glbl) sym_value_entry_type *Uil_lex_az_literal_charset;
/* %COMPLETE */
/* NOTE(review): presumably a running count of input characters consumed, */
/* used for percent-complete reporting - confirm against its users. */
externaldef(uil_comp_glbl) int Uil_characters_read;
/* For portability, declare all yy* variables as extern. */
extern yystype yylval;
extern yystype prev_yylval;
/* This stack entry is used as the result of epsilon productions. */
extern yystype gz_yynullval;
/*
* Retain comments as scanned
*/
/* NOTE(review): the two sizes below are presumably the initial allocation */
/* and the growth increment for comment_text - confirm against the code */
/* that allocates it. */
#define INITIAL_COMMENT_SIZE 8000
#define INCR_COMMENT_SIZE 2000
externaldef(uil_comp_glbl) char *comment_text;
externaldef(uil_comp_glbl) int comment_size;
/* File-local; starts at 0. */
static int last_token_seen = 0;
  119. /*
  120. **
  121. ** MACRO DEFINITIONS
  122. **
  123. **/
/*
** Each of the 256 possible input characters has an associated class.
** These defines give names to the classes.  The class is the second
** index into token_table (dimensioned [max_state+1][max_class+1]), so
** max_class must stay equal to the highest class number defined here.
*/
#define class_blank 0 /* white space */
#define class_punc 1 /* punctuation characters */
#define class_name 2 /* alphabetics - {eEntvbrf} + {$ _} */
#define class_e 3 /* e */
#define class_escape 4 /* n t v b r f */
#define class_ff 5 /* form feed */
#define class_digit 6 /* 0..9 */
#define class_dot 7 /* . */
#define class_sign 8 /* - + */
#define class_quote 9 /* ' */
#define class_bslash 10 /* \ */
#define class_slash 11 /* / */
#define class_star 12 /* * */
#define class_exclam 13 /* ! */
#define class_eol 14 /* end of line - ascii nul */
#define class_rest 15 /* remaining printable characters */
#define class_illegal 16 /* remaining non printable characters */
#define class_pound 17 /* # */
#define class_dquote 18 /* " */
#define class_langle 19 /* < */
#define class_rangle 20 /* > */
#define class_highbit 21 /* high order bit set */
#define max_class 21 /* highest class number; sizes token_table */
/*
** states within the token table
** The state is the first index into token_table (dimensioned
** [max_state+1][max_class+1]), so max_state must stay equal to the
** highest state number defined here.
*/
#define state_initial 0 /* initial state of automaton */
#define state_name 1 /* looking for a name */
#define state_integer 2 /* looking for a integer */
#define state_real_1 3 /* looking for a real */
#define state_real_2 4 /* looking for a real */
#define state_exp_1 5 /* looking for a real with exponent */
#define state_exp_2 6 /* looking for a real with exponent */
#define state_exp_3 7 /* looking for a real with exponent */
#define state_str_1 8 /* looking for a string */
#define state_str_2 9 /* looking for a string */
#define state_str_3 10 /* looking for a string */
#define state_comment_1 11 /* looking for a comment */
#define state_comment_2 12 /* looking for a comment */
#define state_comment_3 13 /* looking for a comment */
#define state_comment_4 14 /* looking for a comment */
#define state_eat_rest 15 /* eat up rest characters */
#define state_gstr_1 16 /* looking for a general string */
#define state_gstr_2 17 /* looking for a general string */
#define state_gstr_3 18 /* looking for a general string */
#define state_gstr_4 19 /* looking for a general string */
#define state_gstr_5 20 /* looking for a general string */
#define state_langle 21 /* looking for << */
#define state_rangle 22 /* looking for >> */
#define max_state 22 /* highest state number; sizes token_table */
/*
** actions in token table
** all actions requiring a move (save) of the current character have
** the NEGATIVE bit (0x40) or'ed into the plain action number; despite
** the macro's name the resulting values are positive
*/
#define NEGATIVE 0x40
#define min_action 1
#define move_advance (NEGATIVE | 1) /* save character - goto next state */
#define advance 1 /* goto next state */
#define move_final (NEGATIVE | 2) /* save character - found token */
#define final 2 /* found token */
#define move_error (NEGATIVE | 3) /* save character - found error */
#define error 3 /* found error */
#define move_special (NEGATIVE | 4) /* save character - special action */
#define special 4 /* special action */
#define reset 5 /* reset the analyzer */
#define final_comment (NEGATIVE | 6) /* save whole comment RAP */
/*
** errors encoded in the token table
** (held in the second field of a cell whose action is move_error)
*/
#define min_error 0
#define bad_prefix 0 /* junk starts a token */
#define error_max 0
/*
** final states in the token table
** (held in the second field of a cell whose action is final or
** move_final)
*/
#define token_min 1
#define token_punc 1 /* punctuation character */
#define token_eol 2 /* end of line */
#define token_real 3 /* real number */
#define token_integer 4 /* integer */
#define token_name 5 /* identifier */
#define token_ustring 6 /* unterminated string */
#define token_string 7 /* string */
#define token_ff 8 /* form feed */
#define token_ugstr 9 /* unterminated general string */
#define token_gstr 10 /* general string */
#define token_punc2 11 /* 2 character punctuation */
#define token_comment 12 /* comment block RAP */
#define token_lstr 13 /* localized general string */
#define token_max 13
/*
** special actions that take place
** (held in the third field of a cell whose action is special or
** move_special)
*/
#define control_char 1 /* unprintable character in construct */
#define start_bslash 2 /* start of \ construct in string */
#define ignore_bslash 3 /* \ not followed by correct sequence */
#define found_digit 4 /* digit in \ sequence */
#define end_digits 5 /* end of \digit...\ sequence */
#define insert_char 6 /* end of \x sequence */
#define missing_bslash 7 /* \ at end of \digit...\ missing */
#define string_wrap 8 /* string wraps to next line */
#define comment_wrap 9 /* comment wraps to next line */
#define charset_gstr 10 /* found char set for general string */
#define nocharset_gstr 11 /* default char set for general string */
#define highbit_char 12 /* NOTE(review): presumably a character with the high order bit set; the old comment duplicated nocharset_gstr's - confirm */
/*
** define backup field values of a token table cell
** (not_used and backup_0 share the value 0; not_used marks cells whose
** action does not consult the field)
*/
#define not_used 0 /* value not defined for this action */
#define backup_0 0 /* no backup required */
#define backup_1 1 /* 1 character backup needed */
#define backup_2 2 /* 2 character backup needed */
#define backup_3 3 /* 3 character backup needed */
  242. /*
  243. ** form of each cell in the token table
  244. */
  245. typedef struct
  246. {
  247. char action; /* action to be taken */
  248. char next_state; /* next state */
  249. /* terminal found */
  250. /* error found */
  251. char backup; /* number of character to backup */
  252. /* action for special cases */
  253. char unused;
  254. } cell;
/*
**
** OWN Storage for TOKEN BUILDER
**
*/
/* NOTE(review): presumably the highest usable position within a lex
   buffer's text area - confirm against get_lex_buffer. */
#define l_max_lex_buffer_pos 127
/* NOTE(review): presumably the head of the chain of lex buffers linked
   through az_next_buffer - confirm. */
static lex_buffer_type *az_first_lex_buffer;
  262. /*
  263. ** the actual token table
  264. */
  265. static cell XmConst token_table[ max_state+1][ max_class+1] =
  266. {
  267. { /* state_initial */
  268. /* class_blank */ { reset, state_initial, not_used },
  269. /* class_punc */ { move_final, token_punc, backup_0 },
  270. /* class_name */ { move_advance, state_name, not_used },
  271. /* class_e */ { move_advance, state_name, not_used },
  272. /* class_escape */ { move_advance, state_name, not_used },
  273. /* class_ff */ { final, token_ff, backup_0 },
  274. /* class_digit */ { move_advance, state_integer, not_used },
  275. /* class_dot */ { move_advance, state_real_1, not_used },
  276. /* class_sign */ { move_final, token_punc, backup_0 },
  277. /* class_quote */ { advance, state_str_1, not_used },
  278. /* class_bslash */ { move_advance, state_eat_rest, not_used },
  279. /* class_slash */ { move_advance, state_comment_1, not_used },
  280. /* class_star */ { move_final, token_punc, backup_0 },
  281. /* class_exclam */ { move_advance, state_comment_4, not_used },
  282. /* class_eol */ { final, token_eol, backup_0 },
  283. /* class_rest */ { move_advance, state_eat_rest, not_used },
  284. /* class_illegal */ { special, state_initial, control_char },
  285. /* class_pound */ { move_final, token_punc, backup_0 },
  286. /* class_dquote */ { special, state_gstr_3, nocharset_gstr },
  287. /* class_langle */ { move_advance, state_langle, not_used },
  288. /* class_rangle */ { move_advance, state_rangle, not_used },
  289. /* class_highbit */ { move_advance, state_eat_rest, not_used },
  290. },
  291. { /* state_name */
  292. /* class_blank */ { final, token_name, backup_0 },
  293. /* class_punc */ { move_final, token_name, backup_1 },
  294. /* class_name */ { move_advance, state_name, not_used },
  295. /* class_e */ { move_advance, state_name, not_used },
  296. /* class_escape */ { move_advance, state_name, not_used },
  297. /* class_ff */ { move_final, token_name, backup_1 },
  298. /* class_digit */ { move_advance, state_name, not_used },
  299. /* class_dot */ { move_final, token_name, backup_1 },
  300. /* class_sign */ { move_final, token_name, backup_1 },
  301. /* class_quote */ { move_final, token_name, backup_1 },
  302. /* class_bslash */ { move_final, token_name, backup_1 },
  303. /* class_slash */ { move_final, token_name, backup_1 },
  304. /* class_star */ { move_final, token_name, backup_1 },
  305. /* class_exclam */ { move_final, token_name, backup_1 },
  306. /* class_eol */ { move_final, token_name, backup_1 },
  307. /* class_rest */ { move_final, token_name, backup_1 },
  308. /* class_illegal */ { move_final, token_name, backup_1 },
  309. /* class_pound */ { move_final, token_name, backup_1 },
  310. /* class_dquote */ { move_final, token_name, backup_1 },
  311. /* class_langle */ { move_final, token_name, backup_1 },
  312. /* class_rangle */ { move_final, token_name, backup_1 },
  313. /* class_highbit */ { move_final, token_name, backup_1 },
  314. },
  315. { /* state_integer */
  316. /* class_blank */ { final, token_integer, backup_0 },
  317. /* class_punc */ { move_final, token_integer, backup_1 },
  318. /* class_name */ { move_final, token_integer, backup_1 },
  319. /* class_e */ { move_final, token_integer, backup_1 },
  320. /* class_escape */ { move_final, token_integer, backup_1 },
  321. /* class_ff */ { move_final, token_integer, backup_1 },
  322. /* class_digit */ { move_advance, state_integer, not_used },
  323. /* class_dot */ { move_advance, state_real_2, not_used },
  324. /* class_sign */ { move_final, token_integer, backup_1 },
  325. /* class_quote */ { move_final, token_integer, backup_1 },
  326. /* class_bslash */ { move_final, token_integer, backup_1 },
  327. /* class_slash */ { move_final, token_integer, backup_1 },
  328. /* class_star */ { move_final, token_integer, backup_1 },
  329. /* class_exclam */ { move_final, token_integer, backup_1 },
  330. /* class_eol */ { move_final, token_integer, backup_1 },
  331. /* class_rest */ { move_final, token_integer, backup_1 },
  332. /* class_illegal */ { move_final, token_integer, backup_1 },
  333. /* class_pound */ { move_final, token_integer, backup_1 },
  334. /* class_dquote */ { move_final, token_integer, backup_1 },
  335. /* class_langle */ { move_final, token_integer, backup_1 },
  336. /* class_rangle */ { move_final, token_integer, backup_1 },
  337. /* class_highbit */ { move_final, token_integer, backup_1 },
  338. },
  339. { /* state_real_1 */
  340. /* class_blank */ { move_advance, state_eat_rest, not_used },
  341. /* class_punc */ { move_error, bad_prefix, backup_1 },
  342. /* class_name */ { move_error, bad_prefix, backup_1 },
  343. /* class_e */ { move_error, bad_prefix, backup_1 },
  344. /* class_escape */ { move_error, bad_prefix, backup_1 },
  345. /* class_ff */ { special, state_real_1, control_char },
  346. /* class_digit */ { move_advance, state_real_2, not_used },
  347. /* class_dot */ { move_error, bad_prefix, backup_1 },
  348. /* class_sign */ { move_error, bad_prefix, backup_1 },
  349. /* class_quote */ { move_error, bad_prefix, backup_1 },
  350. /* class_bslash */ { move_error, bad_prefix, backup_1 },
  351. /* class_slash */ { move_error, bad_prefix, backup_1 },
  352. /* class_star */ { move_error, bad_prefix, backup_1 },
  353. /* class_exclam */ { move_error, bad_prefix, backup_1 },
  354. /* class_eol */ { move_error, bad_prefix, backup_1 },
  355. /* class_rest */ { move_advance, state_eat_rest, not_used },
  356. /* class_illegal */ { special, state_real_1, control_char },
  357. /* class_pound */ { move_error, bad_prefix, backup_1 },
  358. /* class_dquote */ { move_error, bad_prefix, backup_1 },
  359. /* class_langle */ { move_error, bad_prefix, backup_1 },
  360. /* class_rangle */ { move_error, bad_prefix, backup_1 },
  361. /* class_highbit */ { move_advance, state_eat_rest, not_used },
  362. },
  363. { /* state_real_2 */
  364. /* class_blank */ { final, token_real, backup_0 },
  365. /* class_punc */ { move_final, token_real, backup_1 },
  366. /* class_name */ { move_final, token_real, backup_1 },
  367. /* class_e */ { move_advance, state_exp_1, not_used },
  368. /* class_escape */ { move_final, token_real, backup_1 },
  369. /* class_ff */ { move_final, token_real, backup_1 },
  370. /* class_digit */ { move_advance, state_real_2, not_used },
  371. /* class_dot */ { move_final, token_real, backup_1 },
  372. /* class_sign */ { move_final, token_real, backup_1 },
  373. /* class_quote */ { move_final, token_real, backup_1 },
  374. /* class_bslash */ { move_final, token_real, backup_1 },
  375. /* class_slash */ { move_final, token_real, backup_1 },
  376. /* class_star */ { move_final, token_real, backup_1 },
  377. /* class_exclam */ { move_final, token_real, backup_1 },
  378. /* class_eol */ { move_final, token_real, backup_1 },
  379. /* class_rest */ { move_final, token_real, backup_1 },
  380. /* class_illegal */ { move_final, token_real, backup_1 },
  381. /* class_pound */ { move_final, token_real, backup_1 },
  382. /* class_dquote */ { move_final, token_real, backup_1 },
  383. /* class_langle */ { move_final, token_real, backup_1 },
  384. /* class_rangle */ { move_final, token_real, backup_1 },
  385. /* class_highbit */ { move_final, token_real, backup_1 },
  386. },
  387. { /* state_exp_1 */
  388. /* class_blank */ { move_final, token_real, backup_2 },
  389. /* class_punc */ { move_final, token_real, backup_2 },
  390. /* class_name */ { move_final, token_real, backup_2 },
  391. /* class_e */ { move_final, token_real, backup_2 },
  392. /* class_escape */ { move_final, token_real, backup_2 },
  393. /* class_ff */ { move_final, token_real, backup_2 },
  394. /* class_digit */ { move_advance, state_exp_3, not_used },
  395. /* class_dot */ { move_final, token_real, backup_2 },
  396. /* class_sign */ { move_advance, state_exp_2, not_used },
  397. /* class_quote */ { move_final, token_real, backup_2 },
  398. /* class_bslash */ { move_final, token_real, backup_2 },
  399. /* class_slash */ { move_final, token_real, backup_2 },
  400. /* class_star */ { move_final, token_real, backup_2 },
  401. /* class_exclam */ { move_final, token_real, backup_2 },
  402. /* class_eol */ { move_final, token_real, backup_2 },
  403. /* class_rest */ { move_final, token_real, backup_2 },
  404. /* class_illegal */ { move_final, token_real, backup_2 },
  405. /* class_pound */ { move_final, token_real, backup_2 },
  406. /* class_dquote */ { move_final, token_real, backup_2 },
  407. /* class_langle */ { move_final, token_real, backup_2 },
  408. /* class_rangle */ { move_final, token_real, backup_2 },
  409. /* class_highbit */ { move_final, token_real, backup_2 },
  410. },
  411. { /* state_exp_2 */
  412. /* class_blank */ { move_final, token_real, backup_3 },
  413. /* class_punc */ { move_final, token_real, backup_3 },
  414. /* class_name */ { move_final, token_real, backup_3 },
  415. /* class_e */ { move_final, token_real, backup_3 },
  416. /* class_escape */ { move_final, token_real, backup_3 },
  417. /* class_ff */ { move_final, token_real, backup_3 },
  418. /* class_digit */ { move_advance, state_exp_3, not_used },
  419. /* class_dot */ { move_final, token_real, backup_3 },
  420. /* class_sign */ { move_final, token_real, backup_3 },
  421. /* class_quote */ { move_final, token_real, backup_3 },
  422. /* class_bslash */ { move_final, token_real, backup_3 },
  423. /* class_slash */ { move_final, token_real, backup_3 },
  424. /* class_star */ { move_final, token_real, backup_3 },
  425. /* class_exclam */ { move_final, token_real, backup_3 },
  426. /* class_eol */ { move_final, token_real, backup_3 },
  427. /* class_rest */ { move_final, token_real, backup_3 },
  428. /* class_illegal */ { move_final, token_real, backup_3 },
  429. /* class_pound */ { move_final, token_real, backup_3 },
  430. /* class_dquote */ { move_final, token_real, backup_3 },
  431. /* class_langle */ { move_final, token_real, backup_3 },
  432. /* class_rangle */ { move_final, token_real, backup_3 },
  433. /* class_highbit */ { move_final, token_real, backup_3 },
  434. },
  435. { /* state_exp_3 */
  436. /* class_blank */ { final, token_real, backup_0 },
  437. /* class_punc */ { move_final, token_real, backup_1 },
  438. /* class_name */ { move_final, token_real, backup_1 },
  439. /* class_e */ { move_final, token_real, backup_1 },
  440. /* class_escape */ { move_final, token_real, backup_1 },
  441. /* class_ff */ { move_final, token_real, backup_1 },
  442. /* class_digit */ { move_advance, state_exp_3, not_used },
  443. /* class_dot */ { move_final, token_real, backup_1 },
  444. /* class_sign */ { move_final, token_real, backup_1 },
  445. /* class_quote */ { move_final, token_real, backup_1 },
  446. /* class_bslash */ { move_final, token_real, backup_1 },
  447. /* class_slash */ { move_final, token_real, backup_1 },
  448. /* class_star */ { move_final, token_real, backup_1 },
  449. /* class_exclam */ { move_final, token_real, backup_1 },
  450. /* class_eol */ { move_final, token_real, backup_1 },
  451. /* class_rest */ { move_final, token_real, backup_1 },
  452. /* class_illegal */ { move_final, token_real, backup_1 },
  453. /* class_pound */ { move_final, token_real, backup_1 },
  454. /* class_dquote */ { move_final, token_real, backup_1 },
  455. /* class_langle */ { move_final, token_real, backup_1 },
  456. /* class_rangle */ { move_final, token_real, backup_1 },
  457. /* class_highbit */ { move_final, token_real, backup_1 },
  458. },
  459. { /* state_str_1 */
  460. /* class_blank */ { move_advance, state_str_1, not_used },
  461. /* class_punc */ { move_advance, state_str_1, not_used },
  462. /* class_name */ { move_advance, state_str_1, not_used },
  463. /* class_e */ { move_advance, state_str_1, not_used },
  464. /* class_escape */ { move_advance, state_str_1, not_used },
  465. /* class_ff */ { special, state_str_1, control_char },
  466. /* class_digit */ { move_advance, state_str_1, not_used },
  467. /* class_dot */ { move_advance, state_str_1, not_used },
  468. /* class_sign */ { move_advance, state_str_1, not_used },
  469. /* class_quote */ { final, token_string, backup_0 },
  470. /* class_bslash */ { special, state_str_2, start_bslash },
  471. /* class_slash */ { move_advance, state_str_1, not_used },
  472. /* class_star */ { move_advance, state_str_1, not_used },
  473. /* class_exclam */ { move_advance, state_str_1, not_used },
  474. /* class_eol */ { move_final, token_ustring, backup_1 },
  475. /* class_rest */ { move_advance, state_str_1, not_used },
  476. /* class_illegal */ { special, state_str_1, control_char },
  477. /* class_pound */ { move_advance, state_str_1, not_used },
  478. /* class_dquote */ { move_advance, state_str_1, not_used },
  479. /* class_langle */ { move_advance, state_str_1, not_used },
  480. /* class_rangle */ { move_advance, state_str_1, not_used },
  481. /* class_highbit */ { move_advance, state_str_1, not_used },
  482. },
  483. { /* state_str_2 */
  484. /* class_blank */ { move_special, state_str_1, ignore_bslash },
  485. /* class_punc */ { move_special, state_str_1, ignore_bslash },
  486. /* class_name */ { move_special, state_str_1, ignore_bslash },
  487. /* class_e */ { move_special, state_str_1, ignore_bslash },
  488. /* class_escape */ { special, state_str_1, insert_char },
  489. /* class_ff */ { special, state_str_1, control_char },
  490. /* class_digit */ { special, state_str_3, found_digit },
  491. /* class_dot */ { move_special, state_str_1, ignore_bslash },
  492. /* class_sign */ { move_special, state_str_1, ignore_bslash },
  493. /* class_quote */ { special, state_str_1, insert_char },
  494. /* class_bslash */ { special, state_str_1, insert_char },
  495. /* class_slash */ { move_special, state_str_1, ignore_bslash },
  496. /* class_star */ { move_special, state_str_1, ignore_bslash },
  497. /* class_exclam */ { move_special, state_str_1, ignore_bslash },
  498. /* class_eol */ { special, state_str_1, string_wrap },
  499. /* class_rest */ { move_special, state_str_1, ignore_bslash },
  500. /* class_illegal */ { special, state_str_1, control_char },
  501. /* class_pound */ { move_special, state_str_1, ignore_bslash },
  502. /* class_dquote */ { special, state_str_1, insert_char },
  503. /* class_langle */ { move_special, state_str_1, ignore_bslash },
  504. /* class_rangle */ { move_special, state_str_1, ignore_bslash },
  505. /* class_highbit */ { move_special, state_str_1, ignore_bslash },
  506. },
  507. { /* state_str_3 */
  508. /* class_blank */ { move_special, state_str_1, missing_bslash },
  509. /* class_punc */ { move_special, state_str_1, missing_bslash },
  510. /* class_name */ { move_special, state_str_1, missing_bslash },
  511. /* class_e */ { move_special, state_str_1, missing_bslash },
  512. /* class_escape */ { move_special, state_str_1, missing_bslash },
  513. /* class_ff */ { special, state_str_1, control_char },
  514. /* class_digit */ { special, state_str_3, found_digit },
  515. /* class_dot */ { move_special, state_str_1, missing_bslash },
  516. /* class_sign */ { move_special, state_str_1, missing_bslash },
  517. /* class_quote */ { move_special, state_str_1, missing_bslash },
  518. /* class_bslash */ { special, state_str_1, end_digits },
  519. /* class_slash */ { move_special, state_str_1, missing_bslash },
  520. /* class_star */ { move_special, state_str_1, missing_bslash },
  521. /* class_exclam */ { move_special, state_str_1, missing_bslash },
  522. /* class_eol */ { move_final, token_ustring, backup_1 },
  523. /* class_rest */ { move_special, state_str_1, missing_bslash },
  524. /* class_illegal */ { special, state_str_3, control_char },
  525. /* class_pound */ { move_special, state_str_1, missing_bslash },
  526. /* class_dquote */ { move_special, state_str_1, missing_bslash },
  527. /* class_langle */ { move_special, state_str_1, missing_bslash },
  528. /* class_rangle */ { move_special, state_str_1, missing_bslash },
  529. /* class_highbit */ { move_special, state_str_1, missing_bslash },
  530. },
  531. { /* state_comment_1 */
  532. /* class_blank */ { move_final, token_punc, backup_0 },
  533. /* class_punc */ { move_final, token_punc, backup_1 },
  534. /* class_name */ { move_final, token_punc, backup_1 },
  535. /* class_e */ { move_final, token_punc, backup_1 },
  536. /* class_escape */ { move_final, token_punc, backup_1 },
  537. /* class_ff */ { special, state_comment_1,control_char },
  538. /* class_digit */ { move_final, token_punc, backup_1 },
  539. /* class_dot */ { move_final, token_punc, backup_1 },
  540. /* class_sign */ { move_final, token_punc, backup_1 },
  541. /* class_quote */ { move_final, token_punc, backup_1 },
  542. /* class_bslash */ { move_final, token_punc, backup_1 },
  543. /* class_slash */ { move_final, token_punc, backup_1 },
  544. /* class_star */ { move_advance, state_comment_2, not_used },
  545. /* class_exclam */ { move_final, token_punc, backup_1 },
  546. /* class_eol */ { move_final, token_punc, backup_1 },
  547. /* class_rest */ { move_final, token_punc, backup_1 },
  548. /* class_illegal */ { special, state_comment_1,control_char },
  549. /* class_pound */ { move_final, token_punc, backup_1 },
  550. /* class_dquote */ { move_final, token_punc, backup_1 },
  551. /* class_langle */ { move_final, token_punc, backup_1 },
  552. /* class_rangle */ { move_final, token_punc, backup_1 },
  553. /* class_highbit */ { move_final, token_punc, backup_1 },
  554. },
  555. { /* state_comment_2 */
  556. /* class_blank */ { move_advance, state_comment_2, not_used },
  557. /* class_punc */ { move_advance, state_comment_2, not_used },
  558. /* class_name */ { move_advance, state_comment_2, not_used },
  559. /* class_e */ { move_advance, state_comment_2, not_used },
  560. /* class_escape */ { move_advance, state_comment_2, not_used },
  561. /* class_ff */ { move_special, state_comment_2,control_char },
  562. /* class_digit */ { move_advance, state_comment_2, not_used },
  563. /* class_dot */ { move_advance, state_comment_2, not_used },
  564. /* class_sign */ { move_advance, state_comment_2, not_used },
  565. /* class_quote */ { move_advance, state_comment_2, not_used },
  566. /* class_bslash */ { move_advance, state_comment_2, not_used },
  567. /* class_slash */ { move_advance, state_comment_2, not_used },
  568. /* class_star */ { move_advance, state_comment_3, not_used },
  569. /* class_exclam */ { move_advance, state_comment_2, not_used },
  570. /* class_eol */ { move_special, state_comment_2,comment_wrap },
  571. /* class_rest */ { move_advance, state_comment_2, not_used },
  572. /* class_illegal */ { move_special, state_comment_2,control_char },
  573. /* class_pound */ { move_advance, state_comment_2, not_used },
  574. /* class_dquote */ { move_advance, state_comment_2, not_used },
  575. /* class_langle */ { move_advance, state_comment_2, not_used },
  576. /* class_rangle */ { move_advance, state_comment_2, not_used },
  577. /* class_highbit */ { move_advance, state_comment_2, not_used },
  578. },
  579. { /* state_comment_3 */
  580. /* class_blank */ { move_advance, state_comment_2, not_used },
  581. /* class_punc */ { move_advance, state_comment_2, not_used },
  582. /* class_name */ { move_advance, state_comment_2, not_used },
  583. /* class_e */ { move_advance, state_comment_2, not_used },
  584. /* class_escape */ { move_advance, state_comment_2, not_used },
  585. /* class_ff */ { move_special, state_comment_2,control_char },
  586. /* class_digit */ { move_advance, state_comment_2, not_used },
  587. /* class_dot */ { move_advance, state_comment_2, not_used },
  588. /* class_sign */ { move_advance, state_comment_2, not_used },
  589. /* class_quote */ { move_advance, state_comment_2, not_used },
  590. /* class_bslash */ { move_advance, state_comment_2, not_used },
  591. /* class_slash */ { final_comment, token_comment, not_used },
  592. /* class_star */ { move_advance, state_comment_3, not_used },
  593. /* class_exclam */ { move_advance, state_comment_2, not_used },
  594. /* class_eol */ { move_special, state_comment_2,comment_wrap },
  595. /* class_rest */ { move_advance, state_comment_2, not_used },
  596. /* class_illegal */ { move_special, state_comment_2,control_char },
  597. /* class_pound */ { move_advance, state_comment_2, not_used },
  598. /* class_dquote */ { move_advance, state_comment_2, not_used },
  599. /* class_langle */ { move_advance, state_comment_2, not_used },
  600. /* class_rangle */ { move_advance, state_comment_2, not_used },
  601. /* class_highbit */ { move_advance, state_comment_2, not_used },
  602. },
  603. { /* state_comment_4 */
  604. /* class_blank */ { move_advance, state_comment_4, not_used },
  605. /* class_punc */ { move_advance, state_comment_4, not_used },
  606. /* class_name */ { move_advance, state_comment_4, not_used },
  607. /* class_e */ { move_advance, state_comment_4, not_used },
  608. /* class_escape */ { move_advance, state_comment_4, not_used },
  609. /* class_ff */ { move_special, state_comment_4,control_char },
  610. /* class_digit */ { move_advance, state_comment_4, not_used },
  611. /* class_dot */ { move_advance, state_comment_4, not_used },
  612. /* class_sign */ { move_advance, state_comment_4, not_used },
  613. /* class_quote */ { move_advance, state_comment_4, not_used },
  614. /* class_bslash */ { move_advance, state_comment_4, not_used },
  615. /* class_slash */ { move_advance, state_comment_4, not_used },
  616. /* class_star */ { move_advance, state_comment_4, not_used },
  617. /* class_exclam */ { move_advance, state_comment_4, not_used },
  618. /* class_eol */ { final_comment, token_comment, backup_1 },
  619. /* class_rest */ { move_advance, state_comment_4, not_used },
  620. /* class_illegal */ { move_special, state_comment_4,control_char },
  621. /* class_pound */ { move_advance, state_comment_4, not_used },
  622. /* class_dquote */ { move_advance, state_comment_4, not_used },
  623. /* class_langle */ { move_advance, state_comment_4, not_used },
  624. /* class_rangle */ { move_advance, state_comment_4, not_used },
  625. /* class_highbit */ { move_advance, state_comment_4, not_used },
  626. },
  627. { /* state_eat_rest */
  628. /* class_blank */ { move_advance, state_eat_rest, not_used },
  629. /* class_punc */ { move_error, bad_prefix, backup_1 },
  630. /* class_name */ { move_error, bad_prefix, backup_1 },
  631. /* class_e */ { move_error, bad_prefix, backup_1 },
  632. /* class_escape */ { move_error, bad_prefix, backup_1 },
  633. /* class_ff */ { special, state_eat_rest, control_char },
  634. /* class_digit */ { move_error, bad_prefix, backup_1 },
  635. /* class_dot */ { move_error, bad_prefix, backup_1 },
  636. /* class_sign */ { move_error, bad_prefix, backup_1 },
  637. /* class_quote */ { move_error, bad_prefix, backup_1 },
  638. /* class_bslash */ { move_advance, state_eat_rest, not_used },
  639. /* class_slash */ { move_error, bad_prefix, backup_1 },
  640. /* class_star */ { move_advance, state_eat_rest, not_used },
  641. /* class_exclam */ { move_error, bad_prefix, backup_1 },
  642. /* class_eol */ { move_error, bad_prefix, backup_1 },
  643. /* class_rest */ { move_advance, state_eat_rest, not_used },
  644. /* class_illegal */ { special, state_eat_rest, control_char },
  645. /* class_pound */ { move_error, bad_prefix, backup_1 },
  646. /* class_dquote */ { move_error, bad_prefix, backup_1 },
  647. /* class_langle */ { move_error, bad_prefix, backup_1 },
  648. /* class_rangle */ { move_error, bad_prefix, backup_1 },
  649. /* class_highbit */ { move_advance, state_eat_rest, not_used },
  650. },
  651. { /* state_gstr_1 */
  652. /* class_blank */ { move_error, bad_prefix, backup_1 },
  653. /* class_punc */ { move_error, bad_prefix, backup_1 },
  654. /* class_name */ { move_advance, state_gstr_2, not_used },
  655. /* class_e */ { move_advance, state_gstr_2, not_used },
  656. /* class_escape */ { move_advance, state_gstr_2, not_used },
  657. /* class_ff */ { special, state_gstr_1, control_char },
  658. /* class_digit */ { move_error, bad_prefix, backup_1 },
  659. /* class_dot */ { move_error, bad_prefix, backup_1 },
  660. /* class_sign */ { move_error, bad_prefix, backup_1 },
  661. /* class_quote */ { move_error, bad_prefix, backup_1 },
  662. /* class_bslash */ { move_advance, state_eat_rest, not_used },
  663. /* class_slash */ { move_error, bad_prefix, backup_1 },
  664. /* class_star */ { move_advance, state_eat_rest, not_used },
  665. /* class_exclam */ { move_error, bad_prefix, backup_1 },
  666. /* class_eol */ { move_error, bad_prefix, backup_1 },
  667. /* class_rest */ { move_advance, state_eat_rest, not_used },
  668. /* class_illegal */ { special, state_eat_rest, control_char },
  669. /* class_pound */ { move_error, bad_prefix, backup_1 },
  670. /* class_dquote */ { move_error, bad_prefix, backup_1 },
  671. /* class_langle */ { move_error, bad_prefix, backup_1 },
  672. /* class_rangle */ { move_error, bad_prefix, backup_1 },
  673. /* class_highbit */ { move_advance, state_eat_rest, not_used },
  674. },
  675. { /* state_gstr_2 */
  676. /* class_blank */ { move_error, bad_prefix, backup_1 },
  677. /* class_punc */ { move_error, bad_prefix, backup_1 },
  678. /* class_name */ { move_advance, state_gstr_2, not_used },
  679. /* class_e */ { move_advance, state_gstr_2, not_used },
  680. /* class_escape */ { move_advance, state_gstr_2, not_used },
  681. /* class_ff */ { special, state_gstr_2, control_char },
  682. /* class_digit */ { move_advance, state_gstr_2, not_used },
  683. /* class_dot */ { move_error, bad_prefix, backup_1 },
  684. /* class_sign */ { move_error, bad_prefix, backup_1 },
  685. /* class_quote */ { move_error, bad_prefix, backup_1 },
  686. /* class_bslash */ { move_advance, state_eat_rest, not_used },
  687. /* class_slash */ { move_error, bad_prefix, backup_1 },
  688. /* class_star */ { move_advance, state_eat_rest, not_used },
  689. /* class_exclam */ { move_error, bad_prefix, backup_1 },
  690. /* class_eol */ { move_error, bad_prefix, backup_1 },
  691. /* class_rest */ { move_advance, state_eat_rest, not_used },
  692. /* class_illegal */ { special, state_eat_rest, control_char },
  693. /* class_pound */ { move_error, bad_prefix, backup_1 },
  694. /* class_dquote */ { special, state_gstr_3, charset_gstr },
  695. /* class_langle */ { move_error, bad_prefix, backup_1 },
  696. /* class_rangle */ { move_error, bad_prefix, backup_1 },
  697. /* class_highbit */ { move_advance, state_eat_rest, not_used },
  698. },
  699. { /* state_gstr_3 */
  700. /* class_blank */ { move_advance, state_gstr_3, not_used },
  701. /* class_punc */ { move_advance, state_gstr_3, not_used },
  702. /* class_name */ { move_advance, state_gstr_3, not_used },
  703. /* class_e */ { move_advance, state_gstr_3, not_used },
  704. /* class_escape */ { move_advance, state_gstr_3, not_used },
  705. /* class_ff */ { special, state_gstr_3, control_char },
  706. /* class_digit */ { move_advance, state_gstr_3, not_used },
  707. /* class_dot */ { move_advance, state_gstr_3, not_used },
  708. /* class_sign */ { move_advance, state_gstr_3, not_used },
  709. /* class_quote */ { move_advance, state_gstr_3, not_used },
  710. /* class_bslash */ { special, state_gstr_4, start_bslash },
  711. /* class_slash */ { move_advance, state_gstr_3, not_used },
  712. /* class_star */ { move_advance, state_gstr_3, not_used },
  713. /* class_exclam */ { move_advance, state_gstr_3, not_used },
  714. /* class_eol */ { move_final, token_ugstr, backup_1 },
  715. /* class_rest */ { move_advance, state_gstr_3, not_used },
  716. /* class_illegal */ { special, state_gstr_3, control_char },
  717. /* class_pound */ { move_advance, state_gstr_3, not_used },
  718. /* class_dquote */ { final, token_gstr, backup_0 },
  719. /* class_langle */ { move_advance, state_gstr_3, not_used },
  720. /* class_rangle */ { move_advance, state_gstr_3, not_used },
  721. /* class_highbit */ { move_special, state_gstr_3, highbit_char },
  722. },
  723. { /* state_gstr_4 */
  724. /* class_blank */ { move_special, state_gstr_3, ignore_bslash },
  725. /* class_punc */ { move_special, state_gstr_3, ignore_bslash },
  726. /* class_name */ { move_special, state_gstr_3, ignore_bslash },
  727. /* class_e */ { move_special, state_gstr_3, ignore_bslash },
  728. /* class_escape */ { special, state_gstr_3, insert_char },
  729. /* class_ff */ { special, state_gstr_3, control_char },
  730. /* class_digit */ { special, state_gstr_5, found_digit },
  731. /* class_dot */ { move_special, state_gstr_3, ignore_bslash },
  732. /* class_sign */ { move_special, state_gstr_3, ignore_bslash },
  733. /* class_quote */ { special, state_gstr_3, insert_char },
  734. /* class_bslash */ { special, state_gstr_3, insert_char },
  735. /* class_slash */ { move_special, state_gstr_3, ignore_bslash },
  736. /* class_star */ { move_special, state_gstr_3, ignore_bslash },
  737. /* class_exclam */ { move_special, state_gstr_3, ignore_bslash },
  738. /* class_eol */ { move_final, token_ugstr, backup_1 },
  739. /* class_rest */ { move_special, state_gstr_3, ignore_bslash },
  740. /* class_illegal */ { special, state_gstr_3, control_char },
  741. /* class_pound */ { move_special, state_gstr_3, ignore_bslash },
  742. /* class_dquote */ { special, state_gstr_3, insert_char },
  743. /* class_langle */ { move_special, state_gstr_3, ignore_bslash },
  744. /* class_rangle */ { move_special, state_gstr_3, ignore_bslash },
  745. /* class_highbit */ { move_special, state_gstr_3, ignore_bslash },
  746. },
  747. { /* state_gstr_5 */
  748. /* class_blank */ { move_special, state_gstr_3, missing_bslash },
  749. /* class_punc */ { move_special, state_gstr_3, missing_bslash },
  750. /* class_name */ { move_special, state_gstr_3, missing_bslash },
  751. /* class_e */ { move_special, state_gstr_3, missing_bslash },
  752. /* class_escape */ { move_special, state_gstr_3, missing_bslash },
  753. /* class_ff */ { special, state_gstr_3, control_char },
  754. /* class_digit */ { special, state_gstr_5, found_digit },
  755. /* class_dot */ { move_special, state_gstr_3, missing_bslash },
  756. /* class_sign */ { move_special, state_gstr_3, missing_bslash },
  757. /* class_quote */ { move_special, state_gstr_3, missing_bslash },
  758. /* class_bslash */ { special, state_gstr_3, end_digits },
  759. /* class_slash */ { move_special, state_gstr_3, missing_bslash },
  760. /* class_star */ { move_special, state_gstr_3, missing_bslash },
  761. /* class_exclam */ { move_special, state_gstr_3, missing_bslash },
  762. /* class_eol */ { move_final, token_ugstr, backup_1 },
  763. /* class_rest */ { move_special, state_gstr_3, missing_bslash },
  764. /* class_illegal */ { special, state_str_3, control_char },
  765. /* class_pound */ { move_special, state_gstr_3, missing_bslash },
  766. /* class_dquote */ { move_special, state_gstr_3, missing_bslash },
  767. /* class_langle */ { move_special, state_gstr_3, missing_bslash },
  768. /* class_rangle */ { move_special, state_gstr_3, missing_bslash },
  769. /* class_highbit */ { move_special, state_gstr_3, missing_bslash },
  770. },
  771. { /* state_langle */
  772. /* class_blank */ { move_error, bad_prefix, backup_1 },
  773. /* class_punc */ { move_error, bad_prefix, backup_1 },
  774. /* class_name */ { move_error, bad_prefix, backup_1 },
  775. /* class_e */ { move_error, bad_prefix, backup_1 },
  776. /* class_escape */ { move_error, bad_prefix, backup_1 },
  777. /* class_ff */ { special, state_langle, control_char },
  778. /* class_digit */ { move_error, bad_prefix, backup_1 },
  779. /* class_dot */ { move_error, bad_prefix, backup_1 },
  780. /* class_sign */ { move_error, bad_prefix, backup_1 },
  781. /* class_quote */ { move_error, bad_prefix, backup_1 },
  782. /* class_bslash */ { move_error, bad_prefix, backup_1 },
  783. /* class_slash */ { move_error, bad_prefix, backup_1 },
  784. /* class_star */ { move_error, bad_prefix, backup_1 },
  785. /* class_exclam */ { move_error, bad_prefix, backup_1 },
  786. /* class_eol */ { move_final, token_ugstr, backup_1 },
  787. /* class_rest */ { move_advance, state_eat_rest, not_used },
  788. /* class_illegal */ { special, state_langle, control_char },
  789. /* class_pound */ { move_error, bad_prefix, backup_1 },
  790. /* class_dquote */ { move_error, bad_prefix, backup_1 },
  791. /* class_langle */ { move_final, token_punc2, backup_0 },
  792. /* class_rangle */ { move_error, bad_prefix, backup_1 },
  793. /* class_highbit */ { move_error, bad_prefix, backup_1 },
  794. },
  795. { /* state_rangle */
  796. /* class_blank */ { move_error, bad_prefix, backup_1 },
  797. /* class_punc */ { move_error, bad_prefix, backup_1 },
  798. /* class_name */ { move_error, bad_prefix, backup_1 },
  799. /* class_e */ { move_error, bad_prefix, backup_1 },
  800. /* class_escape */ { move_error, bad_prefix, backup_1 },
  801. /* class_ff */ { special, state_langle, control_char },
  802. /* class_digit */ { move_error, bad_prefix, backup_1 },
  803. /* class_dot */ { move_error, bad_prefix, backup_1 },
  804. /* class_sign */ { move_error, bad_prefix, backup_1 },
  805. /* class_quote */ { move_error, bad_prefix, backup_1 },
  806. /* class_bslash */ { move_error, bad_prefix, backup_1 },
  807. /* class_slash */ { move_error, bad_prefix, backup_1 },
  808. /* class_star */ { move_error, bad_prefix, backup_1 },
  809. /* class_exclam */ { move_error, bad_prefix, backup_1 },
  810. /* class_eol */ { move_final, token_ugstr, backup_1 },
  811. /* class_rest */ { move_advance, state_eat_rest, not_used },
  812. /* class_illegal */ { special, state_langle, control_char },
  813. /* class_pound */ { move_error, bad_prefix, backup_1 },
  814. /* class_dquote */ { move_error, bad_prefix, backup_1 },
  815. /* class_langle */ { move_error, bad_prefix, backup_1 },
  816. /* class_rangle */ { move_final, token_punc2, backup_0 },
  817. /* class_highbit */ { move_error, bad_prefix, backup_1 },
  818. },
  819. };
  820. static char class_table[ 256 ] =
  821. {
  822. /* 00 */ class_eol, class_illegal, class_illegal, class_illegal,
  823. /* 04 */ class_illegal, class_illegal, class_illegal, class_illegal,
  824. /* 08 */ class_illegal, class_blank, class_illegal, class_illegal,
  825. /* 0C */ class_ff, class_illegal, class_illegal, class_illegal,
  826. /* 10 */ class_illegal, class_illegal, class_illegal, class_illegal,
  827. /* 14 */ class_illegal, class_illegal, class_illegal, class_illegal,
  828. /* 18 */ class_illegal, class_illegal, class_illegal, class_illegal,
  829. /* 1C */ class_illegal, class_illegal, class_illegal, class_illegal,
  830. /* 20 */ class_blank, class_exclam, class_dquote, class_pound,
  831. /* 24 */ class_name, class_rest, class_punc, class_quote,
  832. /* 28 */ class_punc, class_punc, class_star, class_sign,
  833. /* 2C */ class_punc, class_sign, class_dot, class_slash,
  834. /* 30 */ class_digit, class_digit, class_digit, class_digit,
  835. /* 34 */ class_digit, class_digit, class_digit, class_digit,
  836. /* 38 */ class_digit, class_digit, class_punc, class_punc,
  837. /* 3C */ class_langle, class_punc, class_rangle, class_rest,
  838. /* 40 */ class_rest, class_name, class_name, class_name,
  839. /* 44 */ class_name, class_e, class_name, class_name,
  840. /* 48 */ class_name, class_name, class_name, class_name,
  841. /* 4C */ class_name, class_name, class_name, class_name,
  842. /* 50 */ class_name, class_name, class_name, class_name,
  843. /* 54 */ class_name, class_name, class_name, class_name,
  844. /* 58 */ class_name, class_name, class_name, class_rest,
  845. /* 5C */ class_bslash, class_rest, class_punc, class_name,
  846. /* 60 */ class_rest, class_name, class_escape, class_name,
  847. /* 64 */ class_name, class_e, class_escape, class_name,
  848. /* 68 */ class_name, class_name, class_name, class_name,
  849. /* 6C */ class_name, class_name, class_escape, class_name,
  850. /* 70 */ class_name, class_name, class_escape, class_name,
  851. /* 74 */ class_escape, class_name, class_escape, class_name,
  852. /* 78 */ class_name, class_name, class_name, class_punc,
  853. /* 7C */ class_punc, class_punc, class_punc, class_illegal,
  854. /* 80 */ class_illegal, class_illegal, class_illegal, class_illegal,
  855. /* 84 */ class_illegal, class_illegal, class_illegal, class_illegal,
  856. /* 88 */ class_illegal, class_illegal, class_illegal, class_illegal,
  857. /* 8C */ class_illegal, class_illegal, class_illegal, class_illegal,
  858. /* 90 */ class_illegal, class_illegal, class_illegal, class_illegal,
  859. /* 94 */ class_illegal, class_illegal, class_illegal, class_illegal,
  860. /* 98 */ class_illegal, class_illegal, class_illegal, class_illegal,
  861. /* 9C */ class_illegal, class_illegal, class_illegal, class_illegal,
  862. /* A0 */ class_highbit, class_highbit, class_highbit, class_highbit,
  863. /* A4 */ class_highbit, class_highbit, class_highbit, class_highbit,
  864. /* A8 */ class_highbit, class_highbit, class_highbit, class_highbit,
  865. /* AC */ class_highbit, class_highbit, class_highbit, class_highbit,
  866. /* B0 */ class_highbit, class_highbit, class_highbit, class_highbit,
  867. /* B4 */ class_highbit, class_highbit, class_highbit, class_highbit,
  868. /* B8 */ class_highbit, class_highbit, class_highbit, class_highbit,
  869. /* BC */ class_highbit, class_highbit, class_highbit, class_highbit,
  870. /* C0 */ class_highbit, class_highbit, class_highbit, class_highbit,
  871. /* C4 */ class_highbit, class_highbit, class_highbit, class_highbit,
  872. /* C8 */ class_highbit, class_highbit, class_highbit, class_highbit,
  873. /* CC */ class_highbit, class_highbit, class_highbit, class_highbit,
  874. /* D0 */ class_highbit, class_highbit, class_highbit, class_highbit,
  875. /* D4 */ class_highbit, class_highbit, class_highbit, class_highbit,
  876. /* D8 */ class_highbit, class_highbit, class_highbit, class_highbit,
  877. /* DC */ class_highbit, class_highbit, class_highbit, class_highbit,
  878. /* E0 */ class_highbit, class_highbit, class_highbit, class_highbit,
  879. /* E4 */ class_highbit, class_highbit, class_highbit, class_highbit,
  880. /* E8 */ class_highbit, class_highbit, class_highbit, class_highbit,
  881. /* EC */ class_highbit, class_highbit, class_highbit, class_highbit,
  882. /* F0 */ class_highbit, class_highbit, class_highbit, class_highbit,
  883. /* F4 */ class_highbit, class_highbit, class_highbit, class_highbit,
  884. /* F8 */ class_highbit, class_highbit, class_highbit, class_highbit,
  885. /* FC */ class_highbit, class_highbit, class_highbit, class_highbit
  886. };
  887. /* Tables to correlate token numbers and ASCII values for
  888. punctuation characters. Used by yylex and lex_issue_error. */
  /* Number of single-character punctuation tokens in the two parallel
     tables punc_char and punc_token. */
  #define tok_punc_token_num 17

  /* ASCII value of each single-character punctuation mark.  The order is
     parallel to punc_token: the trailing comment on each entry names the
     token constant found at the same index there. */
  static unsigned char punc_char[tok_punc_token_num] =
  {
      '{',    /* LEFT_BRACE  */
      '}',    /* RIGHT_BRACE */
      '=',    /* EQUAL_SIGN  */
      ';',    /* SEMICOLON   */
      '(',    /* LEFT_PAREN  */
      ')',    /* RIGHT_PAREN */
      ':',    /* COLON       */
      '+',    /* PLUS        */
      '-',    /* MINUS       */
      ',',    /* COMMA       */
      '&',    /* AND         */
      '~',    /* NOT         */
      '*',    /* MULTIPLY    */
      '/',    /* DIVIDE      */
      '^',    /* XOR         */
      '|',    /* OR          */
      '#'     /* POUND       */
  };
  893. static int punc_token[tok_punc_token_num] =
  894. { LEFT_BRACE,
  895. RIGHT_BRACE,
  896. EQUAL_SIGN,
  897. SEMICOLON,
  898. LEFT_PAREN,
  899. RIGHT_PAREN,
  900. COLON,
  901. PLUS,
  902. MINUS,
  903. COMMA,
  904. AND,
  905. NOT,
  906. MULTIPLY,
  907. DIVIDE,
  908. XOR,
  909. OR,
  910. POUND };
  911. /* Tables to correlate token numbers and ASCII value pairs for
  912. punctuation characters. Used by yylex and lex_issue_error. */
  /* Characters that form the two-character punctuation tokens; index i
     here lines up with index i of punc2_token. */
  static unsigned char punc2_char[2] =
  {
      '<',    /* LEFT_SHIFT  */
      '>'     /* RIGHT_SHIFT */
  };
  914. static int punc2_token[2] =
  915. { LEFT_SHIFT,
  916. RIGHT_SHIFT };
  917. /*
  918. **++
  919. ** FUNCTIONAL DESCRIPTION:
  920. **
  921. ** This function returns the next token to be built in the UIL source
  922. ** program being read by the compiler.
  923. **
  924. ** FORMAL PARAMETERS:
  925. **
  926. ** none
  927. **
  928. ** IMPLICIT INPUTS:
  929. **
  930. **
  931. **
  932. ** IMPLICIT OUTPUTS:
  933. **
  934. **
  935. **
  936. ** FUNCTION VALUE:
  937. **
  938. ** number of the token generated
  939. **
  940. ** SIDE EFFECTS:
  941. **
  942. **
  943. **
  944. **
  945. **--
  946. **/
  947. int yylex()
  948. {
  949. unsigned char c_char; /* current character */
  950. int l_class; /* current character's class */
  951. int l_state; /* current token_table state */
  952. int l_lex_pos; /* next available position in c_lex_buffer*/
  953. cell z_cell; /* local copy of current token_table state*/
  954. int l_bslash_value; /* current value of \digit...\ construct */
  955. int l_start_src_pos; /* starting source position of a token */
  956. int l_charset; /* character set for strings */
  957. int l_write_direction; /* writing direction */
  958. int l_parse_direction; /* parsing direction */
  959. int l_charset_sixteen_bit = FALSE; /* true if charset is 16-bit */
  960. sym_value_entry_type
  961. *az_charset_entry; /* value entry for the current charset */
  962. int l_16bit_char_count = 0; /* for strings, count 16-bit chars */
  963. int l_16bit_chars_only; /* True if the gstr we are */
  964. /* processing only has 16-bit chars */
  965. src_source_record_type
  966. *az_start_src_record;/* starting source record of a token */
  967. lex_buffer_type
  968. *az_current_lex_buffer; /* current lexical buffer */
  969. az_charset_entry = (sym_value_entry_type *) 0;
  970. /*
  971. ** Call the Status callback routine to report our progress.
  972. */
  973. /* %COMPLETE (between 0-50) */
  974. Uil_percent_complete =
  975. CEIL((int)( .5 * ((float)Uil_characters_read/(float)Uil_file_size))*100, 50);
  976. if (Uil_cmd_z_command.status_cb != (Uil_continue_type(*)())NULL)
  977. diag_report_status();
  978. initialize_token_builder:
  979. /* initialize the lexical analyzer by
  980. * saving starting source position of the token
  981. * resetting the lexical buffer
  982. * putting the analyser in its initial state */
  983. az_start_src_record = src_az_current_source_record;
  984. l_start_src_pos = src_az_current_source_buffer->w_current_position;
  985. az_current_lex_buffer = az_first_lex_buffer;
  986. l_lex_pos = 0;
  987. l_charset = lex_k_default_charset;
  988. l_16bit_chars_only = FALSE;
  989. l_state = state_initial;
  990. /* start looking for the token */
  991. continue_in_next_state:
  992. for (;;)
  993. {
  994. /* get next input char */
  995. /* advance source too */
  996. c_char = src_az_current_source_buffer->c_text
  997. [ src_az_current_source_buffer->w_current_position++ ];
  998. /* %COMPLETE */
  999. Uil_characters_read++;
  1000. l_class = class_table[ c_char ]; /* determine its class */
  1001. z_cell = token_table[ l_state][l_class ]; /* load state cell */
  1002. /* pick up the next state, or terminal, or error */
  1003. l_state = z_cell.next_state;
  1004. /* l_state is negative for action states requiring the current
  1005. * character be saved in the current lexical buffer */
  1006. if (z_cell.action & NEGATIVE)
  1007. {
  1008. if (l_lex_pos > l_max_lex_buffer_pos )
  1009. {
  1010. az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
  1011. l_lex_pos = 0;
  1012. }
  1013. az_current_lex_buffer->c_text[ l_lex_pos ] = c_char;
  1014. l_lex_pos++;
  1015. }
  1016. /* next step is based on action */
  1017. switch (z_cell.action)
  1018. {
  1019. case move_advance:
  1020. case advance:
  1021. if (l_16bit_chars_only) goto found_16bit_char;
  1022. continue;
  1023. case reset:
  1024. goto initialize_token_builder;
  1025. case move_final:
  1026. case final:
  1027. case final_comment: /* RAP retain comments */
  1028. goto found_token;
  1029. case move_error:
  1030. case error:
  1031. goto found_error;
  1032. case move_special:
  1033. case special:
  1034. goto special_processing;
  1035. default:
  1036. _assert( FALSE, "unknown token_table action" );
  1037. }
  1038. }
  1039. /* process special actions */
  1040. special_processing:
  1041. switch (z_cell.backup) /* backup holds special processing code */
  1042. {
  1043. case control_char: /* encountered a control char in a string or
  1044. * comment - issue a diagnotic and continue */
  1045. issue_control_char_diagnostic( c_char );
  1046. break;
  1047. case start_bslash: /* start of a \ construct in a string */
  1048. l_bslash_value = 0; /* initialize collection cell */
  1049. break;
  1050. case found_digit: /* next digit in a \digit...\ sequence */
  1051. if (l_bslash_value < 256 ) /* if still in range add in next digit */
  1052. l_bslash_value = l_bslash_value * 10 + ( c_char - 48 );
  1053. break;
  1054. case end_digits: /* end of \digit...\ sequence */
  1055. if (l_bslash_value >= 256 ) /* issue a diagnostic */
  1056. { diag_issue_diagnostic
  1057. ( d_out_range,
  1058. src_az_current_source_record,
  1059. src_az_current_source_buffer->w_current_position - 1,
  1060. "\\digit...\\ sequence",
  1061. "0-255" );
  1062. l_bslash_value = lex_k_unprint_sub;
  1063. }
  1064. if (l_bslash_value == 0 ) /* issue a diagnostic */
  1065. { diag_issue_diagnostic
  1066. ( d_null,
  1067. src_az_current_source_record,
  1068. src_az_current_source_buffer->w_current_position - 1 );
  1069. }
  1070. if (l_lex_pos > l_max_lex_buffer_pos )
  1071. {
  1072. az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
  1073. l_lex_pos = 0;
  1074. }
  1075. az_current_lex_buffer->c_text[ l_lex_pos++ ] = l_bslash_value;
  1076. break;
  1077. case insert_char: /* place special character in lex buffer */
  1078. {
  1079. static unsigned char c_bslash_char[10] = {
  1080. '\\', '\'', 'n', 't', 'v', 'b', 'r', 'f', '"'};
  1081. static unsigned char ab_bslash_value[9] =
  1082. { 0x5C, 0x27, 0x0A, 0x09, 0x0B, 0x08, 0x0D, 0x0C, 0x22 };
  1083. if (l_lex_pos > l_max_lex_buffer_pos )
  1084. {
  1085. az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
  1086. l_lex_pos = 0;
  1087. }
  1088. az_current_lex_buffer->c_text[ l_lex_pos++ ] =
  1089. ab_bslash_value
  1090. [ _index( c_char, c_bslash_char, sizeof( c_bslash_char )-1 )];
  1091. break;
  1092. }
  1093. case missing_bslash: /* \digit...\ sequence not terminated */
  1094. diag_issue_diagnostic
  1095. ( d_unterm_seq,
  1096. src_az_current_source_record,
  1097. src_az_current_source_buffer->w_current_position - 1,
  1098. "\\digit...\\ sequence",
  1099. "with \\" );
  1100. break;
  1101. case ignore_bslash: /* \ not followed by valid character */
  1102. diag_issue_diagnostic
  1103. ( d_backslash_ignored,
  1104. src_az_current_source_record,
  1105. src_az_current_source_buffer->w_current_position - 1,
  1106. c_char );
  1107. break;
  1108. case string_wrap:
  1109. if (src_get_source_line( ) == src_k_end_source)
  1110. {
  1111. diag_issue_diagnostic
  1112. ( d_unterm_seq,
  1113. src_az_current_source_record,
  1114. src_az_current_source_buffer->w_current_position - 1,
  1115. "character string",
  1116. "before end of source" );
  1117. src_az_current_source_buffer->w_current_position--;
  1118. }
  1119. break;
  1120. case comment_wrap:
  1121. if (src_get_source_line( ) == src_k_end_source)
  1122. {
  1123. diag_issue_diagnostic
  1124. ( d_unterm_seq,
  1125. src_az_current_source_record,
  1126. src_az_current_source_buffer->w_current_position - 1,
  1127. "comment",
  1128. "before end of source" );
  1129. src_az_current_source_buffer->w_current_position--;
  1130. return UILEOF;
  1131. }
  1132. az_current_lex_buffer->
  1133. c_text[strlen((char *)az_current_lex_buffer->c_text)] = '\n';
  1134. break;
  1135. case highbit_char: /* check if must accept extra chars */
  1136. found_16bit_char:
  1137. {
  1138. unsigned char next_char;
  1139. /*
  1140. ** If the current character set allows 16-bit characters, then
  1141. ** process them specially.
  1142. */
  1143. if ( l_charset_sixteen_bit )
  1144. {
  1145. /* need to:
  1146. ** 1) get next input char and advance the source
  1147. ** 2) check that the next is not a control character
  1148. ** 3) place the next character in the lex buffer
  1149. */
  1150. next_char = src_az_current_source_buffer->c_text
  1151. [ src_az_current_source_buffer->w_current_position++ ];
  1152. switch (class_table[ next_char ])
  1153. {
  1154. case class_eol:
  1155. src_az_current_source_buffer->w_current_position--;
  1156. case class_illegal:
  1157. issue_control_char_diagnostic( next_char );
  1158. break;
  1159. default:
  1160. break;
  1161. }
  1162. if (l_lex_pos > l_max_lex_buffer_pos )
  1163. {
  1164. az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
  1165. l_lex_pos = 0;
  1166. }
  1167. az_current_lex_buffer->c_text[ l_lex_pos++ ] = next_char;
  1168. l_16bit_char_count ++;
  1169. }
  1170. break;
  1171. }
  1172. case charset_gstr: /* set l_charset with the char set */
  1173. {
  1174. _assert( FALSE, "Should never get to charset_gstr" );
  1175. break;
  1176. }
  1177. case nocharset_gstr:
  1178. if (Uil_lex_l_charset_specified) {
  1179. /* use the specified charset*/
  1180. l_charset = Uil_lex_l_literal_charset;
  1181. az_charset_entry = Uil_lex_az_literal_charset;
  1182. }
  1183. else if (Uil_lex_l_localized) goto found_localized_string;
  1184. else {
  1185. /* No charset specified, use the default */
  1186. l_charset = Uil_lex_l_user_default_charset;
  1187. az_charset_entry = Uil_lex_az_charset_entry;
  1188. }
  1189. /* Get the charset information */
  1190. sem_charset_info
  1191. (l_charset,
  1192. az_charset_entry,
  1193. &l_write_direction,
  1194. &l_parse_direction,
  1195. &l_charset_sixteen_bit);
  1196. /* reset 16 bit character count to 0 */
  1197. l_16bit_char_count = 0;
  1198. /*
  1199. ** if this is a user-defined, 16-bit charset then treat all
  1200. ** as 16-bit.
  1201. */
  1202. if ((l_charset_sixteen_bit) && (l_charset == lex_k_userdefined_charset))
  1203. l_16bit_chars_only = TRUE;
  1204. break;
  1205. default:
  1206. _assert( FALSE, "unknown token_table special action" );
  1207. }
  1208. /* Next state of the token builder is should already be in l_state.
  1209. * Continue at this point */
  1210. goto continue_in_next_state;
  1211. found_localized_string:
  1212. {
  1213. /* Local variables */
  1214. int mb_len, i;
  1215. unsigned char mb_byte;
  1216. /* Should be looking at the first byte of the string. */
  1217. /* Localize... */
  1218. _MrmOSSetLocale("");
  1219. /* Parse the string. */
  1220. while (TRUE)
  1221. {
  1222. mb_len = mblen((char *)&src_az_current_source_buffer->c_text
  1223. [src_az_current_source_buffer->w_current_position],
  1224. MB_CUR_MAX);
  1225. mb_byte = src_az_current_source_buffer->c_text
  1226. [src_az_current_source_buffer->w_current_position];
  1227. if (mb_len == 1)
  1228. switch (class_table[mb_byte])
  1229. {
  1230. case class_eol:
  1231. z_cell.backup = backup_0;
  1232. l_state = token_ugstr;
  1233. if (l_lex_pos > l_max_lex_buffer_pos )
  1234. {
  1235. az_current_lex_buffer =
  1236. get_lex_buffer( az_current_lex_buffer );
  1237. l_lex_pos = 0;
  1238. }
  1239. az_current_lex_buffer->c_text[ l_lex_pos++ ] = mb_byte;
  1240. _MrmOSSetLocale("C");
  1241. goto found_token;
  1242. case class_dquote:
  1243. z_cell.backup = backup_0;
  1244. l_state = token_lstr;
  1245. src_az_current_source_buffer->w_current_position++;
  1246. _MrmOSSetLocale("C");
  1247. goto found_token;
  1248. default:
  1249. break;
  1250. }
  1251. if (l_lex_pos > l_max_lex_buffer_pos )
  1252. {
  1253. az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
  1254. l_lex_pos = 0;
  1255. }
  1256. for (i = 0; i < mb_len; i++)
  1257. {
  1258. if (l_lex_pos > l_max_lex_buffer_pos )
  1259. {
  1260. az_current_lex_buffer = get_lex_buffer(az_current_lex_buffer);
  1261. l_lex_pos = 0;
  1262. }
  1263. az_current_lex_buffer->c_text[l_lex_pos++] =
  1264. src_az_current_source_buffer->c_text
  1265. [src_az_current_source_buffer->w_current_position++];
  1266. }
  1267. }
  1268. }
  1269. found_token:
  1270. /* do any backup of the source buffer position and lex buffer */
  1271. src_az_current_source_buffer->w_current_position -= z_cell.backup;
  1272. l_lex_pos -= z_cell.backup;
  1273. /* put a null at the end of the current lex buffer */
  1274. az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
  1275. /* case on the token found */
  1276. switch (l_state) /* l_state holds the token found */
  1277. {
  1278. case token_name:
  1279. {
  1280. key_keytable_entry_type *az_keyword;
  1281. /* check the case sensitivity flag and change case if necessary */
  1282. if (! uil_v_case_sensitive)
  1283. {
  1284. char * ptr;
  1285. for ( ptr = (char *)(az_current_lex_buffer->c_text);
  1286. (* ptr) != 0;
  1287. (* ptr) = _upper (* ptr), ptr++)
  1288. {}
  1289. }
  1290. /* check if the name is a keyword */
  1291. az_keyword =
  1292. key_find_keyword( l_lex_pos, (char *)az_current_lex_buffer->c_text );
  1293. if( az_keyword != NULL)
  1294. {
  1295. /* check that the length of the name is in range */
  1296. if (l_lex_pos > key_k_keyword_max_length)
  1297. {
  1298. l_lex_pos = key_k_keyword_max_length;
  1299. az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
  1300. diag_issue_diagnostic
  1301. ( d_name_too_long,
  1302. az_start_src_record,
  1303. l_start_src_pos,
  1304. az_current_lex_buffer->c_text );
  1305. }
  1306. yylval.value.az_keyword_entry = az_keyword;
  1307. yylval.b_type = az_keyword->b_token;
  1308. break;
  1309. }
  1310. /* process the name as an identifier */
  1311. /* check that the length of the identifier is in range */
  1312. /* Added for fix to CR 5566 */
  1313. if (l_lex_pos > lex_k_identifier_max_length)
  1314. {
  1315. l_lex_pos = lex_k_identifier_max_length;
  1316. az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
  1317. diag_issue_diagnostic
  1318. ( d_name_too_long,
  1319. az_start_src_record,
  1320. l_start_src_pos,
  1321. lex_k_identifier_max_length,
  1322. az_current_lex_buffer->c_text );
  1323. }
  1324. yylval.value.az_symbol_entry =
  1325. (sym_entry_type *) sym_insert_name( l_lex_pos, (char *)az_current_lex_buffer->c_text );
  1326. yylval.b_type = NAME;
  1327. break;
  1328. }
  1329. case token_punc:
  1330. {
  1331. int l_token;
  1332. /* found a punctuation mark - look up its token number in a table */
  1333. l_token = punc_token
  1334. [ _index( az_current_lex_buffer->c_text[ 0 ],
  1335. punc_char, sizeof( punc_char )) ];
  1336. yylval.b_type = l_token;
  1337. break;
  1338. }
  1339. case token_punc2:
  1340. {
  1341. int l_token;
  1342. /* found a punctuation mark - look up its token number in a table */
  1343. l_token = punc2_token
  1344. [ _index( az_current_lex_buffer->c_text[ 0 ],
  1345. punc2_char, sizeof( punc2_char )) ];
  1346. yylval.b_type = l_token;
  1347. break;
  1348. }
  1349. case token_eol:
  1350. /* if there is no more source
  1351. * then return an end of file
  1352. * otherwise go look for the next token */
  1353. if (src_get_source_line( ) == src_k_end_source)
  1354. return UILEOF;
  1355. goto initialize_token_builder;
  1356. case token_integer:
  1357. {
  1358. long l_integer;
  1359. yylval.b_type = UNS_INT_LITERAL;
  1360. /* convert the text to binary
  1361. * the sign of the number is applied as part of semantic
  1362. * analysis; thus we only handle integers in range 0..2**31-1
  1363. */
  1364. errno = 0;
  1365. l_integer = cvt_ascii_to_long(az_current_lex_buffer->c_text);
  1366. if (errno != 0)
  1367. diag_issue_diagnostic
  1368. ( d_out_range,
  1369. az_start_src_record,
  1370. l_start_src_pos,
  1371. "integer",
  1372. " " );
  1373. yylval.value.az_symbol_entry =
  1374. (sym_entry_type *)sem_create_value_entry
  1375. ( (char *)&l_integer, sizeof( long ), sym_k_integer_value );
  1376. break;
  1377. }
  1378. case token_ustring:
  1379. diag_issue_diagnostic
  1380. ( d_unterm_seq,
  1381. src_az_current_source_record,
  1382. src_az_current_source_buffer->w_current_position - 1,
  1383. "character string",
  1384. "before end of line" );
  1385. case token_comment: /* RAP preserve comments */
  1386. {
  1387. int size;
  1388. if (last_token_seen != token_comment)
  1389. comment_text[0]=0;
  1390. size = (int)strlen((char *)az_current_lex_buffer->c_text)+1;
  1391. if ((size + (int)strlen (comment_text)) >= comment_size)
  1392. {
  1393. comment_text = XtRealloc(comment_text, INCR_COMMENT_SIZE + strlen(comment_text));
  1394. comment_size = INCR_COMMENT_SIZE + strlen (comment_text);
  1395. }
  1396. strcat (comment_text, (char *)az_current_lex_buffer->c_text);
  1397. strcat (comment_text, "\n");
  1398. last_token_seen = token_comment;
  1399. goto initialize_token_builder;
  1400. }
  1401. case token_string:
  1402. found_primitive_string:
  1403. {
  1404. int l_length;
  1405. lex_buffer_type *az_lex_buffer;
  1406. sym_value_entry_type *az_value;
  1407. unsigned char *c_char;
  1408. l_length = l_lex_pos;
  1409. for (az_lex_buffer = az_first_lex_buffer;
  1410. az_lex_buffer != az_current_lex_buffer;
  1411. az_lex_buffer = az_lex_buffer->az_next_buffer)
  1412. l_length = l_length + l_max_lex_buffer_pos + 1;
  1413. az_value = create_str_entry( l_length, l_charset, az_charset_entry );
  1414. c_char = (unsigned char *)az_value->value.c_value;
  1415. for (az_lex_buffer = az_first_lex_buffer;
  1416. az_lex_buffer != az_current_lex_buffer;
  1417. az_lex_buffer = az_lex_buffer->az_next_buffer)
  1418. {
  1419. _move( c_char, az_lex_buffer->c_text, l_max_lex_buffer_pos + 1);
  1420. c_char = c_char + l_max_lex_buffer_pos + 1;
  1421. }
  1422. _move( c_char, az_lex_buffer->c_text, l_lex_pos );
  1423. yylval.value.az_symbol_entry = (sym_entry_type *) az_value;
  1424. yylval.b_type = CHAR_8_LITERAL;
  1425. break;
  1426. }
  1427. case token_real:
  1428. {
  1429. double d_real;
  1430. yylval.b_type = UNS_FLOAT_LITERAL;
  1431. errno = 0;
  1432. d_real = atof((char *)az_current_lex_buffer->c_text);
  1433. if (errno != 0)
  1434. diag_issue_diagnostic
  1435. ( d_out_range,
  1436. az_start_src_record,
  1437. l_start_src_pos,
  1438. "real",
  1439. " " );
  1440. yylval.value.az_symbol_entry =
  1441. (sym_entry_type *)sem_create_value_entry
  1442. ( (char *)&d_real, sizeof( double ), sym_k_float_value );
  1443. break;
  1444. }
  1445. case token_ff:
  1446. if (l_start_src_pos != 0)
  1447. issue_control_char_diagnostic( c_char );
  1448. src_az_current_source_record->b_flags |= src_m_form_feed;
  1449. goto initialize_token_builder;
  1450. case token_ugstr:
  1451. diag_issue_diagnostic
  1452. ( d_unterm_seq,
  1453. src_az_current_source_record,
  1454. src_az_current_source_buffer->w_current_position - 1,
  1455. "character string",
  1456. "before end of line" );
  1457. case token_gstr:
  1458. /*
  1459. ** Some general strings require special processing. Those
  1460. ** that do not can go thru the normal string code.
  1461. */
  1462. if ( l_parse_direction == XmSTRING_DIRECTION_R_TO_L )
  1463. {
  1464. int i,j;
  1465. unsigned char tmp1;
  1466. unsigned char tmp2;
  1467. /* assuming the string is confined to a single lex buffer. */
  1468. /* just flip the characters around. 16-bit characters need to */
  1469. /* be done in groups of two bytes */
  1470. if (l_charset_sixteen_bit != TRUE)
  1471. /*
  1472. ** Just reverse the bytes from the first to last
  1473. */
  1474. for (i=0, j=l_lex_pos-1; i < (l_lex_pos>>1); i++,j--)
  1475. {
  1476. tmp1 = az_current_lex_buffer->c_text[ i ];
  1477. az_current_lex_buffer->c_text[ i ] =
  1478. az_current_lex_buffer->c_text[ j ];
  1479. az_current_lex_buffer->c_text[ j ] = tmp1;
  1480. }
  1481. /*
  1482. ** Don't reverse the string if have less than 2 characters (4 bytes)
  1483. */
  1484. else if ((l_lex_pos>>1) > 1)
  1485. /*
  1486. ** This reversing doesn't work for mixed 8/16-bit character
  1487. ** sets, but only built-in character sets allow mixing and
  1488. ** they are not right-to-left. We do the same copying as in
  1489. ** the 8-bit case above, but we move two bytes at a time and
  1490. ** reverse the order as we copy so they end up correct.
  1491. */
  1492. for (i=0, j=l_lex_pos-1; i < (l_lex_pos>>1); i+=2,j-=2)
  1493. {
  1494. tmp1 = az_current_lex_buffer->c_text[ i ];
  1495. tmp2 = az_current_lex_buffer->c_text[ i + 1 ];
  1496. az_current_lex_buffer->c_text[ i ] =
  1497. az_current_lex_buffer->c_text[ j - 1 ];
  1498. az_current_lex_buffer->c_text[ i + 1 ] =
  1499. az_current_lex_buffer->c_text[ j ];
  1500. az_current_lex_buffer->c_text[ j ] = tmp2;
  1501. az_current_lex_buffer->c_text[ j - 1 ] = tmp1;
  1502. }
  1503. }
  1504. /*
  1505. ** If the string isn't 16-bit or it is userdefined and thus
  1506. ** cannot be mixed 8/16-bit then we can just make a primitive
  1507. ** string.
  1508. */
  1509. if ((l_charset_sixteen_bit != TRUE) ||
  1510. (l_charset == lex_k_userdefined_charset))
  1511. goto found_primitive_string;
  1512. else
  1513. {
  1514. sym_value_entry_type *cstr_entry;
  1515. sym_value_entry_type *str_entry;
  1516. int a_off, off;
  1517. /*
  1518. ** if string consists solely of 8-bit ascii characters,
  1519. ** l_16bit_char_count will be zero.
  1520. ** if string consists solely of 16 bit characters,
  1521. ** l_16bit_char_count*2 will equal l_lex_pos.
  1522. ** In either of these cases, the result is still a
  1523. ** primitive string.
  1524. */
  1525. /*
  1526. ** For KANJI and HANZI treat 8-bit characters as ISO_LATIN1.
  1527. */
  1528. if (l_16bit_char_count == 0)
  1529. {
  1530. l_charset = uil_sym_isolatin1_charset;
  1531. goto found_primitive_string;
  1532. }
  1533. /*
  1534. ** If the string only contains 16-bit characters,
  1535. ** it still can be stored as a primitive string.
  1536. */
  1537. if ((l_16bit_char_count<<1) == l_lex_pos)
  1538. goto found_primitive_string;
  1539. /*
  1540. ** lex buffer is a mix of 8 and 16 bit characters.
  1541. ** need to build a compound string.
  1542. */
  1543. cstr_entry = sem_create_cstr();
  1544. for ( a_off = 0,
  1545. off = 0;
  1546. off < l_lex_pos;
  1547. )
  1548. {
  1549. for (off = a_off; off < l_lex_pos; off++)
  1550. if (az_current_lex_buffer->c_text[ off ] > 0x97)
  1551. break;
  1552. /*
  1553. * Create the 8 bit string with iso_latin1
  1554. */
  1555. if (off > a_off)
  1556. {
  1557. str_entry = create_str_entry
  1558. ( off - a_off,
  1559. uil_sym_isolatin1_charset,
  1560. az_charset_entry );
  1561. _move( str_entry->value.c_value,
  1562. &az_current_lex_buffer->c_text[ a_off ],
  1563. off-a_off );
  1564. sem_append_str_to_cstr( cstr_entry, str_entry, TRUE );
  1565. }
  1566. for (a_off = off; a_off < l_lex_pos; a_off += 2)
  1567. if (az_current_lex_buffer->c_text[ a_off ] <= 0x97)
  1568. break;
  1569. /*
  1570. * Create the 16 bit string with its charset
  1571. */
  1572. if (a_off > off)
  1573. {
  1574. str_entry =
  1575. create_str_entry( a_off - off, l_charset, az_charset_entry );
  1576. _move( str_entry->value.c_value,
  1577. &az_current_lex_buffer->c_text[ off ],
  1578. a_off-off );
  1579. sem_append_str_to_cstr( cstr_entry, str_entry, TRUE );
  1580. }
  1581. }
  1582. yylval.value.az_symbol_entry = (sym_entry_type *)cstr_entry;
  1583. yylval.b_type = COMP_STRING;
  1584. }
  1585. break;
  1586. case token_lstr:
  1587. {
  1588. int l_length = 0;
  1589. lex_buffer_type *az_lex_buffer;
  1590. sym_value_entry_type *str_entry;
  1591. unsigned char *c_char;
  1592. l_length = l_lex_pos;
  1593. for (az_lex_buffer = az_first_lex_buffer;
  1594. az_lex_buffer != az_current_lex_buffer;
  1595. az_lex_buffer = az_lex_buffer->az_next_buffer)
  1596. l_length = l_length + l_max_lex_buffer_pos + 1;
  1597. str_entry = create_str_entry(l_length, lex_k_fontlist_default_tag,
  1598. az_charset_entry );
  1599. c_char = (unsigned char *)str_entry->value.c_value;
  1600. for (az_lex_buffer = az_first_lex_buffer;
  1601. az_lex_buffer != az_current_lex_buffer;
  1602. az_lex_buffer = az_lex_buffer->az_next_buffer)
  1603. {
  1604. _move( c_char, az_lex_buffer->c_text, l_max_lex_buffer_pos + 1);
  1605. c_char = c_char + l_max_lex_buffer_pos + 1;
  1606. }
  1607. _move( c_char, az_lex_buffer->c_text, l_lex_pos );
  1608. yylval.value.az_symbol_entry = (sym_entry_type *)str_entry;
  1609. yylval.b_type = LOC_STRING;
  1610. break;
  1611. }
  1612. default:
  1613. _assert( FALSE, "unknown token table final state" );
  1614. }
  1615. /* RAP we want to keep track of whether we are appending sequential comments */
  1616. last_token_seen = l_state;
  1617. /* set position information in token value */
  1618. yylval.az_source_record = az_start_src_record;
  1619. yylval.b_source_pos = l_start_src_pos;
  1620. yylval.b_source_end = src_az_current_source_buffer->w_current_position; /* was "l_start_src_pos + l_lex_pos;" */
  1621. yylval.b_tag = sar_k_token_frame;
  1622. /* dump the token if requested */
  1623. #if debug_version
  1624. if (uil_v_dump_tokens)
  1625. dump_token( az_current_lex_buffer, l_lex_pos );
  1626. #endif
  1627. /*
  1628. ** save this token
  1629. */
  1630. prev_yylval = yylval;
  1631. /* return the token generated */
  1632. return yylval.b_type;
  1633. found_error:
  1634. /* do any backup of the source buffer position and lex buffer */
  1635. src_az_current_source_buffer->w_current_position -= z_cell.backup;
  1636. l_lex_pos -= z_cell.backup;
  1637. /* put a null at the end of the current lex buffer */
  1638. az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
  1639. /* case on the type of error */
  1640. switch (l_state) /* contains the type of error */
  1641. {
  1642. case bad_prefix:
  1643. /* printable characters that are not part of a token were found */
  1644. diag_issue_diagnostic
  1645. ( d_unknown_seq,
  1646. az_start_src_record,
  1647. l_start_src_pos,
  1648. az_current_lex_buffer->c_text );
  1649. break;
  1650. default:
  1651. _assert( FALSE, "unknown token table error state" );
  1652. break;
  1653. }
  1654. goto initialize_token_builder;
  1655. }
  1656. /*
  1657. **++
  1658. ** FUNCTIONAL DESCRIPTION:
  1659. **
  1660. ** This function initializes the lexical analyzer.
  1661. **
  1662. ** FORMAL PARAMETERS:
  1663. **
  1664. ** none
  1665. **
  1666. ** IMPLICIT INPUTS:
  1667. **
  1668. ** none
  1669. **
  1670. ** IMPLICIT OUTPUTS:
  1671. **
  1672. ** az_first_lex_buffer
  1673. **
  1674. ** FUNCTION VALUE:
  1675. **
  1676. ** void
  1677. **
  1678. ** SIDE EFFECTS:
  1679. **
  1680. ** lexical buffer is allocated
  1681. **
  1682. **--
  1683. **/
  1684. #define UNSCHAR_MINUS_ONE (unsigned char) 255;
  1685. void lex_initialize_analyzer( )
  1686. {
  1687. String language;
  1688. /* RAP preserve comments */
  1689. comment_text = (char *) _get_memory(INITIAL_COMMENT_SIZE);
  1690. comment_size = INITIAL_COMMENT_SIZE;
  1691. comment_text[0] = '\0';
  1692. /* BEGIN OSF Fix CR 4749 */
  1693. /* The lex algorithm has the potential to write
  1694. * into index l_max_lex_buffer_pos + 1, so allocate l_max_lex_buffer_pos
  1695. * plus 2 positions in buffer.
  1696. */
  1697. az_first_lex_buffer =
  1698. (lex_buffer_type *) _get_memory (l_max_lex_buffer_pos + 2 +
  1699. sizeof(lex_buffer_type *));
  1700. /* END OSF Fix CR 4749 */
  1701. az_first_lex_buffer->az_next_buffer = NULL;
  1702. /* Initialize the stack frame entry for epsilon productions. */
  1703. gz_yynullval.b_tag = sar_k_null_frame;
  1704. /* Initialize the default character set */
  1705. language = (char *) _XmStringGetCurrentCharset();
  1706. if ( language == NULL )
  1707. Uil_lex_l_user_default_charset = lex_k_default_charset;
  1708. else
  1709. {
  1710. Uil_lex_l_user_default_charset = sem_charset_lang_name (language);
  1711. if (Uil_lex_l_user_default_charset == sym_k_error_charset)
  1712. {
  1713. diag_issue_diagnostic
  1714. ( d_bad_lang_value,
  1715. diag_k_no_source,
  1716. diag_k_no_column);
  1717. Uil_lex_l_user_default_charset = lex_k_default_charset;
  1718. }
  1719. }
  1720. Uil_lex_az_charset_entry = NULL;
  1721. /* Determine if localized strings are possible */
  1722. if (Uil_cmd_z_command.v_use_setlocale == FALSE)
  1723. Uil_lex_l_localized = FALSE;
  1724. else
  1725. {
  1726. Uil_lex_l_localized = TRUE;
  1727. _MrmOSSetLocale("C");
  1728. }
  1729. /* Initialize the current character set */
  1730. Uil_lex_l_charset_specified = FALSE;
  1731. /* Initialize the source position and record */
  1732. prev_yylval.b_source_end = UNSCHAR_MINUS_ONE;
  1733. prev_yylval.az_source_record = src_az_current_source_record;
  1734. }
  1735. /*
  1736. **++
  1737. ** FUNCTIONAL DESCRIPTION:
  1738. **
  1739. ** This function performs the cleanup processing of the lexical analyzer.
  1740. **
  1741. ** FORMAL PARAMETERS:
  1742. **
  1743. ** none
  1744. **
  1745. ** IMPLICIT INPUTS:
  1746. **
  1747. ** az_first_lex_buffer
  1748. **
  1749. ** IMPLICIT OUTPUTS:
  1750. **
  1751. ** az_first_lex_buffer
  1752. **
  1753. ** FUNCTION VALUE:
  1754. **
  1755. ** void
  1756. **
  1757. ** SIDE EFFECTS:
  1758. **
  1759. ** lexical buffer is freed
  1760. **
  1761. **--
  1762. **/
  1763. void Uil_lex_cleanup_analyzer( )
  1764. {
  1765. /* pointer to next buffer to free */
  1766. lex_buffer_type *az_buffer_to_free;
  1767. /* Loop through the list of buffers freeing them all */
  1768. while (az_first_lex_buffer != NULL) {
  1769. az_buffer_to_free = az_first_lex_buffer;
  1770. az_first_lex_buffer = az_first_lex_buffer->az_next_buffer;
  1771. _free_memory((char*)az_buffer_to_free);
  1772. }
  1773. }
  1774. /*
  1775. **++
  1776. ** FUNCTIONAL DESCRIPTION:
  1777. **
  1778. ** This function issues a syntax error. It is called from the
  1779. ** error handling mechanism in the parser.
  1780. **
  1781. ** FORMAL PARAMETERS:
  1782. **
  1783. ** restart_token the token number for the punctuation
  1784. ** character where parsing will resume after
  1785. ** this error is issued.
  1786. **
  1787. ** IMPLICIT INPUTS:
  1788. **
  1789. ** current lex buffer
  1790. ** punc_token and punc_char tables
  1791. **
  1792. ** IMPLICIT OUTPUTS:
  1793. **
  1794. ** none
  1795. **
  1796. ** FUNCTION VALUE:
  1797. **
  1798. ** void
  1799. **
  1800. ** SIDE EFFECTS:
  1801. **
  1802. ** issue a diagnostic
  1803. **
  1804. **--
  1805. **/
  1806. void lex_issue_error( restart_token )
  1807. int restart_token;
  1808. {
  1809. int i, token_num;
  1810. unsigned char c_char = '.';
  1811. char * tok_name;
  1812. /* Find the token number for the restart character in the table.
  1813. It should be there. Get the corresponding character for this
  1814. token. */
  1815. for ( i = 0 ; i<tok_punc_token_num ; i++ )
  1816. {
  1817. if (restart_token == punc_token [i])
  1818. {
  1819. c_char = punc_char [i];
  1820. break;
  1821. }
  1822. }
  1823. /* Get the text of the token name which caused the error. */
  1824. token_num = yylval.b_type;
  1825. if ( (token_num < 0) || (token_num > tok_num_tokens) )
  1826. tok_name = "UNKNOWN_TOKEN";
  1827. else
  1828. tok_name = tok_token_name_table[token_num];
  1829. /* Issue the error. */
  1830. diag_issue_diagnostic
  1831. (d_syntax,
  1832. yylval.az_source_record,
  1833. yylval.b_source_pos,
  1834. tok_name,
  1835. c_char);
  1836. }
  1837. /*
  1838. **
  1839. ** LOCAL FUNCTIONS
  1840. **
  1841. */
  1842. /*
  1843. **++
  1844. ** FUNCTIONAL DESCRIPTION:
  1845. **
  1846. ** Issue an error for an illegal control character.
  1847. **
  1848. ** FORMAL PARAMETERS:
  1849. **
  1850. ** c_char
  1851. **
  1852. ** IMPLICIT INPUTS:
  1853. **
  1854. ** current source position
  1855. **
  1856. ** IMPLICIT OUTPUTS:
  1857. **
  1858. ** none
  1859. **
  1860. ** FUNCTION VALUE:
  1861. **
  1862. ** void
  1863. **
  1864. ** SIDE EFFECTS:
  1865. **
  1866. ** issue a diagnostic
  1867. **
  1868. **--
  1869. **/
  1870. void issue_control_char_diagnostic
  1871. (unsigned char c_char )
  1872. {
  1873. diag_issue_diagnostic
  1874. ( d_control_char,
  1875. src_az_current_source_record,
  1876. src_az_current_source_buffer->w_current_position - 1,
  1877. c_char );
  1878. src_az_current_source_record->b_flags |= src_m_unprintable_chars;
  1879. return;
  1880. }
  1881. /*
  1882. **++
  1883. ** FUNCTIONAL DESCRIPTION:
  1884. **
  1885. ** This function obtains another lexical buffer.
  1886. **
  1887. ** FORMAL PARAMETERS:
  1888. **
  1889. ** az_current_lex_buffer
  1890. **
  1891. ** IMPLICIT INPUTS:
  1892. **
  1893. ** none
  1894. **
  1895. ** IMPLICIT OUTPUTS:
  1896. **
  1897. ** none
  1898. **
  1899. ** FUNCTION VALUE:
  1900. **
  1901. ** address of a new lexical buffer
  1902. **
  1903. ** SIDE EFFECTS:
  1904. **
  1905. ** another lexical buffer may be allocated
  1906. **
  1907. **--
  1908. **/
  1909. static lex_buffer_type *get_lex_buffer( az_current_lex_buffer )
  1910. lex_buffer_type *az_current_lex_buffer;
  1911. {
  1912. lex_buffer_type *az_lex_buffer;
  1913. /* check to see if another buffer is available - if not allocate one */
  1914. az_lex_buffer = az_current_lex_buffer->az_next_buffer;
  1915. if (az_lex_buffer == NULL)
  1916. {
  1917. /* BEGIN OSF Fix CR 4749 */
  1918. /* The lex algorithm has the potential to write
  1919. * into index l_max_lex_buffer_pos + 1, so allocate l_max_lex_buffer_pos
  1920. * plus 2 positions in buffer.
  1921. */
  1922. az_lex_buffer =
  1923. (lex_buffer_type *)_get_memory( l_max_lex_buffer_pos + 2 +
  1924. sizeof(lex_buffer_type *));
  1925. /* END OSF Fix CR 4749 */
  1926. az_current_lex_buffer->az_next_buffer = az_lex_buffer;
  1927. az_lex_buffer->az_next_buffer = NULL;
  1928. }
  1929. return az_lex_buffer;
  1930. }
  1931. /*
  1932. **++
  1933. ** FUNCTIONAL DESCRIPTION:
  1934. **
  1935. ** This procedure will change all the unprintable characters in
  1936. ** a buffer to lex_k_unprint_sub.
  1937. **
  1938. ** FORMAL PARAMETERS:
  1939. **
  1940. ** buffer buffer to be checked
  1941. ** length length of the buffer
  1942. ** flags lex_m_filter_xxx flags to indicate if additional
  1943. ** characters should be filtered.
  1944. **
  1945. ** IMPLICIT INPUTS:
  1946. **
  1947. ** class_table gives the unprintable characters
  1948. **
  1949. ** IMPLICIT OUTPUTS:
  1950. **
  1951. ** none
  1952. **
  1953. ** FUNCTION VALUE:
  1954. **
  1955. ** void
  1956. **
  1957. ** SIDE EFFECTS:
  1958. **
  1959. ** none
  1960. **
  1961. **--
  1962. **/
  1963. void lex_filter_unprintable_chars
  1964. (unsigned char *buffer,
  1965. int length,
  1966. unsigned long flags )
  1967. {
  1968. int i;
  1969. for (i=0; i<length; i++)
  1970. {
  1971. if ((class_table[ buffer[ i ] ] == class_illegal) ||
  1972. (buffer[ i ] == 12) || /* form feed */
  1973. (buffer[ i ] == 0) || /* null */
  1974. ( (flags & lex_m_filter_tab)
  1975. && buffer[ i ] == 9 )/* horizontal tab */
  1976. )
  1977. buffer[ i ] = lex_k_unprint_sub;
  1978. }
  1979. }
  #if debug_version
  /*
  **++
  **  FUNCTIONAL DESCRIPTION:
  **
  **      This procedure dumps the current token to the debug output:
  **      first a summary line (token type, start and end source
  **      positions), then the text held in each lex buffer from the
  **      first buffer up to and including the current one.
  **
  **  FORMAL PARAMETERS:
  **
  **      az_current_lex_buffer   last lex buffer holding text of the token
  **      l_lex_pos               number of characters to print from
  **                              az_current_lex_buffer
  **
  **  IMPLICIT INPUTS:
  **
  **      yylval
  **      az_first_lex_buffer
  **      src_az_current_source_record
  **      src_az_current_source_buffer
  **
  **  IMPLICIT OUTPUTS:
  **
  **      none
  **
  **  SIDE EFFECTS:
  **
  **      text is written through _debug_output
  **
  **--
  **/
  static void dump_token( az_current_lex_buffer,
                          l_lex_pos)
  lex_buffer_type *az_current_lex_buffer;
  int l_lex_pos;
  {
      /* One full buffer (l_max_lex_buffer_pos + 1 chars) plus a null. */
      unsigned char c_buffer[l_max_lex_buffer_pos +2];
      lex_buffer_type *az_lex_buffer;
      int last;
      int last_buffer;

      _debug_output
          ("token: %d start: %d, %d end: %d, %d \n",
           yylval.b_type,
           yylval.az_source_record->w_line_number,
           yylval.b_source_pos,
           src_az_current_source_record->w_line_number,
           src_az_current_source_buffer->w_current_position );

      /*
       * Stop at the end of the chain as well as at the current buffer,
       * so a current buffer that is not on the list cannot lead to a
       * dereference of a null next pointer.
       */
      for (az_lex_buffer = az_first_lex_buffer;
           az_lex_buffer != NULL;
           az_lex_buffer = az_lex_buffer->az_next_buffer)
      {
          last_buffer = ( az_lex_buffer == az_current_lex_buffer );

          /* Earlier buffers are full; the current one holds l_lex_pos chars. */
          if (last_buffer)
              last = l_lex_pos;
          else
              last = l_max_lex_buffer_pos+1;

          /* Work on a copy so the lex buffer itself is not altered. */
          _move( c_buffer, az_lex_buffer->c_text, last );
          lex_filter_unprintable_chars (c_buffer, last, 0);
          c_buffer[ last ] = 0;

          _debug_output("%s \n", c_buffer);

          if (last_buffer)
              return;
      }
  }
  #endif
  /*
  **++
  **  FUNCTIONAL DESCRIPTION:
  **
  **      This function converts a null-terminated string of decimal
  **      digits to a long integer in the range 0..2**31-1.  If the
  **      value is outside that range, the external variable errno is
  **      set to ERANGE and the value returned is 2**31-1.
  **
  **      Every character of the string takes part in the range check,
  **      so a string longer than ten digits is reported as an overflow
  **      rather than being silently truncated.
  **
  **  FORMAL PARAMETERS:
  **
  **      c_text      null-terminated string holding integer in ascii;
  **                  the characters are not validated and are expected
  **                  to be decimal digits only
  **
  **  IMPLICIT INPUTS:
  **
  **      none
  **
  **  IMPLICIT OUTPUTS:
  **
  **      errno       set to ERANGE if overflow occurs; not modified
  **                  otherwise
  **
  **  FUNCTION VALUE:
  **
  **      long integer value of c_text, or 2**31-1 on overflow
  **
  **  SIDE EFFECTS:
  **
  **      none
  **
  **--
  **/
  #define k_max_int 2147483647
  #define k_max_div_10 214748364
  long cvt_ascii_to_long(c_text)
  unsigned char XmConst *c_text;
  {
      unsigned long l_value;
      unsigned long digit;
      int pos;

      l_value = 0;
      for (pos = 0; c_text[ pos ] != 0; pos++)
      {
          digit = (unsigned long) (c_text[ pos ] - '0');

          /*
           * Test before accumulating: appending this digit overflows if
           * the value so far exceeds k_max_int / 10, or equals it while
           * the digit exceeds the last digit of k_max_int.
           */
          if ((l_value > k_max_div_10) ||
              ((l_value == k_max_div_10) && (digit > (k_max_int % 10))))
          {
              errno = ERANGE;
              return k_max_int;
          }

          l_value = (l_value * 10) + digit;
      }

      return l_value;
  }
  2097. /*
  2098. **++
  2099. ** FUNCTIONAL DESCRIPTION:
  2100. **
  2101. ** This function creates a symbol entry for a primitive string.
  2102. **
  2103. ** FORMAL PARAMETERS:
  2104. **
  2105. ** l_size number of bytes to allocate
  2106. ** l_charset charset of the string (token value)
  2107. ** az_charset_entry charset of the string (symbol table value entry)
  2108. **
  2109. ** IMPLICIT INPUTS:
  2110. **
  2111. ** yylval
  2112. **
  2113. ** IMPLICIT OUTPUTS:
  2114. **
  2115. ** none
  2116. **
  2117. ** FUNCTION VALUE:
  2118. **
  2119. ** symbol node created
  2120. **
  2121. ** SIDE EFFECTS:
  2122. **
  2123. ** none
  2124. **
  2125. **--
  2126. **/
  2127. sym_value_entry_type *create_str_entry (l_size, l_charset, az_charset_entry)
  2128. int l_size;
  2129. int l_charset;
  2130. sym_value_entry_type *az_charset_entry;
  2131. {
  2132. sym_value_entry_type *node;
  2133. int charset; /* from sym_k_..._charset */
  2134. unsigned char direction; /* writing direction */
  2135. /*
  2136. * Determine character set and writing direction
  2137. */
  2138. if (l_charset != lex_k_userdefined_charset)
  2139. {
  2140. charset = sem_map_subclass_to_charset( l_charset );
  2141. direction = charset_writing_direction_table[charset];
  2142. }
  2143. else
  2144. {
  2145. charset = sym_k_userdefined_charset;
  2146. direction = az_charset_entry->b_direction;
  2147. }
  2148. /* size of entry
  2149. * sym_k_value_entry for common part of a value entry
  2150. * l_size for the string
  2151. * 1 for the null on string
  2152. */
  2153. node = (sym_value_entry_type *)
  2154. sem_allocate_node
  2155. ( sym_k_value_entry,
  2156. sym_k_value_entry_size );
  2157. node->value.c_value = XtCalloc(1, l_size + 1);
  2158. node->header.az_src_rec = yylval.az_source_record;
  2159. node->header.b_src_pos = yylval.b_source_pos;
  2160. node->header.b_end_pos = yylval.b_source_end;
  2161. node->b_type = sym_k_char_8_value;
  2162. node->w_length = l_size;
  2163. node->b_charset = charset;
  2164. node->b_direction = direction;
  2165. node->az_charset_value = az_charset_entry;
  2166. node->obj_header.b_flags = sym_m_private | sym_m_builtin;
  2167. return node;
  2168. }