2
0

ParseC.pm 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209
  1. #! /usr/bin/env perl
  2. # Copyright 2018-2021 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License 2.0 (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. package OpenSSL::ParseC;
  9. use strict;
  10. use warnings;
  11. use Exporter;
  12. use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  13. $VERSION = "0.9";
  14. @ISA = qw(Exporter);
  15. @EXPORT = qw(parse);
  16. # Global handler data
  17. my @preprocessor_conds; # A list of simple preprocessor conditions,
  18. # each item being a list of macros defined
  19. # or not defined.
  20. # Handler helpers
  21. sub all_conds {
  22. return map { ( @$_ ) } @preprocessor_conds;
  23. }
  24. # A list of handlers that will look at a "complete" string and try to
  25. # figure out what to make of it.
  26. # Each handler is a hash with the following keys:
  27. #
  28. # regexp a regexp to compare the "complete" string with.
  29. # checker a function that does a more complex comparison.
  30. # Use this instead of regexp if that isn't enough.
  31. # massager massages the "complete" string into an array with
  32. # the following elements:
  33. #
  34. # [0] String that needs further processing (this
  35. # applies to typedefs of structs), or empty.
  36. # [1] The name of what was found.
  37. # [2] A character that denotes what type of thing
  38. # this is: 'F' for function, 'S' for struct,
  39. # 'T' for typedef, 'M' for macro, 'V' for
  40. # variable.
  41. # [3] Return type (only for type 'F' and 'V')
  42. # [4] Value (for type 'M') or signature (for type 'F',
  43. # 'V', 'T' or 'S')
  44. # [5...] The list of preprocessor conditions this is
  45. # found in, as in checks for macro definitions
  46. # (stored as the macro's name) or the absence
  47. # of definition (stored as the macro's name
  48. # prefixed with a '!'
  49. #
  50. # If the massager returns an empty list, it means the
  51. # "complete" string has side effects but should otherwise
  52. # be ignored.
  53. # If the massager is undefined, the "complete" string
  54. # should be ignored.
  55. my @opensslcpphandlers = (
  56. ##################################################################
  57. # OpenSSL CPP specials
  58. #
  59. # These are used to convert certain pre-precessor expressions into
  60. # others that @cpphandlers have a better chance to understand.
  61. # This changes any OPENSSL_NO_DEPRECATED_x_y[_z] check to a check of
  62. # OPENSSL_NO_DEPRECATEDIN_x_y[_z]. That's due to <openssl/macros.h>
  63. # creating OPENSSL_NO_DEPRECATED_x_y[_z], but the ordinals files using
  64. # DEPRECATEDIN_x_y[_z].
  65. { regexp => qr/#if(def|ndef) OPENSSL_NO_DEPRECATED_(\d+_\d+(?:_\d+)?)$/,
  66. massager => sub {
  67. return (<<"EOF");
  68. #if$1 OPENSSL_NO_DEPRECATEDIN_$2
  69. EOF
  70. }
  71. }
  72. );
  73. my @cpphandlers = (
  74. ##################################################################
  75. # CPP stuff
  76. { regexp => qr/#ifdef ?(.*)/,
  77. massager => sub {
  78. my %opts;
  79. if (ref($_[$#_]) eq "HASH") {
  80. %opts = %{$_[$#_]};
  81. pop @_;
  82. }
  83. push @preprocessor_conds, [ $1 ];
  84. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  85. if $opts{debug};
  86. return ();
  87. },
  88. },
  89. { regexp => qr/#ifndef ?(.*)/,
  90. massager => sub {
  91. my %opts;
  92. if (ref($_[$#_]) eq "HASH") {
  93. %opts = %{$_[$#_]};
  94. pop @_;
  95. }
  96. push @preprocessor_conds, [ '!'.$1 ];
  97. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  98. if $opts{debug};
  99. return ();
  100. },
  101. },
  102. { regexp => qr/#if (0|1)/,
  103. massager => sub {
  104. my %opts;
  105. if (ref($_[$#_]) eq "HASH") {
  106. %opts = %{$_[$#_]};
  107. pop @_;
  108. }
  109. if ($1 eq "1") {
  110. push @preprocessor_conds, [ "TRUE" ];
  111. } else {
  112. push @preprocessor_conds, [ "!TRUE" ];
  113. }
  114. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  115. if $opts{debug};
  116. return ();
  117. },
  118. },
  119. { regexp => qr/#if ?(.*)/,
  120. massager => sub {
  121. my %opts;
  122. if (ref($_[$#_]) eq "HASH") {
  123. %opts = %{$_[$#_]};
  124. pop @_;
  125. }
  126. my @results = ();
  127. my $conds = $1;
  128. if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) {
  129. push @results, $1; # Handle the simple case
  130. my $rest = $2;
  131. my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
  132. print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
  133. if $opts{debug};
  134. if ($rest =~ m/$re/) {
  135. my @rest = split /\|\|/, $rest;
  136. shift @rest;
  137. foreach (@rest) {
  138. m|^defined<<<\(([^\)]*)\)>>>$|;
  139. die "Something wrong...$opts{PLACE}" if $1 eq "";
  140. push @results, $1;
  141. }
  142. } else {
  143. $conds =~ s/<<<|>>>//g;
  144. warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
  145. if $opts{warnings};
  146. }
  147. } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) {
  148. push @results, '!'.$1; # Handle the simple case
  149. my $rest = $2;
  150. my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
  151. print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
  152. if $opts{debug};
  153. if ($rest =~ m/$re/) {
  154. my @rest = split /\&\&/, $rest;
  155. shift @rest;
  156. foreach (@rest) {
  157. m|^!defined<<<\(([^\)]*)\)>>>$|;
  158. die "Something wrong...$opts{PLACE}" if $1 eq "";
  159. push @results, '!'.$1;
  160. }
  161. } else {
  162. $conds =~ s/<<<|>>>//g;
  163. warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
  164. if $opts{warnings};
  165. }
  166. } else {
  167. $conds =~ s/<<<|>>>//g;
  168. warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
  169. if $opts{warnings};
  170. }
  171. print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
  172. if $opts{debug};
  173. push @preprocessor_conds, [ @results ];
  174. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  175. if $opts{debug};
  176. return ();
  177. },
  178. },
  179. { regexp => qr/#elif (.*)/,
  180. massager => sub {
  181. my %opts;
  182. if (ref($_[$#_]) eq "HASH") {
  183. %opts = %{$_[$#_]};
  184. pop @_;
  185. }
  186. die "An #elif without corresponding condition$opts{PLACE}"
  187. if !@preprocessor_conds;
  188. pop @preprocessor_conds;
  189. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  190. if $opts{debug};
  191. return (<<"EOF");
  192. #if $1
  193. EOF
  194. },
  195. },
  196. { regexp => qr/#else/,
  197. massager => sub {
  198. my %opts;
  199. if (ref($_[$#_]) eq "HASH") {
  200. %opts = %{$_[$#_]};
  201. pop @_;
  202. }
  203. die "An #else without corresponding condition$opts{PLACE}"
  204. if !@preprocessor_conds;
  205. # Invert all conditions on the last level
  206. my $stuff = pop @preprocessor_conds;
  207. push @preprocessor_conds, [
  208. map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff
  209. ];
  210. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  211. if $opts{debug};
  212. return ();
  213. },
  214. },
  215. { regexp => qr/#endif ?/,
  216. massager => sub {
  217. my %opts;
  218. if (ref($_[$#_]) eq "HASH") {
  219. %opts = %{$_[$#_]};
  220. pop @_;
  221. }
  222. die "An #endif without corresponding condition$opts{PLACE}"
  223. if !@preprocessor_conds;
  224. pop @preprocessor_conds;
  225. print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
  226. if $opts{debug};
  227. return ();
  228. },
  229. },
  230. { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
  231. massager => sub {
  232. my $name = $1;
  233. my $params = $2;
  234. my $spaceval = $3||"";
  235. my $val = $4||"";
  236. return ("",
  237. $1, 'M', "", $params ? "$name$params$spaceval" : $val,
  238. all_conds()); }
  239. },
  240. { regexp => qr/#.*/,
  241. massager => sub { return (); }
  242. },
  243. );
  244. my @opensslchandlers = (
  245. ##################################################################
  246. # OpenSSL C specials
  247. #
  248. # They are really preprocessor stuff, but they look like C stuff
  249. # to this parser. All of these do replacements, anything else is
  250. # an error.
  251. #####
  252. # Deprecated stuff, by OpenSSL release.
  253. # OSSL_DEPRECATEDIN_x_y[_z] is simply ignored. Such declarations are
  254. # supposed to be guarded with an '#ifdef OPENSSL_NO_DEPRECATED_x_y[_z]'
  255. { regexp => qr/OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/,
  256. massager => sub { return $1; },
  257. },
  258. { regexp => qr/(.*?)\s+OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/,
  259. massager => sub { return "$1 $2"; },
  260. },
  261. #####
  262. # Core stuff
  263. # OSSL_CORE_MAKE_FUNC is a macro to create the necessary data and inline
  264. # function the libcrypto<->provider interface
  265. { regexp => qr/OSSL_CORE_MAKE_FUNC<<<\((.*?),(.*?),(.*?)\)>>>/,
  266. massager => sub {
  267. return (<<"EOF");
  268. typedef $1 OSSL_FUNC_$2_fn$3;
  269. static ossl_inline OSSL_FUNC_$2_fn *OSSL_FUNC_$2(const OSSL_DISPATCH *opf);
  270. EOF
  271. },
  272. },
  273. #####
  274. # LHASH stuff
  275. # LHASH_OF(foo) is used as a type, but the chandlers won't take it
  276. # gracefully, so we expand it here.
  277. { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
  278. massager => sub { return ("$1struct lhash_st_$2$3"); }
  279. },
  280. { regexp => qr/DEFINE_LHASH_OF(?:_INTERNAL|_EX)?<<<\((.*)\)>>>/,
  281. massager => sub {
  282. return (<<"EOF");
  283. static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
  284. int (*cfn)(const $1 *, const $1 *));
  285. static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
  286. static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
  287. static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
  288. static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
  289. static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
  290. static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
  291. static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
  292. static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
  293. BIO *out);
  294. static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
  295. static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
  296. static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
  297. static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
  298. LHASH_OF($1)
  299. EOF
  300. }
  301. },
  302. #####
  303. # STACK stuff
  304. # STACK_OF(foo) is used as a type, but the chandlers won't take it
  305. # gracefully, so we expand it here.
  306. { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
  307. massager => sub { return ("$1struct stack_st_$2$3"); }
  308. },
  309. # { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
  310. # massager => sub {
  311. # my $before = $1;
  312. # my $stack_of = "struct stack_st_$2";
  313. # my $after = $3;
  314. # if ($after =~ m|^\w|) { $after = " ".$after; }
  315. # return ("$before$stack_of$after");
  316. # }
  317. # },
  318. { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
  319. massager => sub {
  320. return (<<"EOF");
  321. STACK_OF($1);
  322. typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
  323. typedef void (*sk_$1_freefunc)($3 *a);
  324. typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
  325. static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
  326. static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
  327. static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
  328. static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
  329. static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
  330. int n);
  331. static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
  332. static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
  333. static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
  334. static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
  335. static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
  336. static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
  337. static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
  338. static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
  339. static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
  340. static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
  341. sk_$1_freefunc freefunc);
  342. static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
  343. static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
  344. static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
  345. static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
  346. static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
  347. static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
  348. static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
  349. static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
  350. sk_$1_copyfunc copyfunc,
  351. sk_$1_freefunc freefunc);
  352. static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
  353. sk_$1_compfunc compare);
  354. EOF
  355. }
  356. },
  357. { regexp => qr/SKM_DEFINE_STACK_OF_INTERNAL<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
  358. massager => sub {
  359. return (<<"EOF");
  360. STACK_OF($1);
  361. typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
  362. typedef void (*sk_$1_freefunc)($3 *a);
  363. typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
  364. static ossl_unused ossl_inline $2 *ossl_check_$1_type($2 *ptr);
  365. static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_$1_sk_type(const STACK_OF($1) *sk);
  366. static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_$1_compfunc_type(sk_$1_compfunc cmp);
  367. static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_$1_copyfunc_type(sk_$1_copyfunc cpy);
  368. static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_$1_freefunc_type(sk_$1_freefunc fr);
  369. EOF
  370. }
  371. },
  372. { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/,
  373. massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
  374. },
  375. { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
  376. massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
  377. },
  378. { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/,
  379. massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
  380. },
  381. { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
  382. massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
  383. },
  384. #####
  385. # ASN1 stuff
  386. { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
  387. massager => sub {
  388. return (<<"EOF");
  389. const ASN1_ITEM *$1_it(void);
  390. EOF
  391. },
  392. },
  393. { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/,
  394. massager => sub {
  395. return (<<"EOF");
  396. int d2i_$2(void);
  397. int i2d_$2(void);
  398. EOF
  399. },
  400. },
  401. { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
  402. massager => sub {
  403. return (<<"EOF");
  404. int d2i_$3(void);
  405. int i2d_$3(void);
  406. DECLARE_ASN1_ITEM($2)
  407. EOF
  408. },
  409. },
  410. { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
  411. massager => sub {
  412. return (<<"EOF");
  413. int d2i_$2(void);
  414. int i2d_$2(void);
  415. DECLARE_ASN1_ITEM($2)
  416. EOF
  417. },
  418. },
  419. { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
  420. massager => sub {
  421. return (<<"EOF");
  422. int $2_free(void);
  423. int $2_new(void);
  424. EOF
  425. },
  426. },
  427. { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
  428. massager => sub {
  429. return (<<"EOF");
  430. int $1_free(void);
  431. int $1_new(void);
  432. EOF
  433. },
  434. },
  435. { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
  436. massager => sub {
  437. return (<<"EOF");
  438. int d2i_$2(void);
  439. int i2d_$2(void);
  440. int $2_free(void);
  441. int $2_new(void);
  442. DECLARE_ASN1_ITEM($2)
  443. EOF
  444. },
  445. },
  446. { regexp => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/,
  447. massager => sub { return (<<"EOF");
  448. int d2i_$1(void);
  449. int i2d_$1(void);
  450. int $1_free(void);
  451. int $1_new(void);
  452. DECLARE_ASN1_ITEM($1)
  453. EOF
  454. }
  455. },
  456. { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
  457. massager => sub {
  458. return (<<"EOF");
  459. int i2d_$1_NDEF(void);
  460. EOF
  461. }
  462. },
  463. { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
  464. massager => sub {
  465. return (<<"EOF");
  466. int $1_print_ctx(void);
  467. EOF
  468. }
  469. },
  470. { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
  471. massager => sub {
  472. return (<<"EOF");
  473. int $2_print_ctx(void);
  474. EOF
  475. }
  476. },
  477. { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
  478. massager => sub { return (); }
  479. },
  480. { regexp => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/,
  481. massager => sub {
  482. return (<<"EOF");
  483. int $1_dup(void);
  484. EOF
  485. }
  486. },
  487. { regexp => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
  488. massager => sub {
  489. return (<<"EOF");
  490. int $2_dup(void);
  491. EOF
  492. }
  493. },
  494. # Universal translator of attributed PEM declarators
  495. { regexp => qr/
  496. DECLARE_ASN1
  497. (_ENCODE_FUNCTIONS_only|_ENCODE_FUNCTIONS|_ENCODE_FUNCTIONS_name
  498. |_ALLOC_FUNCTIONS_name|_ALLOC_FUNCTIONS|_FUNCTIONS_name|_FUNCTIONS
  499. |_NDEF_FUNCTION|_PRINT_FUNCTION|_PRINT_FUNCTION_name
  500. |_DUP_FUNCTION|_DUP_FUNCTION_name)
  501. _attr
  502. <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>>
  503. /x,
  504. massager => sub { return (<<"EOF");
  505. DECLARE_ASN1$1($3)
  506. EOF
  507. },
  508. },
  509. { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
  510. massager => sub { return (); }
  511. },
  512. #####
  513. # PEM stuff
  514. { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
  515. massager => sub { return (<<"EOF");
  516. #ifndef OPENSSL_NO_STDIO
  517. int PEM_read_$1(void);
  518. int PEM_write_$1(void);
  519. #endif
  520. int PEM_read_bio_$1(void);
  521. int PEM_write_bio_$1(void);
  522. EOF
  523. },
  524. },
  525. { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)_ex<<<\((.*?),.*\)>>>/,
  526. massager => sub { return (<<"EOF");
  527. #ifndef OPENSSL_NO_STDIO
  528. int PEM_read_$1(void);
  529. int PEM_write_$1(void);
  530. int PEM_read_$1_ex(void);
  531. int PEM_write_$1_ex(void);
  532. #endif
  533. int PEM_read_bio_$1(void);
  534. int PEM_write_bio_$1(void);
  535. int PEM_read_bio_$1_ex(void);
  536. int PEM_write_bio_$1_ex(void);
  537. EOF
  538. },
  539. },
  540. { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
  541. massager => sub { return (<<"EOF");
  542. #ifndef OPENSSL_NO_STDIO
  543. int PEM_write_$1(void);
  544. #endif
  545. int PEM_write_bio_$1(void);
  546. EOF
  547. },
  548. },
  549. { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)_ex<<<\((.*?),.*\)>>>/,
  550. massager => sub { return (<<"EOF");
  551. #ifndef OPENSSL_NO_STDIO
  552. int PEM_write_$1(void);
  553. int PEM_write_$1_ex(void);
  554. #endif
  555. int PEM_write_bio_$1(void);
  556. int PEM_write_bio_$1_ex(void);
  557. EOF
  558. },
  559. },
  560. { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
  561. massager => sub { return (<<"EOF");
  562. #ifndef OPENSSL_NO_STDIO
  563. int PEM_read_$1(void);
  564. #endif
  565. int PEM_read_bio_$1(void);
  566. EOF
  567. },
  568. },
  569. { regexp => qr/DECLARE_PEM(?|_read|_read_cb)_ex<<<\((.*?),.*\)>>>/,
  570. massager => sub { return (<<"EOF");
  571. #ifndef OPENSSL_NO_STDIO
  572. int PEM_read_$1(void);
  573. int PEM_read_$1_ex(void);
  574. #endif
  575. int PEM_read_bio_$1(void);
  576. int PEM_read_bio_$1_ex(void);
  577. EOF
  578. },
  579. },
  580. # Universal translator of attributed PEM declarators
  581. { regexp => qr/
  582. DECLARE_PEM
  583. ((?:_rw|_rw_cb|_rw_const|_write|_write_cb|_write_const|_read|_read_cb)
  584. (?:_ex)?)
  585. _attr
  586. <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>>
  587. /x,
  588. massager => sub { return (<<"EOF");
  589. DECLARE_PEM$1($3)
  590. EOF
  591. },
  592. },
  593. # OpenSSL's declaration of externs with possible export linkage
  594. # (really only relevant on Windows)
  595. { regexp => qr/OPENSSL_(?:EXPORT|EXTERN)/,
  596. massager => sub { return ("extern"); }
  597. },
  598. # Spurious stuff found in the OpenSSL headers
  599. # Usually, these are just macros that expand to, well, something
  600. { regexp => qr/__NDK_FPABI__/,
  601. massager => sub { return (); }
  602. },
  603. );
  604. my $anoncnt = 0;
  605. my @chandlers = (
  606. ##################################################################
  607. # C stuff
  608. # extern "C" of individual items
  609. # Note that the main parse function has a special hack for 'extern "C" {'
  610. # which can't be done in handlers
  611. # We simply ignore it.
  612. { regexp => qr/^extern "C" (.*(?:;|>>>))/,
  613. massager => sub { return ($1); },
  614. },
  615. # any other extern is just ignored
  616. { regexp => qr/^\s* # Any spaces before
  617. extern # The keyword we look for
  618. \b # word to non-word boundary
  619. .* # Anything after
  620. ;
  621. /x,
  622. massager => sub { return (); },
  623. },
  624. # union, struct and enum definitions
  625. # Because this one might appear a little everywhere within type
  626. # definitions, we take it out and replace it with just
  627. # 'union|struct|enum name' while registering it.
  628. # This makes use of the parser trick to surround the outer braces
  629. # with <<< and >>>
  630. { regexp => qr/(.*) # Anything before ($1)
  631. \b # word to non-word boundary
  632. (union|struct|enum) # The word used ($2)
  633. (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3)
  634. <<<(\{.*?\})>>> # Struct or enum definition ($4)
  635. (.*) # Anything after ($5)
  636. ;
  637. /x,
  638. massager => sub {
  639. my $before = $1;
  640. my $word = $2;
  641. my $name = $3
  642. || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
  643. my $definition = $4;
  644. my $after = $5;
  645. my $type = $word eq "struct" ? 'S' : 'E';
  646. if ($before ne "" || $after ne ";") {
  647. if ($after =~ m|^\w|) { $after = " ".$after; }
  648. return ("$before$word $name$after;",
  649. "$word $name", $type, "", "$word$definition", all_conds());
  650. }
  651. # If there was no before nor after, make the return much simple
  652. return ("", "$word $name", $type, "", "$word$definition", all_conds());
  653. }
  654. },
  655. # Named struct and enum forward declarations
  656. # We really just ignore them, but we need to parse them or the variable
  657. # declaration handler further down will think it's a variable declaration.
  658. { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
  659. massager => sub { return (); }
  660. },
  661. # Function returning function pointer declaration
  662. # This sort of declaration may have a body (inline functions, for example)
  663. { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
  664. ((?:\w|\*|\s)*?) # Return type ($2)
  665. \s? # Possible space
  666. <<<\(\*
  667. ([[:alpha:]_]\w*) # Function name ($3)
  668. (\(.*\)) # Parameters ($4)
  669. \)>>>
  670. <<<(\(.*\))>>> # F.p. parameters ($5)
  671. (?:<<<\{.*\}>>>|;) # Body or semicolon
  672. /x,
  673. massager => sub {
  674. return ("", $3, 'T', "", "$2(*$4)$5", all_conds())
  675. if defined $1;
  676. return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); }
  677. },
  678. # Function pointer declaration, or typedef thereof
  679. # This sort of declaration never has a function body
  680. { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
  681. ((?:\w|\*|\s)*?) # Return type ($2)
  682. <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3)
  683. <<<(\(.*\))>>> # F.p. parameters ($4)
  684. ;
  685. /x,
  686. massager => sub {
  687. return ("", $3, 'T', "", "$2(*)$4", all_conds())
  688. if defined $1;
  689. return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds());
  690. },
  691. },
  692. # Function declaration, or typedef thereof
  693. # This sort of declaration may have a body (inline functions, for example)
  694. { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
  695. ((?:\w|\*|\s)*?) # Return type ($2)
  696. \s? # Possible space
  697. ([[:alpha:]_]\w*) # Function name ($3)
  698. <<<(\(.*\))>>> # Parameters ($4)
  699. (?:<<<\{.*\}>>>|;) # Body or semicolon
  700. /x,
  701. massager => sub {
  702. return ("", $3, 'T', "", "$2$4", all_conds())
  703. if defined $1;
  704. return ("", $3, 'F', $2, "$2$4", all_conds());
  705. },
  706. },
  707. # Variable declaration, including arrays, or typedef thereof
  708. { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1)
  709. ((?:\w|\*|\s)*?) # Type ($2)
  710. \s? # Possible space
  711. ([[:alpha:]_]\w*) # Variable name ($3)
  712. ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4)
  713. ;
  714. /x,
  715. massager => sub {
  716. return ("", $3, 'T', "", $2.($4||""), all_conds())
  717. if defined $1;
  718. return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds());
  719. },
  720. },
  721. );
  722. # End handlers are almost the same as handlers, except they are run through
  723. # ONCE when the input has been parsed through. These are used to check for
  724. # remaining stuff, such as an unfinished #ifdef and stuff like that that the
  725. # main parser can't check on its own.
  726. my @endhandlers = (
  727. { massager => sub {
  728. my %opts = %{$_[0]};
  729. die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE}
  730. if @preprocessor_conds;
  731. }
  732. }
  733. );
  734. # takes a list of strings that can each contain one or several lines of code
  735. # also takes a hash of options as last argument.
  736. #
  737. # returns a list of hashes with information:
  738. #
  739. # name name of the thing
  740. # type type, see the massage handler function
  741. # returntype return type of functions and variables
  742. # value value for macros, signature for functions, variables
  743. # and structs
  744. # conds preprocessor conditions (array ref)
  745. sub parse {
  746. my %opts;
  747. if (ref($_[$#_]) eq "HASH") {
  748. %opts = %{$_[$#_]};
  749. pop @_;
  750. }
  751. my %state = (
  752. in_extern_C => 0, # An exception to parenthesis processing.
  753. cpp_parens => [], # A list of ending parens and braces found in
  754. # preprocessor directives
  755. c_parens => [], # A list of ending parens and braces found in
  756. # C statements
  757. in_string => "", # empty string when outside a string, otherwise
  758. # "'" or '"' depending on the starting quote.
  759. in_comment => "", # empty string when outside a comment, otherwise
  760. # "/*" or "//" depending on the type of comment
  761. # found. The latter will never be multiline
  762. # NOTE: in_string and in_comment will never be
  763. # true (in perl semantics) at the same time.
  764. current_line => 0,
  765. );
  766. my @result = ();
  767. my $normalized_line = ""; # $input_line, but normalized. In essence, this
  768. # means that ALL whitespace is removed unless
  769. # it absolutely has to be present, and in that
  770. # case, there's only one space.
  771. # The cases where a space needs to stay present
  772. # are:
  773. # 1. between words
  774. # 2. between words and number
  775. # 3. after the first word of a preprocessor
  776. # directive.
  777. # 4. for the #define directive, between the macro
  778. # name/args and its value, so we end up with:
  779. # #define FOO val
  780. # #define BAR(x) something(x)
  781. my $collected_stmt = ""; # Where we're building up a C line until it's a
  782. # complete definition/declaration, as determined
  783. # by any handler being capable of matching it.
  784. # We use $_ shamelessly when looking through @lines.
  785. # In case we find a \ at the end, we keep filling it up with more lines.
  786. $_ = undef;
  787. foreach my $line (@_) {
  788. # split tries to be smart when a string ends with the thing we split on
  789. $line .= "\n" unless $line =~ m|\R$|;
  790. $line .= "#";
  791. # We use ¦undef¦ as a marker for a new line from the file.
  792. # Since we convert one line to several and unshift that into @lines,
  793. # that's the only safe way we have to track the original lines
  794. my @lines = map { ( undef, $_ ) } split m|\R|, $line;
  795. # Remember that extra # we added above? Now we remove it
  796. pop @lines;
  797. pop @lines; # Don't forget the undef
  798. while (@lines) {
  799. if (!defined($lines[0])) {
  800. shift @lines;
  801. $state{current_line}++;
  802. if (!defined($_)) {
  803. $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n";
  804. $opts{PLACE2} = $opts{filename}.":".$state{current_line};
  805. }
  806. next;
  807. }
  808. $_ = "" unless defined $_;
  809. $_ .= shift @lines;
  810. if (m|\\$|) {
  811. $_ = $`;
  812. next;
  813. }
  814. if ($opts{debug}) {
  815. print STDERR "DEBUG:----------------------------\n";
  816. print STDERR "DEBUG: \$_ = '$_'\n";
  817. }
  818. ##########################################################
  819. # Now that we have a full line, let's process through it
  820. while(1) {
  821. unless ($state{in_comment}) {
  822. # Begin with checking if the current $normalized_line
  823. # contains a preprocessor directive
  824. # This is only done if we're not inside a comment and
  825. # if it's a preprocessor directive and it's finished.
  826. if ($normalized_line =~ m|^#| && $_ eq "") {
  827. print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
  828. if $opts{debug};
  829. $opts{debug_type} = "OPENSSL CPP";
  830. my @r = ( _run_handlers($normalized_line,
  831. @opensslcpphandlers,
  832. \%opts) );
  833. if (shift @r) {
  834. # Checking if there are lines to inject.
  835. if (@r) {
  836. @r = split $/, (pop @r).$_;
  837. print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
  838. if $opts{debug} && @r;
  839. @lines = ( @r, @lines );
  840. $_ = "";
  841. }
  842. } else {
  843. print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
  844. if $opts{debug};
  845. $opts{debug_type} = "CPP";
  846. my @r = ( _run_handlers($normalized_line,
  847. @cpphandlers,
  848. \%opts) );
  849. if (shift @r) {
  850. if (ref($r[0]) eq "HASH") {
  851. push @result, shift @r;
  852. }
  853. # Now, check if there are lines to inject.
  854. # Really, this should never happen, it IS a
  855. # preprocessor directive after all...
  856. if (@r) {
  857. @r = split $/, pop @r;
  858. print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
  859. if $opts{debug} && @r;
  860. @lines = ( @r, @lines );
  861. $_ = "";
  862. }
  863. }
  864. }
  865. # Note: we simply ignore all directives that no
  866. # handler matches
  867. $normalized_line = "";
  868. }
  869. # If the two strings end and start with a character that
  870. # shouldn't get concatenated, add a space
  871. my $space =
  872. ($collected_stmt =~ m/(?:"|')$/
  873. || ($collected_stmt =~ m/(?:\w|\d)$/
  874. && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";
  875. # Now, unless we're building up a preprocessor directive or
  876. # are in the middle of a string, or the parens et al aren't
  877. # balanced up yet, let's try and see if there's a OpenSSL
  878. # or C handler that can make sense of what we have so far.
  879. if ( $normalized_line !~ m|^#|
  880. && ($collected_stmt ne "" || $normalized_line ne "")
  881. && ! @{$state{c_parens}}
  882. && ! $state{in_string} ) {
  883. if ($opts{debug}) {
  884. print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n";
  885. print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
  886. }
  887. $opts{debug_type} = "OPENSSL C";
  888. my @r = ( _run_handlers($collected_stmt
  889. .$space
  890. .$normalized_line,
  891. @opensslchandlers,
  892. \%opts) );
  893. if (shift @r) {
  894. # Checking if there are lines to inject.
  895. if (@r) {
  896. @r = split $/, (pop @r).$_;
  897. print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
  898. if $opts{debug} && @r;
  899. @lines = ( @r, @lines );
  900. $_ = "";
  901. }
  902. $normalized_line = "";
  903. $collected_stmt = "";
  904. } else {
  905. if ($opts{debug}) {
  906. print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n";
  907. print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
  908. }
  909. $opts{debug_type} = "C";
  910. my @r = ( _run_handlers($collected_stmt
  911. .$space
  912. .$normalized_line,
  913. @chandlers,
  914. \%opts) );
  915. if (shift @r) {
  916. if (ref($r[0]) eq "HASH") {
  917. push @result, shift @r;
  918. }
  919. # Checking if there are lines to inject.
  920. if (@r) {
  921. @r = split $/, (pop @r).$_;
  922. print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
  923. if $opts{debug} && @r;
  924. @lines = ( @r, @lines );
  925. $_ = "";
  926. }
  927. $normalized_line = "";
  928. $collected_stmt = "";
  929. }
  930. }
  931. }
  932. if ($_ eq "") {
  933. $collected_stmt .= $space.$normalized_line;
  934. $normalized_line = "";
  935. }
  936. }
  937. if ($_ eq "") {
  938. $_ = undef;
  939. last;
  940. }
  941. # Take care of inside string first.
  942. if ($state{in_string}) {
  943. if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
  944. $state{in_string} # Look for matching quote
  945. /x) {
  946. $normalized_line .= $`.$&;
  947. $state{in_string} = "";
  948. $_ = $';
  949. next;
  950. } else {
  951. die "Unfinished string without continuation found$opts{PLACE}\n";
  952. }
  953. }
  954. # ... or inside comments, whichever happens to apply
  955. elsif ($state{in_comment}) {
  956. # This should never happen
  957. die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
  958. if ($state{in_comment} eq "//");
  959. # A note: comments are simply discarded.
  960. if (m/ (?:^|(?<!\\)) # Make sure it's not escaped
  961. \*\/ # Look for C comment end
  962. /x) {
  963. $state{in_comment} = "";
  964. $_ = $';
  965. print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
  966. if $opts{debug};
  967. next;
  968. } else {
  969. $_ = "";
  970. next;
  971. }
  972. }
  973. # At this point, it's safe to remove leading whites, but
  974. # we need to be careful with some preprocessor lines
  975. if (m|^\s+|) {
  976. my $rest = $';
  977. my $space = "";
  978. $space = " "
  979. if ($normalized_line =~ m/^
  980. \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
  981. | \#[a-z]+
  982. $/x);
  983. print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
  984. if $opts{debug};
  985. $_ = $space.$rest;
  986. }
  987. my $parens =
  988. $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens';
  989. (my $paren_singular = $parens) =~ s|s$||;
  990. # Now check for specific tokens, and if they are parens,
  991. # check them against $state{$parens}. Note that we surround
  992. # the outermost parens with extra "<<<" and ">>>". Those
  993. # are for the benefit of handlers who to need to detect
  994. # them, and they will be removed from the final output.
  995. if (m|^[\{\[\(]|) {
  996. my $body = $&;
  997. $_ = $';
  998. if (!@{$state{$parens}}) {
  999. if ("$normalized_line$body" =~ m|^extern "C"\{$|) {
  1000. $state{in_extern_C} = 1;
  1001. print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
  1002. if $opts{debug};
  1003. $normalized_line = "";
  1004. } else {
  1005. $normalized_line .= "<<<".$body;
  1006. }
  1007. } else {
  1008. $normalized_line .= $body;
  1009. }
  1010. if ($normalized_line ne "") {
  1011. print STDERR "DEBUG: found $paren_singular start '$body'\n"
  1012. if $opts{debug};
  1013. $body =~ tr|\{\[\(|\}\]\)|;
  1014. print STDERR "DEBUG: pushing $paren_singular end '$body'\n"
  1015. if $opts{debug};
  1016. push @{$state{$parens}}, $body;
  1017. }
  1018. } elsif (m|^[\}\]\)]|) {
  1019. $_ = $';
  1020. if (!@{$state{$parens}}
  1021. && $& eq '}' && $state{in_extern_C}) {
  1022. print STDERR "DEBUG: found end of 'extern \"C\"'\n"
  1023. if $opts{debug};
  1024. $state{in_extern_C} = 0;
  1025. } else {
  1026. print STDERR "DEBUG: Trying to match '$&' against '"
  1027. ,join("', '", @{$state{$parens}})
  1028. ,"'\n"
  1029. if $opts{debug};
  1030. die "Unmatched parentheses$opts{PLACE}\n"
  1031. unless (@{$state{$parens}}
  1032. && pop @{$state{$parens}} eq $&);
  1033. if (!@{$state{$parens}}) {
  1034. $normalized_line .= $&.">>>";
  1035. } else {
  1036. $normalized_line .= $&;
  1037. }
  1038. }
  1039. } elsif (m|^["']|) { # string start
  1040. my $body = $&;
  1041. $_ = $';
  1042. # We want to separate strings from \w and \d with one space.
  1043. $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
  1044. $normalized_line .= $body;
  1045. $state{in_string} = $body;
  1046. } elsif (m|^\/\*|) { # C style comment
  1047. print STDERR "DEBUG: found start of C style comment\n"
  1048. if $opts{debug};
  1049. $state{in_comment} = $&;
  1050. $_ = $';
  1051. } elsif (m|^\/\/|) { # C++ style comment
  1052. print STDERR "DEBUG: found C++ style comment\n"
  1053. if $opts{debug};
  1054. $_ = ""; # (just discard it entirely)
  1055. } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ )
  1056. (?i: U | L | UL | LL | ULL )?
  1057. | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)?
  1058. ) /x) {
  1059. print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
  1060. if $opts{debug};
  1061. $normalized_line .= $&;
  1062. $_ = $';
  1063. } elsif (m/^[[:alpha:]_]\w*/) {
  1064. my $body = $&;
  1065. my $rest = $';
  1066. my $space = "";
  1067. # Now, only add a space if it's needed to separate
  1068. # two \w characters, and we also surround strings with
  1069. # a space. In this case, that's if $normalized_line ends
  1070. # with a \w, \d, " or '.
  1071. $space = " "
  1072. if ($normalized_line =~ m/("|')$/
  1073. || ($normalized_line =~ m/(\w|\d)$/
  1074. && $body =~ m/^(\w|\d)/));
  1075. print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
  1076. if $opts{debug};
  1077. $normalized_line .= $space.$body;
  1078. $_ = $rest;
  1079. } elsif (m|^(?:\\)?.|) { # Catch-all
  1080. $normalized_line .= $&;
  1081. $_ = $';
  1082. }
  1083. }
  1084. }
  1085. }
  1086. foreach my $handler (@endhandlers) {
  1087. if ($handler->{massager}) {
  1088. $handler->{massager}->(\%opts);
  1089. }
  1090. }
  1091. return @result;
  1092. }
  1093. # arg1: line to check
  1094. # arg2...: handlers to check
  1095. # return undef when no handler matched
  1096. sub _run_handlers {
  1097. my %opts;
  1098. if (ref($_[$#_]) eq "HASH") {
  1099. %opts = %{$_[$#_]};
  1100. pop @_;
  1101. }
  1102. my $line = shift;
  1103. my @handlers = @_;
  1104. foreach my $handler (@handlers) {
  1105. if ($handler->{regexp}
  1106. && $line =~ m|^$handler->{regexp}$|) {
  1107. if ($handler->{massager}) {
  1108. if ($opts{debug}) {
  1109. print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
  1110. print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
  1111. }
  1112. my $saved_line = $line;
  1113. my @massaged =
  1114. map { s/(<<<|>>>)//g; $_ }
  1115. $handler->{massager}->($saved_line, \%opts);
  1116. print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
  1117. , join("', '", @massaged), "'\n"
  1118. if $opts{debug};
  1119. # Because we may get back new lines to be
  1120. # injected before whatever else that follows,
  1121. # and the injected stuff might include
  1122. # preprocessor lines, we need to inject them
  1123. # in @lines and set $_ to the empty string to
  1124. # break out from the inner loops
  1125. my $injected_lines = shift @massaged || "";
  1126. if (@massaged) {
  1127. return (1,
  1128. {
  1129. name => shift @massaged,
  1130. type => shift @massaged,
  1131. returntype => shift @massaged,
  1132. value => shift @massaged,
  1133. conds => [ @massaged ]
  1134. },
  1135. $injected_lines
  1136. );
  1137. } else {
  1138. print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n"
  1139. if $opts{debug} && $injected_lines eq "";
  1140. return (1, $injected_lines);
  1141. }
  1142. }
  1143. return (1);
  1144. }
  1145. }
  1146. return (0);
  1147. }