curl_fnmatch.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) 1998 - 2010, Daniel Stenberg, <daniel@haxx.se>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at http://curl.haxx.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. ***************************************************************************/
  22. #include "setup.h"
  23. #include "curl_fnmatch.h"
  24. #define _MPRINTF_REPLACE /* use our functions only */
  25. #include <curl/mprintf.h>
  26. #include "curl_memory.h"
  27. /* The last #include file should be: */
  28. #include "memdebug.h"
  29. #define CURLFNM_CHARSET_LEN (sizeof(char) * 256)
  30. #define CURLFNM_CHSET_SIZE (CURLFNM_CHARSET_LEN + 15)
  31. #define CURLFNM_NEGATE CURLFNM_CHARSET_LEN
  32. #define CURLFNM_ALNUM (CURLFNM_CHARSET_LEN + 1)
  33. #define CURLFNM_DIGIT (CURLFNM_CHARSET_LEN + 2)
  34. #define CURLFNM_XDIGIT (CURLFNM_CHARSET_LEN + 3)
  35. #define CURLFNM_ALPHA (CURLFNM_CHARSET_LEN + 4)
  36. #define CURLFNM_PRINT (CURLFNM_CHARSET_LEN + 5)
  37. #define CURLFNM_BLANK (CURLFNM_CHARSET_LEN + 6)
  38. #define CURLFNM_LOWER (CURLFNM_CHARSET_LEN + 7)
  39. #define CURLFNM_GRAPH (CURLFNM_CHARSET_LEN + 8)
  40. #define CURLFNM_SPACE (CURLFNM_CHARSET_LEN + 9)
  41. #define CURLFNM_UPPER (CURLFNM_CHARSET_LEN + 10)
  42. typedef enum {
  43. CURLFNM_LOOP_DEFAULT = 0,
  44. CURLFNM_LOOP_BACKSLASH
  45. } loop_state;
  46. typedef enum {
  47. CURLFNM_SCHS_DEFAULT = 0,
  48. CURLFNM_SCHS_MAYRANGE,
  49. CURLFNM_SCHS_MAYRANGE2,
  50. CURLFNM_SCHS_RIGHTBR,
  51. CURLFNM_SCHS_RIGHTBRLEFTBR
  52. } setcharset_state;
  53. typedef enum {
  54. CURLFNM_PKW_INIT = 0,
  55. CURLFNM_PKW_DDOT
  56. } parsekey_state;
  57. #define SETCHARSET_OK 1
  58. #define SETCHARSET_FAIL 0
  59. static int parsekeyword(unsigned char **pattern, unsigned char *charset)
  60. {
  61. parsekey_state state = CURLFNM_PKW_INIT;
  62. #define KEYLEN 10
  63. char keyword[KEYLEN] = { 0 };
  64. int found = FALSE;
  65. int i;
  66. unsigned char *p = *pattern;
  67. for(i = 0; !found; i++) {
  68. char c = *p++;
  69. if(i >= KEYLEN)
  70. return SETCHARSET_FAIL;
  71. switch(state) {
  72. case CURLFNM_PKW_INIT:
  73. if(ISALPHA(c) && ISLOWER(c))
  74. keyword[i] = c;
  75. else if(c == ':')
  76. state = CURLFNM_PKW_DDOT;
  77. else
  78. return 0;
  79. break;
  80. case CURLFNM_PKW_DDOT:
  81. if(c == ']')
  82. found = TRUE;
  83. else
  84. return SETCHARSET_FAIL;
  85. }
  86. }
  87. #undef KEYLEN
  88. *pattern = p; /* move caller's pattern pointer */
  89. if(strcmp(keyword, "digit") == 0)
  90. charset[CURLFNM_DIGIT] = 1;
  91. else if(strcmp(keyword, "alnum") == 0)
  92. charset[CURLFNM_ALNUM] = 1;
  93. else if(strcmp(keyword, "alpha") == 0)
  94. charset[CURLFNM_ALPHA] = 1;
  95. else if(strcmp(keyword, "xdigit") == 0)
  96. charset[CURLFNM_XDIGIT] = 1;
  97. else if(strcmp(keyword, "print") == 0)
  98. charset[CURLFNM_PRINT] = 1;
  99. else if(strcmp(keyword, "graph") == 0)
  100. charset[CURLFNM_GRAPH] = 1;
  101. else if(strcmp(keyword, "space") == 0)
  102. charset[CURLFNM_SPACE] = 1;
  103. else if(strcmp(keyword, "blank") == 0)
  104. charset[CURLFNM_BLANK] = 1;
  105. else if(strcmp(keyword, "upper") == 0)
  106. charset[CURLFNM_UPPER] = 1;
  107. else if(strcmp(keyword, "lower") == 0)
  108. charset[CURLFNM_LOWER] = 1;
  109. else
  110. return SETCHARSET_FAIL;
  111. return SETCHARSET_OK;
  112. }
  113. /* returns 1 (true) if pattern is OK, 0 if is bad ("p" is pattern pointer) */
  114. static int setcharset(unsigned char **p, unsigned char *charset)
  115. {
  116. setcharset_state state = CURLFNM_SCHS_DEFAULT;
  117. unsigned char rangestart = 0;
  118. unsigned char lastchar = 0;
  119. bool something_found = FALSE;
  120. unsigned char c;
  121. for(;;) {
  122. c = **p;
  123. switch(state) {
  124. case CURLFNM_SCHS_DEFAULT:
  125. if(ISALNUM(c)) { /* ASCII value */
  126. rangestart = c;
  127. charset[c] = 1;
  128. (*p)++;
  129. state = CURLFNM_SCHS_MAYRANGE;
  130. something_found = TRUE;
  131. }
  132. else if(c == ']') {
  133. if(something_found)
  134. return SETCHARSET_OK;
  135. else
  136. something_found = TRUE;
  137. state = CURLFNM_SCHS_RIGHTBR;
  138. charset[c] = 1;
  139. (*p)++;
  140. }
  141. else if(c == '[') {
  142. char c2 = *((*p)+1);
  143. if(c2 == ':') { /* there has to be a keyword */
  144. (*p) += 2;
  145. if(parsekeyword(p, charset)) {
  146. state = CURLFNM_SCHS_DEFAULT;
  147. }
  148. else
  149. return SETCHARSET_FAIL;
  150. }
  151. else {
  152. charset[c] = 1;
  153. (*p)++;
  154. }
  155. something_found = TRUE;
  156. }
  157. else if(c == '?' || c == '*') {
  158. something_found = TRUE;
  159. charset[c] = 1;
  160. (*p)++;
  161. }
  162. else if(c == '^' || c == '!') {
  163. if(!something_found) {
  164. if(charset[CURLFNM_NEGATE]) {
  165. charset[c] = 1;
  166. something_found = TRUE;
  167. }
  168. else
  169. charset[CURLFNM_NEGATE] = 1; /* negate charset */
  170. }
  171. else
  172. charset[c] = 1;
  173. (*p)++;
  174. }
  175. else if(c == '\\') {
  176. c = *(++(*p));
  177. if(ISPRINT((c))) {
  178. something_found = TRUE;
  179. state = CURLFNM_SCHS_MAYRANGE;
  180. charset[c] = 1;
  181. rangestart = c;
  182. (*p)++;
  183. }
  184. else
  185. return SETCHARSET_FAIL;
  186. }
  187. else if(c == '\0') {
  188. return SETCHARSET_FAIL;
  189. }
  190. else {
  191. charset[c] = 1;
  192. (*p)++;
  193. something_found = TRUE;
  194. }
  195. break;
  196. case CURLFNM_SCHS_MAYRANGE:
  197. if(c == '-') {
  198. charset[c] = 1;
  199. (*p)++;
  200. lastchar = '-';
  201. state = CURLFNM_SCHS_MAYRANGE2;
  202. }
  203. else if(c == '[') {
  204. state = CURLFNM_SCHS_DEFAULT;
  205. }
  206. else if(ISALNUM(c)) {
  207. charset[c] = 1;
  208. (*p)++;
  209. }
  210. else if(c == '\\') {
  211. c = *(++(*p));
  212. if(ISPRINT(c)) {
  213. charset[c] = 1;
  214. (*p)++;
  215. }
  216. else
  217. return SETCHARSET_FAIL;
  218. }
  219. else if(c == ']') {
  220. return SETCHARSET_OK;
  221. }
  222. else
  223. return SETCHARSET_FAIL;
  224. break;
  225. case CURLFNM_SCHS_MAYRANGE2:
  226. if(c == '\\') {
  227. c = *(++(*p));
  228. if(!ISPRINT(c))
  229. return SETCHARSET_FAIL;
  230. }
  231. if(c == ']') {
  232. return SETCHARSET_OK;
  233. }
  234. else if(c == '\\') {
  235. c = *(++(*p));
  236. if(ISPRINT(c)) {
  237. charset[c] = 1;
  238. state = CURLFNM_SCHS_DEFAULT;
  239. (*p)++;
  240. }
  241. else
  242. return SETCHARSET_FAIL;
  243. }
  244. if(c >= rangestart) {
  245. if((ISLOWER(c) && ISLOWER(rangestart)) ||
  246. (ISDIGIT(c) && ISDIGIT(rangestart)) ||
  247. (ISUPPER(c) && ISUPPER(rangestart))) {
  248. charset[lastchar] = 0;
  249. rangestart++;
  250. while(rangestart++ <= c)
  251. charset[rangestart-1] = 1;
  252. (*p)++;
  253. state = CURLFNM_SCHS_DEFAULT;
  254. }
  255. else
  256. return SETCHARSET_FAIL;
  257. }
  258. break;
  259. case CURLFNM_SCHS_RIGHTBR:
  260. if(c == '[') {
  261. state = CURLFNM_SCHS_RIGHTBRLEFTBR;
  262. charset[c] = 1;
  263. (*p)++;
  264. }
  265. else if(c == ']') {
  266. return SETCHARSET_OK;
  267. }
  268. else if(c == '\0') {
  269. return SETCHARSET_FAIL;
  270. }
  271. else if(ISPRINT(c)) {
  272. charset[c] = 1;
  273. (*p)++;
  274. state = CURLFNM_SCHS_DEFAULT;
  275. }
  276. else
  277. /* used 'goto fail' instead of 'return SETCHARSET_FAIL' to avoid a
  278. * nonsense warning 'statement not reached' at end of the fnc when
  279. * compiling on Solaris */
  280. goto fail;
  281. break;
  282. case CURLFNM_SCHS_RIGHTBRLEFTBR:
  283. if(c == ']') {
  284. return SETCHARSET_OK;
  285. }
  286. else {
  287. state = CURLFNM_SCHS_DEFAULT;
  288. charset[c] = 1;
  289. (*p)++;
  290. }
  291. break;
  292. }
  293. }
  294. fail:
  295. return SETCHARSET_FAIL;
  296. }
  297. static int loop(const unsigned char *pattern, const unsigned char *string)
  298. {
  299. loop_state state = CURLFNM_LOOP_DEFAULT;
  300. unsigned char *p = (unsigned char *)pattern;
  301. unsigned char *s = (unsigned char *)string;
  302. unsigned char charset[CURLFNM_CHSET_SIZE] = { 0 };
  303. int rc = 0;
  304. for (;;) {
  305. switch(state) {
  306. case CURLFNM_LOOP_DEFAULT:
  307. if(*p == '*') {
  308. while(*(p+1) == '*') /* eliminate multiple stars */
  309. p++;
  310. if(*s == '\0' && *(p+1) == '\0')
  311. return CURL_FNMATCH_MATCH;
  312. rc = loop(p + 1, s); /* *.txt matches .txt <=> .txt matches .txt */
  313. if(rc == CURL_FNMATCH_MATCH)
  314. return CURL_FNMATCH_MATCH;
  315. if(*s) /* let the star eat up one character */
  316. s++;
  317. else
  318. return CURL_FNMATCH_NOMATCH;
  319. }
  320. else if(*p == '?') {
  321. if(ISPRINT(*s)) {
  322. s++;
  323. p++;
  324. }
  325. else if(*s == '\0')
  326. return CURL_FNMATCH_NOMATCH;
  327. else
  328. return CURL_FNMATCH_FAIL; /* cannot deal with other character */
  329. }
  330. else if(*p == '\0') {
  331. if(*s == '\0')
  332. return CURL_FNMATCH_MATCH;
  333. else
  334. return CURL_FNMATCH_NOMATCH;
  335. }
  336. else if(*p == '\\') {
  337. state = CURLFNM_LOOP_BACKSLASH;
  338. p++;
  339. }
  340. else if(*p == '[') {
  341. unsigned char *pp = p+1; /* cannot handle with pointer to register */
  342. if(setcharset(&pp, charset)) {
  343. int found = FALSE;
  344. if(charset[(unsigned int)*s])
  345. found = TRUE;
  346. else if(charset[CURLFNM_ALNUM])
  347. found = ISALNUM(*s);
  348. else if(charset[CURLFNM_ALPHA])
  349. found = ISALPHA(*s);
  350. else if(charset[CURLFNM_DIGIT])
  351. found = ISDIGIT(*s);
  352. else if(charset[CURLFNM_XDIGIT])
  353. found = ISXDIGIT(*s);
  354. else if(charset[CURLFNM_PRINT])
  355. found = ISPRINT(*s);
  356. else if(charset[CURLFNM_SPACE])
  357. found = ISSPACE(*s);
  358. else if(charset[CURLFNM_UPPER])
  359. found = ISUPPER(*s);
  360. else if(charset[CURLFNM_LOWER])
  361. found = ISLOWER(*s);
  362. else if(charset[CURLFNM_BLANK])
  363. found = ISBLANK(*s);
  364. else if(charset[CURLFNM_GRAPH])
  365. found = ISGRAPH(*s);
  366. if(charset[CURLFNM_NEGATE])
  367. found = !found;
  368. if(found) {
  369. p = pp+1;
  370. s++;
  371. memset(charset, 0, CURLFNM_CHSET_SIZE);
  372. }
  373. else
  374. return CURL_FNMATCH_NOMATCH;
  375. }
  376. else
  377. return CURL_FNMATCH_FAIL;
  378. }
  379. else {
  380. if(*p++ != *s++)
  381. return CURL_FNMATCH_NOMATCH;
  382. }
  383. break;
  384. case CURLFNM_LOOP_BACKSLASH:
  385. if(ISPRINT(*p)) {
  386. if(*p++ == *s++)
  387. state = CURLFNM_LOOP_DEFAULT;
  388. else
  389. return CURL_FNMATCH_NOMATCH;
  390. }
  391. else
  392. return CURL_FNMATCH_FAIL;
  393. break;
  394. }
  395. }
  396. }
  397. int Curl_fnmatch(void *ptr, const char *pattern, const char *string)
  398. {
  399. (void)ptr; /* the argument is specified by the curl_fnmatch_callback
  400. prototype, but not used by Curl_fnmatch() */
  401. if(!pattern || !string) {
  402. return CURL_FNMATCH_FAIL;
  403. }
  404. return loop((unsigned char *)pattern, (unsigned char *)string);
  405. }