template_utils.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. /*
  2. * LuCI Template - Utility functions
  3. *
  4. * Copyright (C) 2010 Jo-Philipp Wich <jow@openwrt.org>
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "template_utils.h"
  19. #include "template_lmo.h"
  20. /* initialize a buffer object */
  21. struct template_buffer * buf_init(int size)
  22. {
  23. struct template_buffer *buf;
  24. if (size <= 0)
  25. size = 1024;
  26. buf = (struct template_buffer *)malloc(sizeof(struct template_buffer));
  27. if (buf != NULL)
  28. {
  29. buf->fill = 0;
  30. buf->size = size;
  31. buf->data = malloc(buf->size);
  32. if (buf->data != NULL)
  33. {
  34. buf->dptr = buf->data;
  35. buf->data[0] = 0;
  36. return buf;
  37. }
  38. free(buf);
  39. }
  40. return NULL;
  41. }
  42. /* grow buffer */
  43. int buf_grow(struct template_buffer *buf, int size)
  44. {
  45. unsigned int off = (buf->dptr - buf->data);
  46. char *data;
  47. if (size <= 0)
  48. size = 1024;
  49. data = realloc(buf->data, buf->size + size);
  50. if (data != NULL)
  51. {
  52. buf->data = data;
  53. buf->dptr = data + off;
  54. buf->size += size;
  55. return buf->size;
  56. }
  57. return 0;
  58. }
  59. /* put one char into buffer object */
  60. int buf_putchar(struct template_buffer *buf, char c)
  61. {
  62. if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf, 0) )
  63. return 0;
  64. *(buf->dptr++) = c;
  65. *(buf->dptr) = 0;
  66. buf->fill++;
  67. return 1;
  68. }
  69. /* append data to buffer */
  70. int buf_append(struct template_buffer *buf, const char *s, int len)
  71. {
  72. if ((buf->fill + len + 1) >= buf->size)
  73. {
  74. if (!buf_grow(buf, len + 1))
  75. return 0;
  76. }
  77. memcpy(buf->dptr, s, len);
  78. buf->fill += len;
  79. buf->dptr += len;
  80. *(buf->dptr) = 0;
  81. return len;
  82. }
  83. /* read buffer length */
  84. int buf_length(struct template_buffer *buf)
  85. {
  86. return buf->fill;
  87. }
  88. /* destroy buffer object and return pointer to data */
  89. char * buf_destroy(struct template_buffer *buf)
  90. {
  91. char *data = buf->data;
  92. free(buf);
  93. return data;
  94. }
  95. /* calculate the number of expected continuation chars */
  96. static inline int mb_num_chars(unsigned char c)
  97. {
  98. if ((c & 0xE0) == 0xC0)
  99. return 2;
  100. else if ((c & 0xF0) == 0xE0)
  101. return 3;
  102. else if ((c & 0xF8) == 0xF0)
  103. return 4;
  104. else if ((c & 0xFC) == 0xF8)
  105. return 5;
  106. else if ((c & 0xFE) == 0xFC)
  107. return 6;
  108. return 1;
  109. }
  110. /* test whether the given byte is a valid continuation char */
  111. static inline int mb_is_cont(unsigned char c)
  112. {
  113. return ((c >= 0x80) && (c <= 0xBF));
  114. }
  115. /* test whether the byte sequence at the given pointer with the given
  116. * length is the shortest possible representation of the code point */
  117. static inline int mb_is_shortest(unsigned char *s, int n)
  118. {
  119. switch (n)
  120. {
  121. case 2:
  122. /* 1100000x (10xxxxxx) */
  123. return !(((*s >> 1) == 0x60) &&
  124. ((*(s+1) >> 6) == 0x02));
  125. case 3:
  126. /* 11100000 100xxxxx (10xxxxxx) */
  127. return !((*s == 0xE0) &&
  128. ((*(s+1) >> 5) == 0x04) &&
  129. ((*(s+2) >> 6) == 0x02));
  130. case 4:
  131. /* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */
  132. return !((*s == 0xF0) &&
  133. ((*(s+1) >> 4) == 0x08) &&
  134. ((*(s+2) >> 6) == 0x02) &&
  135. ((*(s+3) >> 6) == 0x02));
  136. case 5:
  137. /* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */
  138. return !((*s == 0xF8) &&
  139. ((*(s+1) >> 3) == 0x10) &&
  140. ((*(s+2) >> 6) == 0x02) &&
  141. ((*(s+3) >> 6) == 0x02) &&
  142. ((*(s+4) >> 6) == 0x02));
  143. case 6:
  144. /* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx) */
  145. return !((*s == 0xF8) &&
  146. ((*(s+1) >> 2) == 0x20) &&
  147. ((*(s+2) >> 6) == 0x02) &&
  148. ((*(s+3) >> 6) == 0x02) &&
  149. ((*(s+4) >> 6) == 0x02) &&
  150. ((*(s+5) >> 6) == 0x02));
  151. }
  152. return 1;
  153. }
  154. /* test whether the byte sequence at the given pointer with the given
  155. * length is an UTF-16 surrogate */
  156. static inline int mb_is_surrogate(unsigned char *s, int n)
  157. {
  158. return ((n == 3) && (*s == 0xED) && (*(s+1) >= 0xA0) && (*(s+1) <= 0xBF));
  159. }
  160. /* test whether the byte sequence at the given pointer with the given
  161. * length is an illegal UTF-8 code point */
  162. static inline int mb_is_illegal(unsigned char *s, int n)
  163. {
  164. return ((n == 3) && (*s == 0xEF) && (*(s+1) == 0xBF) &&
  165. (*(s+2) >= 0xBE) && (*(s+2) <= 0xBF));
  166. }
  167. /* scan given source string, validate UTF-8 sequence and store result
  168. * in given buffer object */
  169. static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
  170. {
  171. unsigned char *ptr = *s;
  172. unsigned int o = 0, v, n;
  173. /* ascii byte without null */
  174. if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
  175. {
  176. if (!buf_putchar(buf, *ptr++))
  177. return 0;
  178. o = 1;
  179. }
  180. /* multi byte sequence */
  181. else if ((n = mb_num_chars(*ptr)) > 1)
  182. {
  183. /* count valid chars */
  184. for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
  185. switch (n)
  186. {
  187. case 6:
  188. case 5:
  189. /* five and six byte sequences are always invalid */
  190. if (!buf_putchar(buf, '?'))
  191. return 0;
  192. break;
  193. default:
  194. /* if the number of valid continuation bytes matches the
  195. * expected number and if the sequence is legal, copy
  196. * the bytes to the destination buffer */
  197. if ((v == n) && mb_is_shortest(ptr, n) &&
  198. !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
  199. {
  200. /* copy sequence */
  201. if (!buf_append(buf, (char *)ptr, n))
  202. return 0;
  203. }
  204. /* the found sequence is illegal, skip it */
  205. else
  206. {
  207. /* invalid sequence */
  208. if (!buf_putchar(buf, '?'))
  209. return 0;
  210. }
  211. break;
  212. }
  213. /* advance beyound the last found valid continuation char */
  214. o = v;
  215. ptr += v;
  216. }
  217. /* invalid byte (0x00) */
  218. else
  219. {
  220. if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
  221. return 0;
  222. o = 1;
  223. ptr++;
  224. }
  225. *s = ptr;
  226. return o;
  227. }
  228. /* sanitize given string and replace all invalid UTF-8 sequences with "?" */
  229. char * utf8(const char *s, unsigned int l)
  230. {
  231. struct template_buffer *buf = buf_init(l);
  232. unsigned char *ptr = (unsigned char *)s;
  233. unsigned int v, o;
  234. if (!buf)
  235. return NULL;
  236. for (o = 0; o < l; o++)
  237. {
  238. /* ascii char */
  239. if ((*ptr >= 0x01) && (*ptr <= 0x7F))
  240. {
  241. if (!buf_putchar(buf, (char)*ptr++))
  242. break;
  243. }
  244. /* invalid byte or multi byte sequence */
  245. else
  246. {
  247. if (!(v = _validate_utf8(&ptr, l - o, buf)))
  248. break;
  249. o += (v - 1);
  250. }
  251. }
  252. return buf_destroy(buf);
  253. }
  254. /* Sanitize given string and strip all invalid XML bytes
  255. * Validate UTF-8 sequences
  256. * Escape XML control chars */
  257. char * pcdata(const char *s, unsigned int l)
  258. {
  259. struct template_buffer *buf = buf_init(l);
  260. unsigned char *ptr = (unsigned char *)s;
  261. unsigned int o, v;
  262. char esq[8];
  263. int esl;
  264. if (!buf)
  265. return NULL;
  266. for (o = 0; o < l; o++)
  267. {
  268. /* Invalid XML bytes */
  269. if (((*ptr >= 0x00) && (*ptr <= 0x08)) ||
  270. ((*ptr >= 0x0B) && (*ptr <= 0x0C)) ||
  271. ((*ptr >= 0x0E) && (*ptr <= 0x1F)) ||
  272. (*ptr == 0x7F))
  273. {
  274. ptr++;
  275. }
  276. /* Escapes */
  277. else if ((*ptr == 0x26) ||
  278. (*ptr == 0x27) ||
  279. (*ptr == 0x22) ||
  280. (*ptr == 0x3C) ||
  281. (*ptr == 0x3E))
  282. {
  283. esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
  284. if (!buf_append(buf, esq, esl))
  285. break;
  286. ptr++;
  287. }
  288. /* ascii char */
  289. else if (*ptr <= 0x7F)
  290. {
  291. buf_putchar(buf, (char)*ptr++);
  292. }
  293. /* multi byte sequence */
  294. else
  295. {
  296. if (!(v = _validate_utf8(&ptr, l - o, buf)))
  297. break;
  298. o += (v - 1);
  299. }
  300. }
  301. return buf_destroy(buf);
  302. }
  303. char * striptags(const char *s, unsigned int l)
  304. {
  305. struct template_buffer *buf = buf_init(l);
  306. unsigned char *ptr = (unsigned char *)s;
  307. unsigned char *end = ptr + l;
  308. unsigned char *tag;
  309. unsigned char prev;
  310. char esq[8];
  311. int esl;
  312. for (prev = ' '; ptr < end; ptr++)
  313. {
  314. if ((*ptr == '<') && ((ptr + 2) < end) &&
  315. ((*(ptr + 1) == '/') || isalpha(*(ptr + 1))))
  316. {
  317. for (tag = ptr; tag < end; tag++)
  318. {
  319. if (*tag == '>')
  320. {
  321. if (!isspace(prev))
  322. buf_putchar(buf, ' ');
  323. ptr = tag;
  324. prev = ' ';
  325. break;
  326. }
  327. }
  328. }
  329. else if (isspace(*ptr))
  330. {
  331. if (!isspace(prev))
  332. buf_putchar(buf, *ptr);
  333. prev = *ptr;
  334. }
  335. else
  336. {
  337. switch(*ptr)
  338. {
  339. case '"':
  340. case '\'':
  341. case '<':
  342. case '>':
  343. case '&':
  344. esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
  345. buf_append(buf, esq, esl);
  346. break;
  347. default:
  348. buf_putchar(buf, *ptr);
  349. break;
  350. }
  351. prev = *ptr;
  352. }
  353. }
  354. return buf_destroy(buf);
  355. }
  356. void luastr_escape(struct template_buffer *out, const char *s, unsigned int l,
  357. int escape_xml)
  358. {
  359. int esl;
  360. char esq[8];
  361. char *ptr;
  362. for (ptr = (char *)s; ptr < (s + l); ptr++)
  363. {
  364. switch (*ptr)
  365. {
  366. case '\\':
  367. buf_append(out, "\\\\", 2);
  368. break;
  369. case '"':
  370. if (escape_xml)
  371. buf_append(out, "&#34;", 5);
  372. else
  373. buf_append(out, "\\\"", 2);
  374. break;
  375. case '\n':
  376. buf_append(out, "\\n", 2);
  377. break;
  378. case '\'':
  379. case '&':
  380. case '<':
  381. case '>':
  382. if (escape_xml)
  383. {
  384. esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr);
  385. buf_append(out, esq, esl);
  386. break;
  387. }
  388. default:
  389. buf_putchar(out, *ptr);
  390. }
  391. }
  392. }
  393. void luastr_translate(struct template_buffer *out, const char *s, unsigned int l,
  394. int escape_xml)
  395. {
  396. char *tr;
  397. int trlen;
  398. if (!lmo_translate(s, l, &tr, &trlen))
  399. luastr_escape(out, tr, trlen, escape_xml);
  400. else
  401. luastr_escape(out, s, l, escape_xml);
  402. }