process_escape_sequence.c 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Utility routines.
  4. *
  5. * Copyright (C) Manuel Novoa III <mjn3@codepoet.org>
  6. * and Vladimir Oleynik <dzo@simtreas.ru>
  7. *
  8. * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  9. */
  10. #include "libbb.h"
  11. #define WANT_HEX_ESCAPES 1
  12. /* Usual "this only works for ascii compatible encodings" disclaimer. */
  13. #undef _tolower
  14. #define _tolower(X) ((X)|((char) 0x20))
  15. char FAST_FUNC bb_process_escape_sequence(const char **ptr)
  16. {
  17. const char *q;
  18. unsigned num_digits;
  19. unsigned n;
  20. unsigned base;
  21. num_digits = n = 0;
  22. base = 8;
  23. q = *ptr;
  24. if (WANT_HEX_ESCAPES && *q == 'x') {
  25. ++q;
  26. base = 16;
  27. ++num_digits;
  28. }
  29. /* bash requires leading 0 in octal escapes:
  30. * \02 works, \2 does not (prints \ and 2).
  31. * We treat \2 as a valid octal escape sequence. */
  32. do {
  33. unsigned r;
  34. unsigned d = (unsigned char)(*q) - '0';
  35. #if WANT_HEX_ESCAPES
  36. if (d >= 10) {
  37. d = (unsigned char)_tolower(*q) - 'a';
  38. //d += 10;
  39. /* The above would map 'A'-'F' and 'a'-'f' to 10-15,
  40. * however, some chars like '@' would map to 9 < base.
  41. * Do not allow that, map invalid chars to N > base:
  42. */
  43. if ((int)d >= 0)
  44. d += 10;
  45. }
  46. #endif
  47. if (d >= base) {
  48. if (WANT_HEX_ESCAPES && base == 16) {
  49. --num_digits;
  50. if (num_digits == 0) {
  51. /* \x<bad_char>: return '\',
  52. * leave ptr pointing to x */
  53. return '\\';
  54. }
  55. }
  56. break;
  57. }
  58. r = n * base + d;
  59. if (r > UCHAR_MAX) {
  60. break;
  61. }
  62. n = r;
  63. ++q;
  64. } while (++num_digits < 3);
  65. if (num_digits == 0) {
  66. /* Not octal or hex escape sequence.
  67. * Is it one-letter one? */
  68. /* bash builtin "echo -e '\ec'" interprets \e as ESC,
  69. * but coreutils "/bin/echo -e '\ec'" does not.
  70. * Manpages tend to support coreutils way.
  71. * Update: coreutils added support for \e on 28 Oct 2009. */
  72. static const char charmap[] ALIGN1 = {
  73. 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v', '\\', '\0',
  74. '\a', '\b', 27, '\f', '\n', '\r', '\t', '\v', '\\', '\\',
  75. };
  76. const char *p = charmap;
  77. do {
  78. if (*p == *q) {
  79. q++;
  80. break;
  81. }
  82. } while (*++p != '\0');
  83. /* p points to found escape char or NUL,
  84. * advance it and find what it translates to.
  85. * Note that \NUL and unrecognized sequence \z return '\'
  86. * and leave ptr pointing to NUL or z. */
  87. n = p[sizeof(charmap) / 2];
  88. }
  89. *ptr = q;
  90. return (char) n;
  91. }
  92. char* FAST_FUNC strcpy_and_process_escape_sequences(char *dst, const char *src)
  93. {
  94. while (1) {
  95. char c, c1;
  96. c = c1 = *src++;
  97. if (c1 == '\\')
  98. c1 = bb_process_escape_sequence(&src);
  99. *dst = c1;
  100. if (c == '\0')
  101. return dst;
  102. dst++;
  103. }
  104. }