get_line_from_file.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Utility routines.
  4. *
  5. * Copyright (C) 2005, 2006 Rob Landley <rob@landley.net>
  6. * Copyright (C) 2004 Erik Andersen <andersen@codepoet.org>
  7. * Copyright (C) 2001 Matt Krai
  8. *
  9. * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  10. */
  11. #include "libbb.h"
  12. char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end)
  13. {
  14. int ch;
  15. size_t idx = 0;
  16. char *linebuf = NULL;
  17. while ((ch = getc(file)) != EOF) {
  18. /* grow the line buffer as necessary */
  19. if (!(idx & 0xff)) {
  20. if (idx == ((size_t)-1) - 0xff)
  21. bb_die_memory_exhausted();
  22. linebuf = xrealloc(linebuf, idx + 0x100);
  23. }
  24. linebuf[idx++] = (char) ch;
  25. if (ch == '\0')
  26. break;
  27. if (end && ch == '\n')
  28. break;
  29. }
  30. if (end)
  31. *end = idx;
  32. if (linebuf) {
  33. // huh, does fgets discard prior data on error like this?
  34. // I don't think so....
  35. //if (ferror(file)) {
  36. // free(linebuf);
  37. // return NULL;
  38. //}
  39. linebuf = xrealloc(linebuf, idx + 1);
  40. linebuf[idx] = '\0';
  41. }
  42. return linebuf;
  43. }
  44. /* Get line, including trailing \n if any */
  45. char* FAST_FUNC xmalloc_fgets(FILE *file)
  46. {
  47. size_t i;
  48. return bb_get_chunk_from_file(file, &i);
  49. }
  50. /* Get line. Remove trailing \n */
  51. char* FAST_FUNC xmalloc_fgetline(FILE *file)
  52. {
  53. size_t i;
  54. char *c = bb_get_chunk_from_file(file, &i);
  55. if (i && c[--i] == '\n')
  56. c[i] = '\0';
  57. return c;
  58. }
  59. #if 0
  60. /* GNUism getline() should be faster (not tested) than a loop with fgetc */
  61. /* Get line, including trailing \n if any */
  62. char* FAST_FUNC xmalloc_fgets(FILE *file)
  63. {
  64. char *res_buf = NULL;
  65. size_t res_sz;
  66. if (getline(&res_buf, &res_sz, file) == -1) {
  67. free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */
  68. res_buf = NULL;
  69. }
  70. //TODO: trimming to res_sz?
  71. return res_buf;
  72. }
  73. /* Get line. Remove trailing \n */
  74. char* FAST_FUNC xmalloc_fgetline(FILE *file)
  75. {
  76. char *res_buf = NULL;
  77. size_t res_sz;
  78. res_sz = getline(&res_buf, &res_sz, file);
  79. if ((ssize_t)res_sz != -1) {
  80. if (res_buf[res_sz - 1] == '\n')
  81. res_buf[--res_sz] = '\0';
  82. //TODO: trimming to res_sz?
  83. } else {
  84. free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */
  85. res_buf = NULL;
  86. }
  87. return res_buf;
  88. }
  89. #endif
  90. #if 0
  91. /* Faster routines (~twice as fast). +170 bytes. Unused as of 2008-07.
  92. *
  93. * NB: they stop at NUL byte too.
  94. * Performance is important here. Think "grep 50gigabyte_file"...
  95. * Ironically, grep can't use it because of NUL issue.
  96. * We sorely need C lib to provide fgets which reports size!
  97. *
  98. * Update:
  99. * Actually, uclibc and glibc have it. man getline. It's GNUism,
  100. * but very useful one (if it's as fast as this code).
  101. * TODO:
  102. * - currently, sed and sort use bb_get_chunk_from_file and heavily
  103. * depend on its "stop on \n or \0" behavior, and STILL they fail
  104. * to handle all cases with embedded NULs correctly. So:
  105. * - audit sed and sort; convert them to getline FIRST.
  106. * - THEN ditch bb_get_chunk_from_file, replace it with getline.
  107. * - provide getline implementation for non-GNU systems.
  108. */
  /*
   * Read one line with repeated fgets() calls, extending the buffer by
   * 0x100 bytes before each call.
   *
   * file:  stream to read from.
   * sizep: receives the accumulated strlen() of the pieces read.
   *
   * Returns the buffer (not shrunk to fit - that is left to the caller),
   * or NULL when the accumulated length is 0.
   */
  static char* xmalloc_fgets_internal(FILE *file, int *sizep)
  {
      char *buf = NULL;
      int used = 0;

      for (;;) {
          int piece;

          buf = xrealloc(buf, used + 0x100);
          if (fgets(buf + used, 0x100, file) == NULL) {
              /* need to terminate in case this is error
               * (EOF puts NUL itself) */
              buf[used] = '\0';
              break;
          }

          /* fgets does not report how much it stored, so measure it */
          piece = strlen(buf + used);
          used += piece;

          /* A piece shorter than the 0xff bytes fgets could store means
           * the read did not stop for lack of room. */
          if (piece != 0xff)
              break;
          /* A completely filled piece may still end exactly on a newline */
          if (buf[used - 1] == '\n')
              break;
      }

      *sizep = used;
      if (used == 0) {
          free(buf);
          return NULL;
      }
      /* xrealloc(buf, used + 1) is up to caller */
      return buf;
  }
  138. /* Get line, remove trailing \n */
  139. char* FAST_FUNC xmalloc_fgetline_fast(FILE *file)
  140. {
  141. int sz;
  142. char *r = xmalloc_fgets_internal(file, &sz);
  143. if (r && r[sz - 1] == '\n')
  144. r[--sz] = '\0';
  145. return r; /* not xrealloc(r, sz + 1)! */
  146. }
  147. char* FAST_FUNC xmalloc_fgets(FILE *file)
  148. {
  149. int sz;
  150. return xmalloc_fgets_internal(file, &sz);
  151. }
  152. /* Get line, remove trailing \n */
  153. char* FAST_FUNC xmalloc_fgetline(FILE *file)
  154. {
  155. int sz;
  156. char *r = xmalloc_fgets_internal(file, &sz);
  157. if (!r)
  158. return r;
  159. if (r[sz - 1] == '\n')
  160. r[--sz] = '\0';
  161. return xrealloc(r, sz + 1);
  162. }
  163. #endif