strtod.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <ctype.h>
  12. /*
  13. * This routine will convert to arbitrary precision
  14. * floating point entirely in multi-precision fixed.
  15. * The answer is the closest floating point number to
  16. * the given decimal number. Exactly half way are
  17. * rounded ala ieee rules.
  18. * Method is to scale input decimal between .500 and .999...
  19. * with external power of 2, then binary search for the
  20. * closest mantissa to this decimal number.
  21. * Nmant is is the required precision. (53 for ieee dp)
  22. * Nbits is the max number of bits/word. (must be <= 28)
  23. * Prec is calculated - the number of words of fixed mantissa.
  24. */
  25. enum
  26. {
  27. Nbits = 28, // bits safely represented in a ulong
  28. Nmant = 53, // bits of precision required
  29. Bias = 1022,
  30. Prec = (Nmant+Nbits+1)/Nbits, // words of Nbits each to represent mantissa
  31. Sigbit = 1<<(Prec*Nbits-Nmant), // first significant bit of Prec-th word
  32. Ndig = 1500,
  33. One = (uint32_t)(1<<Nbits),
  34. Half = (uint32_t)(One>>1),
  35. Maxe = 310,
  36. Fsign = 1<<0, // found -
  37. Fesign = 1<<1, // found e-
  38. Fdpoint = 1<<2, // found .
  39. S0 = 0, // _ _S0 +S1 #S2 .S3
  40. S1, // _+ #S2 .S3
  41. S2, // _+# #S2 .S4 eS5
  42. S3, // _+. #S4
  43. S4, // _+#.# #S4 eS5
  44. S5, // _+#.#e +S6 #S7
  45. S6, // _+#.#e+ #S7
  46. S7, // _+#.#e+# #S7
  47. };
  48. static int xcmp(const char*, char*);
  49. static int fpcmp(char*, uint32_t*);
  50. static void frnorm(uint32_t*);
  51. static void divascii(char*, int*, int*, int*);
  52. static void mulascii(char*, int*, int*, int*);
  53. static void divby(char*, int*, int);
  54. typedef struct Tab Tab;
  55. struct Tab
  56. {
  57. int bp;
  58. int siz;
  59. char* cmp;
  60. };
  61. double
  62. strtod(const char *as, char **aas)
  63. {
  64. int na, ona, ex, dp, bp, c, i, flag, state;
  65. uint32_t low[Prec], hig[Prec], mid[Prec], num, den;
  66. double d;
  67. const char *s;
  68. char a[Ndig];
  69. flag = 0; // Fsign, Fesign, Fdpoint
  70. na = 0; // number of digits of a[]
  71. dp = 0; // na of decimal point
  72. ex = 0; // exonent
  73. state = S0;
  74. for(s=as;; s++) {
  75. c = *s;
  76. if(c >= '0' && c <= '9') {
  77. switch(state) {
  78. case S0:
  79. case S1:
  80. case S2:
  81. state = S2;
  82. break;
  83. case S3:
  84. case S4:
  85. state = S4;
  86. break;
  87. case S5:
  88. case S6:
  89. case S7:
  90. state = S7;
  91. ex = ex*10 + (c-'0');
  92. continue;
  93. }
  94. if(na == 0 && c == '0') {
  95. dp--;
  96. continue;
  97. }
  98. if(na < Ndig-50)
  99. a[na++] = c;
  100. continue;
  101. }
  102. switch(c) {
  103. case '\t':
  104. case '\n':
  105. case '\v':
  106. case '\f':
  107. case '\r':
  108. case ' ':
  109. if(state == S0)
  110. continue;
  111. break;
  112. case '-':
  113. if(state == S0)
  114. flag |= Fsign;
  115. else
  116. flag |= Fesign;
  117. case '+':
  118. if(state == S0)
  119. state = S1;
  120. else
  121. if(state == S5)
  122. state = S6;
  123. else
  124. break; // syntax
  125. continue;
  126. case '.':
  127. flag |= Fdpoint;
  128. dp = na;
  129. if(state == S0 || state == S1) {
  130. state = S3;
  131. continue;
  132. }
  133. if(state == S2) {
  134. state = S4;
  135. continue;
  136. }
  137. break;
  138. case 'e':
  139. case 'E':
  140. if(state == S2 || state == S4) {
  141. state = S5;
  142. continue;
  143. }
  144. break;
  145. }
  146. break;
  147. }
  148. /*
  149. * clean up return char-pointer
  150. */
  151. switch(state) {
  152. case S0:
  153. if(xcmp(s, "nan") == 0) {
  154. if(aas != nil)
  155. *aas = (char *)s+3;
  156. goto retnan;
  157. }
  158. case S1:
  159. if(xcmp(s, "infinity") == 0) {
  160. if(aas != nil)
  161. *aas = (char *)s+8;
  162. goto retinf;
  163. }
  164. if(xcmp(s, "inf") == 0) {
  165. if(aas != nil)
  166. *aas = (char *)s+3;
  167. goto retinf;
  168. }
  169. case S3:
  170. if(aas != nil)
  171. *aas = (char *)as;
  172. goto ret0; // no digits found
  173. case S6:
  174. s--; // back over +-
  175. case S5:
  176. s--; // back over e
  177. break;
  178. }
  179. if(aas != nil)
  180. *aas = (char *)s;
  181. if(flag & Fdpoint)
  182. while(na > 0 && a[na-1] == '0')
  183. na--;
  184. if(na == 0)
  185. goto ret0; // zero
  186. a[na] = 0;
  187. if(!(flag & Fdpoint))
  188. dp = na;
  189. if(flag & Fesign)
  190. ex = -ex;
  191. dp += ex;
  192. if(dp < -Maxe-Nmant/3) /* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */
  193. goto ret0; // underflow by exp
  194. else
  195. if(dp > +Maxe)
  196. goto retinf; // overflow by exp
  197. /*
  198. * normalize the decimal ascii number
  199. * to range .[5-9][0-9]* e0
  200. */
  201. bp = 0; // binary exponent
  202. while(dp > 0)
  203. divascii(a, &na, &dp, &bp);
  204. while(dp < 0 || a[0] < '5')
  205. mulascii(a, &na, &dp, &bp);
  206. a[na] = 0;
  207. /*
  208. * very small numbers are represented using
  209. * bp = -Bias+1. adjust accordingly.
  210. */
  211. if(bp < -Bias+1){
  212. ona = na;
  213. divby(a, &na, -bp-Bias+1);
  214. if(na < ona){
  215. memmove(a+ona-na, a, na);
  216. memset(a, '0', ona-na);
  217. na = ona;
  218. }
  219. a[na] = 0;
  220. bp = -Bias+1;
  221. }
  222. /* close approx by naive conversion */
  223. num = 0;
  224. den = 1;
  225. for(i=0; i<9 && (c=a[i]); i++) {
  226. num = num*10 + (c-'0');
  227. den *= 10;
  228. }
  229. low[0] = umuldiv(num, One, den);
  230. hig[0] = umuldiv(num+1, One, den);
  231. for(i=1; i<Prec; i++) {
  232. low[i] = 0;
  233. hig[i] = One-1;
  234. }
  235. /* binary search for closest mantissa */
  236. for(;;) {
  237. /* mid = (hig + low) / 2 */
  238. c = 0;
  239. for(i=0; i<Prec; i++) {
  240. mid[i] = hig[i] + low[i];
  241. if(c)
  242. mid[i] += One;
  243. c = mid[i] & 1;
  244. mid[i] >>= 1;
  245. }
  246. frnorm(mid);
  247. /* compare */
  248. c = fpcmp(a, mid);
  249. if(c > 0) {
  250. c = 1;
  251. for(i=0; i<Prec; i++)
  252. if(low[i] != mid[i]) {
  253. c = 0;
  254. low[i] = mid[i];
  255. }
  256. if(c)
  257. break; // between mid and hig
  258. continue;
  259. }
  260. if(c < 0) {
  261. for(i=0; i<Prec; i++)
  262. hig[i] = mid[i];
  263. continue;
  264. }
  265. /* only hard part is if even/odd roundings wants to go up */
  266. c = mid[Prec-1] & (Sigbit-1);
  267. if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
  268. mid[Prec-1] -= c;
  269. break; // exactly mid
  270. }
  271. /* normal rounding applies */
  272. c = mid[Prec-1] & (Sigbit-1);
  273. mid[Prec-1] -= c;
  274. if(c >= Sigbit/2) {
  275. mid[Prec-1] += Sigbit;
  276. frnorm(mid);
  277. }
  278. d = 0;
  279. for(i=0; i<Prec; i++)
  280. d = d*One + mid[i];
  281. if(flag & Fsign)
  282. d = -d;
  283. d = ldexp(d, bp - Prec*Nbits);
  284. return d;
  285. ret0:
  286. return 0;
  287. retnan:
  288. return NaN();
  289. retinf:
  290. if(flag & Fsign)
  291. return Inf(-1);
  292. return Inf(+1);
  293. }
  294. static void
  295. frnorm(uint32_t *f)
  296. {
  297. int i, c;
  298. c = 0;
  299. for(i=Prec-1; i>0; i--) {
  300. f[i] += c;
  301. c = f[i] >> Nbits;
  302. f[i] &= One-1;
  303. }
  304. f[0] += c;
  305. }
  306. static int
  307. fpcmp(char *a, uint32_t* f)
  308. {
  309. uint32_t tf[Prec];
  310. int i, d, c;
  311. for(i=0; i<Prec; i++)
  312. tf[i] = f[i];
  313. for(;;) {
  314. /* tf *= 10 */
  315. for(i=0; i<Prec; i++)
  316. tf[i] = tf[i]*10;
  317. frnorm(tf);
  318. d = (tf[0] >> Nbits) + '0';
  319. tf[0] &= One-1;
  320. /* compare next digit */
  321. c = *a;
  322. if(c == 0) {
  323. if('0' < d)
  324. return -1;
  325. if(tf[0] != 0)
  326. goto cont;
  327. for(i=1; i<Prec; i++)
  328. if(tf[i] != 0)
  329. goto cont;
  330. return 0;
  331. }
  332. if(c > d)
  333. return +1;
  334. if(c < d)
  335. return -1;
  336. a++;
  337. cont:;
  338. }
  339. }
  340. static void
  341. _divby(char *a, int *na, int b)
  342. {
  343. int n, c;
  344. char *p;
  345. p = a;
  346. n = 0;
  347. while(n>>b == 0) {
  348. c = *a++;
  349. if(c == 0) {
  350. while(n) {
  351. c = n*10;
  352. if(c>>b)
  353. break;
  354. n = c;
  355. }
  356. goto xx;
  357. }
  358. n = n*10 + c-'0';
  359. (*na)--;
  360. }
  361. for(;;) {
  362. c = n>>b;
  363. n -= c<<b;
  364. *p++ = c + '0';
  365. c = *a++;
  366. if(c == 0)
  367. break;
  368. n = n*10 + c-'0';
  369. }
  370. (*na)++;
  371. xx:
  372. while(n) {
  373. n = n*10;
  374. c = n>>b;
  375. n -= c<<b;
  376. *p++ = c + '0';
  377. (*na)++;
  378. }
  379. *p = 0;
  380. }
  381. static void
  382. divby(char *a, int *na, int b)
  383. {
  384. while(b > 9){
  385. _divby(a, na, 9);
  386. a[*na] = 0;
  387. b -= 9;
  388. }
  389. if(b > 0)
  390. _divby(a, na, b);
  391. }
  392. static Tab tab1[] =
  393. {
  394. { 1, 0, "" },
  395. { 3, 1, "7" },
  396. { 6, 2, "63" },
  397. { 9, 3, "511" },
  398. { 13, 4, "8191" },
  399. { 16, 5, "65535" },
  400. { 19, 6, "524287" },
  401. { 23, 7, "8388607" },
  402. { 26, 8, "67108863" },
  403. { 27, 9, "134217727" },
  404. };
  405. static void
  406. divascii(char *a, int *na, int *dp, int *bp)
  407. {
  408. int b, d;
  409. Tab *t;
  410. d = *dp;
  411. if(d >= nelem(tab1))
  412. d = nelem(tab1)-1;
  413. t = tab1 + d;
  414. b = t->bp;
  415. if(memcmp(a, t->cmp, t->siz) > 0)
  416. d--;
  417. *dp -= d;
  418. *bp += b;
  419. divby(a, na, b);
  420. }
  421. static void
  422. mulby(char *a, char *p, char *q, int b)
  423. {
  424. int n, c;
  425. n = 0;
  426. *p = 0;
  427. for(;;) {
  428. q--;
  429. if(q < a)
  430. break;
  431. c = *q - '0';
  432. c = (c<<b) + n;
  433. n = c/10;
  434. c -= n*10;
  435. p--;
  436. *p = c + '0';
  437. }
  438. while(n) {
  439. c = n;
  440. n = c/10;
  441. c -= n*10;
  442. p--;
  443. *p = c + '0';
  444. }
  445. }
  446. static Tab tab2[] =
  447. {
  448. { 1, 1, "" }, // dp = 0-0
  449. { 3, 3, "125" },
  450. { 6, 5, "15625" },
  451. { 9, 7, "1953125" },
  452. { 13, 10, "1220703125" },
  453. { 16, 12, "152587890625" },
  454. { 19, 14, "19073486328125" },
  455. { 23, 17, "11920928955078125" },
  456. { 26, 19, "1490116119384765625" },
  457. { 27, 19, "7450580596923828125" }, // dp 8-9
  458. };
  459. static void
  460. mulascii(char *a, int *na, int *dp, int *bp)
  461. {
  462. char *p;
  463. int d, b;
  464. Tab *t;
  465. d = -*dp;
  466. if(d >= nelem(tab2))
  467. d = nelem(tab2)-1;
  468. t = tab2 + d;
  469. b = t->bp;
  470. if(memcmp(a, t->cmp, t->siz) < 0)
  471. d--;
  472. p = a + *na;
  473. *bp -= b;
  474. *dp += d;
  475. *na += d;
  476. mulby(a, p+d, p, b);
  477. }
  478. static int
  479. xcmp(const char *a, char *b)
  480. {
  481. int c1, c2;
  482. while((c1 = *b++) != 0) {
  483. c2 = *a++;
  484. if(isupper(c2))
  485. c2 = tolower(c2);
  486. if(c1 != c2)
  487. return 1;
  488. }
  489. return 0;
  490. }