strtod.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <ctype.h>
  4. /*
  5. * This routine will convert to arbitrary precision
  6. * floating point entirely in multi-precision fixed.
  7. * The answer is the closest floating point number to
  8. * the given decimal number. Exactly half way are
  9. * rounded ala ieee rules.
  10. * Method is to scale input decimal between .500 and .999...
  11. * with external power of 2, then binary search for the
  12. * closest mantissa to this decimal number.
  13. * Nmant is is the required precision. (53 for ieee dp)
  14. * Nbits is the max number of bits/word. (must be <= 28)
  15. * Prec is calculated - the number of words of fixed mantissa.
  16. */
  17. enum
  18. {
  19. Nbits = 28, // bits safely represented in a ulong
  20. Nmant = 53, // bits of precision required
  21. Bias = 1022,
  22. Prec = (Nmant+Nbits+1)/Nbits, // words of Nbits each to represent mantissa
  23. Sigbit = 1<<(Prec*Nbits-Nmant), // first significant bit of Prec-th word
  24. Ndig = 1500,
  25. One = (ulong)(1<<Nbits),
  26. Half = (ulong)(One>>1),
  27. Maxe = 310,
  28. Fsign = 1<<0, // found -
  29. Fesign = 1<<1, // found e-
  30. Fdpoint = 1<<2, // found .
  31. S0 = 0, // _ _S0 +S1 #S2 .S3
  32. S1, // _+ #S2 .S3
  33. S2, // _+# #S2 .S4 eS5
  34. S3, // _+. #S4
  35. S4, // _+#.# #S4 eS5
  36. S5, // _+#.#e +S6 #S7
  37. S6, // _+#.#e+ #S7
  38. S7, // _+#.#e+# #S7
  39. };
  40. static int xcmp(char*, char*);
  41. static int fpcmp(char*, ulong*);
  42. static void frnorm(ulong*);
  43. static void divascii(char*, int*, int*, int*);
  44. static void mulascii(char*, int*, int*, int*);
  45. static void divby(char*, int*, int);
  46. typedef struct Tab Tab;
  47. struct Tab
  48. {
  49. int bp;
  50. int siz;
  51. char* cmp;
  52. };
  53. double
  54. strtod(char *as, char **aas)
  55. {
  56. int na, ona, ex, dp, bp, c, i, flag, state;
  57. ulong low[Prec], hig[Prec], mid[Prec], num, den;
  58. double d;
  59. char *s, a[Ndig];
  60. flag = 0; // Fsign, Fesign, Fdpoint
  61. na = 0; // number of digits of a[]
  62. dp = 0; // na of decimal point
  63. ex = 0; // exonent
  64. state = S0;
  65. for(s=as;; s++) {
  66. c = *s;
  67. if(c >= '0' && c <= '9') {
  68. switch(state) {
  69. case S0:
  70. case S1:
  71. case S2:
  72. state = S2;
  73. break;
  74. case S3:
  75. case S4:
  76. state = S4;
  77. break;
  78. case S5:
  79. case S6:
  80. case S7:
  81. state = S7;
  82. ex = ex*10 + (c-'0');
  83. continue;
  84. }
  85. if(na == 0 && c == '0') {
  86. dp--;
  87. continue;
  88. }
  89. if(na < Ndig-50)
  90. a[na++] = c;
  91. continue;
  92. }
  93. switch(c) {
  94. case '\t':
  95. case '\n':
  96. case '\v':
  97. case '\f':
  98. case '\r':
  99. case ' ':
  100. if(state == S0)
  101. continue;
  102. break;
  103. case '-':
  104. if(state == S0)
  105. flag |= Fsign;
  106. else
  107. flag |= Fesign;
  108. case '+':
  109. if(state == S0)
  110. state = S1;
  111. else
  112. if(state == S5)
  113. state = S6;
  114. else
  115. break; // syntax
  116. continue;
  117. case '.':
  118. flag |= Fdpoint;
  119. dp = na;
  120. if(state == S0 || state == S1) {
  121. state = S3;
  122. continue;
  123. }
  124. if(state == S2) {
  125. state = S4;
  126. continue;
  127. }
  128. break;
  129. case 'e':
  130. case 'E':
  131. if(state == S2 || state == S4) {
  132. state = S5;
  133. continue;
  134. }
  135. break;
  136. }
  137. break;
  138. }
  139. /*
  140. * clean up return char-pointer
  141. */
  142. switch(state) {
  143. case S0:
  144. if(xcmp(s, "nan") == 0) {
  145. if(aas != nil)
  146. *aas = s+3;
  147. goto retnan;
  148. }
  149. case S1:
  150. if(xcmp(s, "infinity") == 0) {
  151. if(aas != nil)
  152. *aas = s+8;
  153. goto retinf;
  154. }
  155. if(xcmp(s, "inf") == 0) {
  156. if(aas != nil)
  157. *aas = s+3;
  158. goto retinf;
  159. }
  160. case S3:
  161. if(aas != nil)
  162. *aas = as;
  163. goto ret0; // no digits found
  164. case S6:
  165. s--; // back over +-
  166. case S5:
  167. s--; // back over e
  168. break;
  169. }
  170. if(aas != nil)
  171. *aas = s;
  172. if(flag & Fdpoint)
  173. while(na > 0 && a[na-1] == '0')
  174. na--;
  175. if(na == 0)
  176. goto ret0; // zero
  177. a[na] = 0;
  178. if(!(flag & Fdpoint))
  179. dp = na;
  180. if(flag & Fesign)
  181. ex = -ex;
  182. dp += ex;
  183. if(dp < -Maxe-Nmant/3) /* actually -Nmant*log(2)/log(10), but Nmant/3 close enough */
  184. goto ret0; // underflow by exp
  185. else
  186. if(dp > +Maxe)
  187. goto retinf; // overflow by exp
  188. /*
  189. * normalize the decimal ascii number
  190. * to range .[5-9][0-9]* e0
  191. */
  192. bp = 0; // binary exponent
  193. while(dp > 0)
  194. divascii(a, &na, &dp, &bp);
  195. while(dp < 0 || a[0] < '5')
  196. mulascii(a, &na, &dp, &bp);
  197. a[na] = 0;
  198. /*
  199. * very small numbers are represented using
  200. * bp = -Bias+1. adjust accordingly.
  201. */
  202. if(bp < -Bias+1){
  203. ona = na;
  204. divby(a, &na, -bp-Bias+1);
  205. if(na < ona){
  206. memmove(a+ona-na, a, na);
  207. memset(a, '0', ona-na);
  208. na = ona;
  209. }
  210. a[na] = 0;
  211. bp = -Bias+1;
  212. }
  213. /* close approx by naive conversion */
  214. num = 0;
  215. den = 1;
  216. for(i=0; i<9 && (c=a[i]); i++) {
  217. num = num*10 + (c-'0');
  218. den *= 10;
  219. }
  220. low[0] = umuldiv(num, One, den);
  221. hig[0] = umuldiv(num+1, One, den);
  222. for(i=1; i<Prec; i++) {
  223. low[i] = 0;
  224. hig[i] = One-1;
  225. }
  226. /* binary search for closest mantissa */
  227. for(;;) {
  228. /* mid = (hig + low) / 2 */
  229. c = 0;
  230. for(i=0; i<Prec; i++) {
  231. mid[i] = hig[i] + low[i];
  232. if(c)
  233. mid[i] += One;
  234. c = mid[i] & 1;
  235. mid[i] >>= 1;
  236. }
  237. frnorm(mid);
  238. /* compare */
  239. c = fpcmp(a, mid);
  240. if(c > 0) {
  241. c = 1;
  242. for(i=0; i<Prec; i++)
  243. if(low[i] != mid[i]) {
  244. c = 0;
  245. low[i] = mid[i];
  246. }
  247. if(c)
  248. break; // between mid and hig
  249. continue;
  250. }
  251. if(c < 0) {
  252. for(i=0; i<Prec; i++)
  253. hig[i] = mid[i];
  254. continue;
  255. }
  256. /* only hard part is if even/odd roundings wants to go up */
  257. c = mid[Prec-1] & (Sigbit-1);
  258. if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
  259. mid[Prec-1] -= c;
  260. break; // exactly mid
  261. }
  262. /* normal rounding applies */
  263. c = mid[Prec-1] & (Sigbit-1);
  264. mid[Prec-1] -= c;
  265. if(c >= Sigbit/2) {
  266. mid[Prec-1] += Sigbit;
  267. frnorm(mid);
  268. }
  269. d = 0;
  270. for(i=0; i<Prec; i++)
  271. d = d*One + mid[i];
  272. if(flag & Fsign)
  273. d = -d;
  274. d = ldexp(d, bp - Prec*Nbits);
  275. return d;
  276. ret0:
  277. return 0;
  278. retnan:
  279. return NaN();
  280. retinf:
  281. if(flag & Fsign)
  282. return Inf(-1);
  283. return Inf(+1);
  284. }
  285. static void
  286. frnorm(ulong *f)
  287. {
  288. int i, c;
  289. c = 0;
  290. for(i=Prec-1; i>0; i--) {
  291. f[i] += c;
  292. c = f[i] >> Nbits;
  293. f[i] &= One-1;
  294. }
  295. f[0] += c;
  296. }
  297. static int
  298. fpcmp(char *a, ulong* f)
  299. {
  300. ulong tf[Prec];
  301. int i, d, c;
  302. for(i=0; i<Prec; i++)
  303. tf[i] = f[i];
  304. for(;;) {
  305. /* tf *= 10 */
  306. for(i=0; i<Prec; i++)
  307. tf[i] = tf[i]*10;
  308. frnorm(tf);
  309. d = (tf[0] >> Nbits) + '0';
  310. tf[0] &= One-1;
  311. /* compare next digit */
  312. c = *a;
  313. if(c == 0) {
  314. if('0' < d)
  315. return -1;
  316. if(tf[0] != 0)
  317. goto cont;
  318. for(i=1; i<Prec; i++)
  319. if(tf[i] != 0)
  320. goto cont;
  321. return 0;
  322. }
  323. if(c > d)
  324. return +1;
  325. if(c < d)
  326. return -1;
  327. a++;
  328. cont:;
  329. }
  330. }
  331. static void
  332. _divby(char *a, int *na, int b)
  333. {
  334. int n, c;
  335. char *p;
  336. p = a;
  337. n = 0;
  338. while(n>>b == 0) {
  339. c = *a++;
  340. if(c == 0) {
  341. while(n) {
  342. c = n*10;
  343. if(c>>b)
  344. break;
  345. n = c;
  346. }
  347. goto xx;
  348. }
  349. n = n*10 + c-'0';
  350. (*na)--;
  351. }
  352. for(;;) {
  353. c = n>>b;
  354. n -= c<<b;
  355. *p++ = c + '0';
  356. c = *a++;
  357. if(c == 0)
  358. break;
  359. n = n*10 + c-'0';
  360. }
  361. (*na)++;
  362. xx:
  363. while(n) {
  364. n = n*10;
  365. c = n>>b;
  366. n -= c<<b;
  367. *p++ = c + '0';
  368. (*na)++;
  369. }
  370. *p = 0;
  371. }
  372. static void
  373. divby(char *a, int *na, int b)
  374. {
  375. while(b > 9){
  376. _divby(a, na, 9);
  377. a[*na] = 0;
  378. b -= 9;
  379. }
  380. if(b > 0)
  381. _divby(a, na, b);
  382. }
  383. static Tab tab1[] =
  384. {
  385. 1, 0, "",
  386. 3, 1, "7",
  387. 6, 2, "63",
  388. 9, 3, "511",
  389. 13, 4, "8191",
  390. 16, 5, "65535",
  391. 19, 6, "524287",
  392. 23, 7, "8388607",
  393. 26, 8, "67108863",
  394. 27, 9, "134217727",
  395. };
  396. static void
  397. divascii(char *a, int *na, int *dp, int *bp)
  398. {
  399. int b, d;
  400. Tab *t;
  401. d = *dp;
  402. if(d >= nelem(tab1))
  403. d = nelem(tab1)-1;
  404. t = tab1 + d;
  405. b = t->bp;
  406. if(memcmp(a, t->cmp, t->siz) > 0)
  407. d--;
  408. *dp -= d;
  409. *bp += b;
  410. divby(a, na, b);
  411. }
  412. static void
  413. mulby(char *a, char *p, char *q, int b)
  414. {
  415. int n, c;
  416. n = 0;
  417. *p = 0;
  418. for(;;) {
  419. q--;
  420. if(q < a)
  421. break;
  422. c = *q - '0';
  423. c = (c<<b) + n;
  424. n = c/10;
  425. c -= n*10;
  426. p--;
  427. *p = c + '0';
  428. }
  429. while(n) {
  430. c = n;
  431. n = c/10;
  432. c -= n*10;
  433. p--;
  434. *p = c + '0';
  435. }
  436. }
  437. static Tab tab2[] =
  438. {
  439. 1, 1, "", // dp = 0-0
  440. 3, 3, "125",
  441. 6, 5, "15625",
  442. 9, 7, "1953125",
  443. 13, 10, "1220703125",
  444. 16, 12, "152587890625",
  445. 19, 14, "19073486328125",
  446. 23, 17, "11920928955078125",
  447. 26, 19, "1490116119384765625",
  448. 27, 19, "7450580596923828125", // dp 8-9
  449. };
  450. static void
  451. mulascii(char *a, int *na, int *dp, int *bp)
  452. {
  453. char *p;
  454. int d, b;
  455. Tab *t;
  456. d = -*dp;
  457. if(d >= nelem(tab2))
  458. d = nelem(tab2)-1;
  459. t = tab2 + d;
  460. b = t->bp;
  461. if(memcmp(a, t->cmp, t->siz) < 0)
  462. d--;
  463. p = a + *na;
  464. *bp -= b;
  465. *dp += d;
  466. *na += d;
  467. mulby(a, p+d, p, b);
  468. }
  469. static int
  470. xcmp(char *a, char *b)
  471. {
  472. int c1, c2;
  473. while(c1 = *b++) {
  474. c2 = *a++;
  475. if(isupper(c2))
  476. c2 = tolower(c2);
  477. if(c1 != c2)
  478. return 1;
  479. }
  480. return 0;
  481. }