/* mpf_set_str (dest, string, base) -- Convert the string STRING
   in base BASE to a float in dest.  If BASE is zero, decimal is assumed;
   the base is not inferred from the leading characters of STRING.
4 
5 Copyright 1993-1997, 2000-2003, 2005, 2007, 2008, 2011, 2013, 2019 Free
6 Software Foundation, Inc.
7 
8 This file is part of the GNU MP Library.
9 
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of either:
12 
13   * the GNU Lesser General Public License as published by the Free
14     Software Foundation; either version 3 of the License, or (at your
15     option) any later version.
16 
17 or
18 
19   * the GNU General Public License as published by the Free Software
20     Foundation; either version 2 of the License, or (at your option) any
21     later version.
22 
23 or both in parallel, as here.
24 
25 The GNU MP Library is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28 for more details.
29 
30 You should have received copies of the GNU General Public License and the
31 GNU Lesser General Public License along with the GNU MP Library.  If not,
32 see https://www.gnu.org/licenses/.  */
33 
34 /*
35   This still needs work, as suggested by some FIXME comments.
36   1. Don't depend on superfluous mantissa digits.
37   2. Allocate temp space more cleverly.
38   3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
39 */
40 
41 #define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
42 
43 #include "config.h"
44 
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>    /* for LONG_MAX */
48 
49 #if HAVE_LANGINFO_H
50 #include <langinfo.h>  /* for nl_langinfo */
51 #endif
52 
53 #if HAVE_LOCALE_H
54 #include <locale.h>    /* for localeconv */
55 #endif
56 
57 #include "gmp-impl.h"
58 #include "longlong.h"
59 
60 
61 #define digit_value_tab __gmp_digit_value_tab
62 
63 /* Compute base^exp and return the most significant prec limbs in rp[].
64    Put the count of omitted low limbs in *ign.
65    Return the actual size (which might be less than prec).  */
66 static mp_size_t
mpn_pow_1_highpart(mp_ptr rp,mp_size_t * ignp,mp_limb_t base,mp_exp_t exp,mp_size_t prec,mp_ptr tp)67 mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
68                         mp_limb_t base, mp_exp_t exp,
69                         mp_size_t prec, mp_ptr tp)
70 {
71   mp_size_t ign;              /* counts number of ignored low limbs in r */
72   mp_size_t off;              /* keeps track of offset where value starts */
73   mp_ptr passed_rp = rp;
74   mp_size_t rn;
75   int cnt;
76   int i;
77 
78   rp[0] = base;
79   rn = 1;
80   off = 0;
81   ign = 0;
82   count_leading_zeros (cnt, exp);
83   for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
84     {
85       mpn_sqr (tp, rp + off, rn);
86       rn = 2 * rn;
87       rn -= tp[rn - 1] == 0;
88       ign <<= 1;
89 
90       off = 0;
91       if (rn > prec)
92           {
93             ign += rn - prec;
94             off = rn - prec;
95             rn = prec;
96           }
97       MP_PTR_SWAP (rp, tp);
98 
99       if (((exp >> i) & 1) != 0)
100           {
101             mp_limb_t cy;
102             cy = mpn_mul_1 (rp, rp + off, rn, base);
103             rp[rn] = cy;
104             rn += cy != 0;
105             off = 0;
106           }
107     }
108 
109   if (rn > prec)
110     {
111       ign += rn - prec;
112       rp += rn - prec;
113       rn = prec;
114     }
115 
116   MPN_COPY_INCR (passed_rp, rp + off, rn);
117   *ignp = ign;
118   return rn;
119 }
120 
121 int
mpf_set_str(mpf_ptr x,const char * str,int base)122 mpf_set_str (mpf_ptr x, const char *str, int base)
123 {
124   size_t str_size;
125   char *s, *begs;
126   size_t i, j;
127   int c;
128   int negative;
129   char *dotpos;
130   const char *expptr;
131   int exp_base;
132   const char  *point = GMP_DECIMAL_POINT;
133   size_t      pointlen = strlen (point);
134   const unsigned char *digit_value;
135   int incr;
136   size_t n_zeros_skipped;
137 
138   TMP_DECL;
139 
140   c = (unsigned char) *str;
141 
142   /* Skip whitespace.  */
143   while (isspace (c))
144     c = (unsigned char) *++str;
145 
146   negative = 0;
147   if (c == '-')
148     {
149       negative = 1;
150       c = (unsigned char) *++str;
151     }
152 
153   /* Default base to decimal.  */
154   if (base == 0)
155     base = 10;
156 
157   exp_base = base;
158 
159   if (base < 0)
160     {
161       exp_base = 10;
162       base = -base;
163     }
164 
165   digit_value = digit_value_tab;
166   if (base > 36)
167     {
168       /* For bases > 36, use the collating sequence
169            0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
170       digit_value += 208;
171       if (base > 62)
172           return -1;                    /* too large base */
173     }
174 
175   /* Require at least one digit, possibly after an initial decimal point.  */
176   if (digit_value[c] >= base)
177     {
178       /* not a digit, must be a decimal point */
179       for (i = 0; i < pointlen; i++)
180           if (str[i] != point[i])
181             return -1;
182       if (digit_value[(unsigned char) str[pointlen]] >= base)
183           return -1;
184     }
185 
186   /* Locate exponent part of the input.  Look from the right of the string,
187      since the exponent is usually a lot shorter than the mantissa.  */
188   expptr = NULL;
189   str_size = strlen (str);
190   for (i = str_size - 1; i > 0; i--)
191     {
192       c = (unsigned char) str[i];
193       if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
194           {
195             expptr = str + i + 1;
196             str_size = i;
197             break;
198           }
199     }
200 
201   TMP_MARK;
202   s = begs = (char *) TMP_ALLOC (str_size + 1);
203 
204   incr = 0;
205   n_zeros_skipped = 0;
206   dotpos = NULL;
207 
208   /* Loop through mantissa, converting it from ASCII to raw byte values.  */
209   for (i = 0; i < str_size; i++)
210     {
211       c = (unsigned char) *str;
212       if (!isspace (c))
213           {
214             int dig;
215 
216             for (j = 0; j < pointlen; j++)
217               if (str[j] != point[j])
218                 goto not_point;
219             if (1)
220               {
221                 if (dotpos != 0)
222                     {
223                       /* already saw a decimal point, another is invalid */
224                       TMP_FREE;
225                       return -1;
226                     }
227                 dotpos = s;
228                 str += pointlen - 1;
229                 i += pointlen - 1;
230               }
231             else
232               {
233               not_point:
234                 dig = digit_value[c];
235                 if (dig >= base)
236                     {
237                       TMP_FREE;
238                       return -1;
239                     }
240                 *s = dig;
241                 incr |= dig != 0;
242                 s += incr;    /* Increment after first non-0 digit seen. */
243                 if (dotpos != NULL)
244                     /* Count skipped zeros between radix point and first non-0
245                        digit. */
246                     n_zeros_skipped += 1 - incr;
247               }
248           }
249       c = (unsigned char) *++str;
250     }
251 
252   str_size = s - begs;
253 
254   {
255     long exp_in_base;
256     mp_size_t ra, ma, rn, mn;
257     int cnt;
258     mp_ptr mp, tp, rp;
259     mp_exp_t exp_in_limbs;
260     mp_size_t prec = PREC(x) + 1;
261     int divflag;
262     mp_size_t madj, radj;
263 
264 #if 0
265     size_t n_chars_needed;
266 
267     /* This needs careful testing.  Leave disabled for now.  */
268     /* Just consider the relevant leading digits of the mantissa.  */
269     LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
270     if (str_size > n_chars_needed)
271       str_size = n_chars_needed;
272 #endif
273 
274     if (str_size == 0)
275       {
276           SIZ(x) = 0;
277           EXP(x) = 0;
278           TMP_FREE;
279           return 0;
280       }
281 
282     LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
283     mp = TMP_ALLOC_LIMBS (ma);
284     mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
285 
286     madj = 0;
287     /* Ignore excess limbs in MP,MSIZE.  */
288     if (mn > prec)
289       {
290           madj = mn - prec;
291           mp += mn - prec;
292           mn = prec;
293       }
294 
295     if (expptr != 0)
296       {
297           /* Scan and convert the exponent, in base exp_base.  */
298           long dig, minus, plusminus;
299           c = (unsigned char) *expptr;
300           minus = -(long) (c == '-');
301           plusminus = minus | -(long) (c == '+');
302           expptr -= plusminus;                              /* conditional increment */
303           c = (unsigned char) *expptr++;
304           dig = digit_value[c];
305           if (dig >= exp_base)
306             {
307               TMP_FREE;
308               return -1;
309             }
310           exp_in_base = dig;
311           c = (unsigned char) *expptr++;
312           dig = digit_value[c];
313           while (dig < exp_base)
314             {
315               exp_in_base = exp_in_base * exp_base;
316               exp_in_base += dig;
317               c = (unsigned char) *expptr++;
318               dig = digit_value[c];
319             }
320           exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
321       }
322     else
323       exp_in_base = 0;
324     if (dotpos != 0)
325       exp_in_base -= s - dotpos + n_zeros_skipped;
326     divflag = exp_in_base < 0;
327     exp_in_base = ABS (exp_in_base);
328 
329     if (exp_in_base == 0)
330       {
331           MPN_COPY (PTR(x), mp, mn);
332           SIZ(x) = negative ? -mn : mn;
333           EXP(x) = mn + madj;
334           TMP_FREE;
335           return 0;
336       }
337 
338     ra = 2 * (prec + 1);
339     TMP_ALLOC_LIMBS_2 (rp, ra, tp, ra);
340     rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
341 
342     if (divflag)
343       {
344 #if 0
345           /* FIXME: Should use mpn_div_q here.  */
346           ...
347           mpn_div_q (tp, mp, mn, rp, rn, scratch);
348           ...
349 #else
350           mp_ptr qp;
351           mp_limb_t qlimb;
352           if (mn < rn)
353             {
354               /* Pad out MP,MSIZE for current divrem semantics.  */
355               mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
356               MPN_ZERO (tmp, rn - mn);
357               MPN_COPY (tmp + rn - mn, mp, mn);
358               mp = tmp;
359               madj -= rn - mn;
360               mn = rn;
361             }
362           if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
363             {
364               mp_limb_t cy;
365               count_leading_zeros (cnt, rp[rn - 1]);
366               cnt -= GMP_NAIL_BITS;
367               mpn_lshift (rp, rp, rn, cnt);
368               cy = mpn_lshift (mp, mp, mn, cnt);
369               if (cy)
370                 mp[mn++] = cy;
371             }
372 
373           qp = TMP_ALLOC_LIMBS (prec + 1);
374           qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
375           tp = qp;
376           exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
377           rn = prec;
378           if (qlimb != 0)
379             {
380               tp[prec] = qlimb;
381               /* Skip the least significant limb not to overrun the destination
382                  variable.  */
383               tp++;
384             }
385 #endif
386       }
387     else
388       {
389           tp = TMP_ALLOC_LIMBS (rn + mn);
390           if (rn > mn)
391             mpn_mul (tp, rp, rn, mp, mn);
392           else
393             mpn_mul (tp, mp, mn, rp, rn);
394           rn += mn;
395           rn -= tp[rn - 1] == 0;
396           exp_in_limbs = rn + madj + radj;
397 
398           if (rn > prec)
399             {
400               tp += rn - prec;
401               rn = prec;
402               exp_in_limbs += 0;
403             }
404       }
405 
406     MPN_COPY (PTR(x), tp, rn);
407     SIZ(x) = negative ? -rn : rn;
408     EXP(x) = exp_in_limbs;
409     TMP_FREE;
410     return 0;
411   }
412 }
413