xref: /dragonfly/usr.bin/sort/coll.c (revision a98f7024a73d3aaccc56d8f7c0082a60a274a0a5)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $
28  */
29 
30 
31 #include <sys/types.h>
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 #if defined(SORT_RANDOM)
43 #include <openssl/md5.h>
44 #endif
45 
46 #include "coll.h"
47 #include "vsort.h"
48 
49 struct key_specs *keys;
50 size_t keys_num = 0;
51 
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57 
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 #if defined(SORT_RANDOM)
64 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
65 #endif
66 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
67 
68 /*
69  * Allocate keys array
70  */
71 struct keys_array *
keys_array_alloc(void)72 keys_array_alloc(void)
73 {
74           struct keys_array *ka;
75           size_t sz;
76 
77           sz = keys_array_size();
78           ka = sort_malloc(sz);
79           memset(ka, 0, sz);
80 
81           return (ka);
82 }
83 
84 /*
85  * Calculate whether we need key hint space
86  */
87 static size_t
key_hint_size(void)88 key_hint_size(void)
89 {
90 
91           return (need_hint ? sizeof(struct key_hint) : 0);
92 }
93 
94 /*
95  * Calculate keys array size
96  */
97 size_t
keys_array_size(void)98 keys_array_size(void)
99 {
100 
101           return (keys_num * (sizeof(struct key_value) + key_hint_size()));
102 }
103 
104 /*
105  * Clean data of keys array
106  */
107 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)108 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
109 {
110 
111           if (ka) {
112                     for (size_t i = 0; i < keys_num; ++i)
113                               if (ka->key[i].k && ka->key[i].k != s)
114                                         bwsfree(ka->key[i].k);
115                     memset(ka, 0, keys_array_size());
116           }
117 }
118 
119 /*
120  * Set value of a key in the keys set
121  */
122 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
124 {
125 
126           if (ka && keys_num > ind) {
127                     struct key_value *kv;
128 
129                     kv = &(ka->key[ind]);
130 
131                     if (kv->k && kv->k != s)
132                               bwsfree(kv->k);
133                     kv->k = s;
134           }
135 }
136 
137 /*
138  * Initialize a sort list item
139  */
140 struct sort_list_item *
sort_list_item_alloc(void)141 sort_list_item_alloc(void)
142 {
143           struct sort_list_item *si;
144           size_t sz;
145 
146           sz = sizeof(struct sort_list_item) + keys_array_size();
147           si = sort_malloc(sz);
148           memset(si, 0, sz);
149 
150           return (si);
151 }
152 
153 size_t
sort_list_item_size(struct sort_list_item * si)154 sort_list_item_size(struct sort_list_item *si)
155 {
156           size_t ret = 0;
157 
158           if (si) {
159                     ret = sizeof(struct sort_list_item) + keys_array_size();
160                     if (si->str)
161                               ret += bws_memsize(si->str);
162                     for (size_t i = 0; i < keys_num; ++i) {
163                               struct key_value *kv;
164 
165                               kv = &(si->ka.key[i]);
166 
167                               if (kv->k != si->str)
168                                         ret += bws_memsize(kv->k);
169                     }
170           }
171           return (ret);
172 }
173 
174 /*
175  * Calculate key for a sort list item
176  */
177 static void
sort_list_item_make_key(struct sort_list_item * si)178 sort_list_item_make_key(struct sort_list_item *si)
179 {
180 
181           preproc(si->str, &(si->ka));
182 }
183 
184 /*
185  * Set value of a sort list item.
186  * Return combined string and keys memory size.
187  */
188 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
190 {
191 
192           if (si) {
193                     clean_keys_array(si->str, &(si->ka));
194                     if (si->str) {
195                               if (si->str == str) {
196                                         /* we are trying to reset the same string */
197                                         return;
198                               } else {
199                                         bwsfree(si->str);
200                                         si->str = NULL;
201                               }
202                     }
203                     si->str = str;
204                     sort_list_item_make_key(si);
205           }
206 }
207 
208 /*
209  * De-allocate a sort list item object memory
210  */
211 void
sort_list_item_clean(struct sort_list_item * si)212 sort_list_item_clean(struct sort_list_item *si)
213 {
214 
215           if (si) {
216                     clean_keys_array(si->str, &(si->ka));
217                     if (si->str) {
218                               bwsfree(si->str);
219                               si->str = NULL;
220                     }
221           }
222 }
223 
224 /*
225  * Skip columns according to specs
226  */
227 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
229     bool skip_blanks, bool *empty_key)
230 {
231           if (cols < 1)
232                     return (BWSLEN(s) + 1);
233 
234           if (skip_blanks)
235                     while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
236                               ++start;
237 
238           while (start < BWSLEN(s) && cols > 1) {
239                     --cols;
240                     ++start;
241           }
242 
243           if (start >= BWSLEN(s))
244                     *empty_key = true;
245 
246           return (start);
247 }
248 
249 /*
250  * Skip fields according to specs
251  */
252 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
254 {
255 
256           if (fields < 2) {
257                     if (BWSLEN(s) == 0)
258                               *empty_field = true;
259                     return (0);
260           } else if (!(sort_opts_vals.tflag)) {
261                     size_t cpos = 0;
262                     bool pb = true;
263 
264                     while (cpos < BWSLEN(s)) {
265                               bool isblank;
266 
267                               isblank = iswblank(BWS_GET(s, cpos));
268 
269                               if (isblank && !pb) {
270                                         --fields;
271                                         if (fields <= 1)
272                                                   return (cpos);
273                               }
274                               pb = isblank;
275                               ++cpos;
276                     }
277                     if (fields > 1)
278                               *empty_field = true;
279                     return (cpos);
280           } else {
281                     size_t cpos = 0;
282 
283                     while (cpos < BWSLEN(s)) {
284                               if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
285                                         --fields;
286                                         if (fields <= 1)
287                                                   return (cpos + 1);
288                               }
289                               ++cpos;
290                     }
291                     if (fields > 1)
292                               *empty_field = true;
293                     return (cpos);
294           }
295 }
296 
297 /*
298  * Find fields start
299  */
300 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)301 find_field_start(const struct bwstring *s, struct key_specs *ks,
302     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
303 {
304 
305           *field_start = skip_fields_to_start(s, ks->f1, empty_field);
306           if (!*empty_field)
307                     *key_start = skip_cols_to_start(s, ks->c1, *field_start,
308                         ks->pos1b, empty_key);
309           else
310                     *empty_key = true;
311 }
312 
313 /*
314  * Find end key position
315  */
316 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)317 find_field_end(const struct bwstring *s, struct key_specs *ks)
318 {
319           size_t f2, next_field_start, pos_end;
320           bool empty_field, empty_key;
321 
322           pos_end = 0;
323           next_field_start = 0;
324           empty_field = false;
325           empty_key = false;
326           f2 = ks->f2;
327 
328           if (f2 == 0)
329                     return (BWSLEN(s) + 1);
330           else {
331                     if (ks->c2 == 0) {
332                               next_field_start = skip_fields_to_start(s, f2 + 1,
333                                   &empty_field);
334                               if ((next_field_start > 0) && sort_opts_vals.tflag &&
335                                   ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
336                                   next_field_start - 1)))
337                                         --next_field_start;
338                     } else
339                               next_field_start = skip_fields_to_start(s, f2,
340                                   &empty_field);
341           }
342 
343           if (empty_field || (next_field_start >= BWSLEN(s)))
344                     return (BWSLEN(s) + 1);
345 
346           if (ks->c2) {
347                     pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
348                         ks->pos2b, &empty_key);
349                     if (pos_end < BWSLEN(s))
350                               ++pos_end;
351           } else
352                     pos_end = next_field_start;
353 
354           return (pos_end);
355 }
356 
357 /*
358  * Cut a field according to the key specs
359  */
360 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)361 cut_field(const struct bwstring *s, struct key_specs *ks)
362 {
363           struct bwstring *ret = NULL;
364 
365           if (s && ks) {
366                     size_t field_start, key_end, key_start, sz;
367                     bool empty_field, empty_key;
368 
369                     field_start = 0;
370                     key_start = 0;
371                     empty_field = false;
372                     empty_key = false;
373 
374                     find_field_start(s, ks, &field_start, &key_start,
375                         &empty_field, &empty_key);
376 
377                     if (empty_key)
378                               sz = 0;
379                     else {
380                               key_end = find_field_end(s, ks);
381                               sz = (key_end < key_start) ? 0 : (key_end - key_start);
382                     }
383 
384                     ret = bwsalloc(sz);
385                     if (sz)
386                               bwsnocpy(ret, s, key_start, sz);
387           } else
388                     ret = bwsalloc(0);
389 
390           return (ret);
391 }
392 
393 /*
394  * Preprocesses a line applying the necessary transformations
395  * specified by command line options and returns the preprocessed
396  * string, which can be used to compare.
397  */
398 int
preproc(struct bwstring * s,struct keys_array * ka)399 preproc(struct bwstring *s, struct keys_array *ka)
400 {
401 
402           if (sort_opts_vals.kflag)
403                     for (size_t i = 0; i < keys_num; i++) {
404                               struct bwstring *key;
405                               struct key_specs *kspecs;
406                               struct sort_mods *sm;
407 
408                               kspecs = &(keys[i]);
409                               key = cut_field(s, kspecs);
410 
411                               sm = &(kspecs->sm);
412                               if (sm->dflag)
413                                         key = dictionary_order(key);
414                               else if (sm->iflag)
415                                         key = ignore_nonprinting(key);
416                               if (sm->fflag || sm->Mflag)
417                                         key = ignore_case(key);
418 
419                               set_key_on_keys_array(ka, key, i);
420                     }
421           else {
422                     struct bwstring *ret = NULL;
423                     struct sort_mods *sm = default_sort_mods;
424 
425                     if (sm->bflag) {
426                               if (ret == NULL)
427                                         ret = bwsdup(s);
428                               ret = ignore_leading_blanks(ret);
429                     }
430                     if (sm->dflag) {
431                               if (ret == NULL)
432                                         ret = bwsdup(s);
433                               ret = dictionary_order(ret);
434                     } else if (sm->iflag) {
435                               if (ret == NULL)
436                                         ret = bwsdup(s);
437                               ret = ignore_nonprinting(ret);
438                     }
439                     if (sm->fflag || sm->Mflag) {
440                               if (ret == NULL)
441                                         ret = bwsdup(s);
442                               ret = ignore_case(ret);
443                     }
444                     if (ret == NULL)
445                               set_key_on_keys_array(ka, s, 0);
446                     else
447                               set_key_on_keys_array(ka, ret, 0);
448           }
449 
450           return 0;
451 }
452 
453 cmpcoll_t
get_sort_func(struct sort_mods * sm)454 get_sort_func(struct sort_mods *sm)
455 {
456 
457           if (sm->nflag)
458                     return (numcoll);
459           else if (sm->hflag)
460                     return (hnumcoll);
461           else if (sm->gflag)
462                     return (gnumcoll);
463           else if (sm->Mflag)
464                     return (monthcoll);
465 #if defined(SORT_RANDOM)
466           else if (sm->Rflag)
467                     return (randomcoll);
468 #endif
469           else if (sm->Vflag)
470                     return (versioncoll);
471           else
472                     return (wstrcoll);
473 }
474 
475 /*
476  * Compares the given strings.  Returns a positive number if
477  * the first precedes the second, a negative number if the second is
478  * the preceding one, and zero if they are equal.  This function calls
479  * the underlying collate functions, which done the actual comparison.
480  */
481 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
483 {
484           struct sort_mods *sm;
485           int res = 0;
486 
487           for (size_t i = 0; i < keys_num; ++i) {
488                     sm = &(keys[i].sm);
489 
490                     if (sm->rflag)
491                               res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
492                     else
493                               res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
494 
495                     if (res)
496                               break;
497 
498                     /* offset applies to only the first key */
499                     offset = 0;
500           }
501           return (res);
502 }
503 
504 /*
505  * Compare two strings.
506  * Plain symbol-by-symbol comparison.
507  */
508 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
510 {
511 
512           if (default_sort_mods->rflag) {
513                     const struct bwstring *tmp;
514 
515                     tmp = s1;
516                     s1 = s2;
517                     s2 = tmp;
518           }
519 
520           return (bwscoll(s1, s2, 0));
521 }
522 
523 /*
524  * Compare a string and a sort list item, according to the sort specs.
525  */
526 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
528 {
529           struct keys_array *ka1;
530           int ret = 0;
531 
532           ka1 = keys_array_alloc();
533 
534           preproc(str1, ka1);
535 
536           sort_list_item_make_key(*ss2);
537 
538           if (debug_sort) {
539                     bwsprintf(stdout, str1, "; s1=<", ">");
540                     bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
541           }
542 
543           ret = key_coll(ka1, &((*ss2)->ka), 0);
544 
545           if (debug_sort)
546                     printf("; cmp1=%d", ret);
547 
548           clean_keys_array(str1, ka1);
549           sort_free(ka1);
550 
551           if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
552                     ret = top_level_str_coll(str1, ((*ss2)->str));
553                     if (debug_sort)
554                               printf("; cmp2=%d", ret);
555           }
556 
557           if (debug_sort)
558                     printf("\n");
559 
560           return (ret);
561 }
562 
563 /*
564  * Compare two sort list items, according to the sort specs.
565  */
566 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
568     size_t offset)
569 {
570           int ret;
571 
572           ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
573 
574           if (debug_sort) {
575                     if (offset)
576                               printf("; offset=%d", (int) offset);
577                     bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
578                     bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
579                     printf("; cmp1=%d\n", ret);
580           }
581 
582           if (ret)
583                     return (ret);
584 
585           if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
586                     ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
587                     if (debug_sort)
588                               printf("; cmp2=%d\n", ret);
589           }
590 
591           return (ret);
592 }
593 
594 /*
595  * Compare two sort list items, according to the sort specs.
596  */
597 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
599 {
600 
601           return (list_coll_offset(ss1, ss2, 0));
602 }
603 
604 #define   LSCDEF(N)                                                             \
605 static int                                                                                \
606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)         \
607 {                                                                                         \
608                                                                                           \
609           return (list_coll_offset(ss1, ss2, N));                               \
610 }
611 
612 LSCDEF(1)
613 LSCDEF(2)
614 LSCDEF(3)
615 LSCDEF(4)
616 LSCDEF(5)
617 LSCDEF(6)
618 LSCDEF(7)
619 LSCDEF(8)
620 LSCDEF(9)
621 LSCDEF(10)
622 LSCDEF(11)
623 LSCDEF(12)
624 LSCDEF(13)
625 LSCDEF(14)
626 LSCDEF(15)
627 LSCDEF(16)
628 LSCDEF(17)
629 LSCDEF(18)
630 LSCDEF(19)
631 LSCDEF(20)
632 
633 listcoll_t
get_list_call_func(size_t offset)634 get_list_call_func(size_t offset)
635 {
636           static const listcoll_t lsarray[] = { list_coll, list_coll_1,
637               list_coll_2, list_coll_3, list_coll_4, list_coll_5,
638               list_coll_6, list_coll_7, list_coll_8, list_coll_9,
639               list_coll_10, list_coll_11, list_coll_12, list_coll_13,
640               list_coll_14, list_coll_15, list_coll_16, list_coll_17,
641               list_coll_18, list_coll_19, list_coll_20 };
642 
643           if (offset <= 20)
644                     return (lsarray[offset]);
645 
646           return (list_coll);
647 }
648 
649 /*
650  * Compare two sort list items, only by their original string.
651  */
652 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
654 {
655 
656           return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
657 }
658 
659 /*
660  * Maximum size of a number in the string (before or after decimal point)
661  */
662 #define MAX_NUM_SIZE (128)
663 
664 /*
665  * Set suffix value
666  */
setsuffix(wchar_t c,unsigned char * si)667 static void setsuffix(wchar_t c, unsigned char *si)
668 {
669           switch (c){
670           case L'k':
671           case L'K':
672                     *si = 1;
673                     break;
674           case L'M':
675                     *si = 2;
676                     break;
677           case L'G':
678                     *si = 3;
679                     break;
680           case L'T':
681                     *si = 4;
682                     break;
683           case L'P':
684                     *si = 5;
685                     break;
686           case L'E':
687                     *si = 6;
688                     break;
689           case L'Z':
690                     *si = 7;
691                     break;
692           case L'Y':
693                     *si = 8;
694                     break;
695           default:
696                     *si = 0;
697           };
698 }
699 
700 /*
701  * Read string s and parse the string into a fixed-decimal-point number.
702  * sign equals -1 if the number is negative (explicit plus is not allowed,
703  * according to GNU sort's "info sort".
704  * The number part before decimal point is in the smain, after the decimal
705  * point is in sfrac, tail is the pointer to the remainder of the string.
706  */
707 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
709 {
710           bwstring_iterator s;
711 
712           s = bws_begin(s0);
713 
714           /* always end the fraction with zero, even if we have no fraction */
715           sfrac[0] = 0;
716 
717           while (iswblank(bws_get_iter_value(s)))
718                     s = bws_iterator_inc(s, 1);
719 
720           if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
721                     *sign = -1;
722                     s = bws_iterator_inc(s, 1);
723           }
724 
725           // This is '0', not '\0', do not change this
726           while (iswdigit(bws_get_iter_value(s)) &&
727               (bws_get_iter_value(s) == L'0'))
728                     s = bws_iterator_inc(s, 1);
729 
730           while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
731                     if (iswdigit(bws_get_iter_value(s))) {
732                               smain[*main_len] = bws_get_iter_value(s);
733                               s = bws_iterator_inc(s, 1);
734                               *main_len += 1;
735                     } else if (symbol_thousands_sep &&
736                         (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
737                               s = bws_iterator_inc(s, 1);
738                     else
739                               break;
740           }
741 
742           smain[*main_len] = 0;
743 
744           if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
745                     s = bws_iterator_inc(s, 1);
746                     while (iswdigit(bws_get_iter_value(s)) &&
747                         *frac_len < MAX_NUM_SIZE) {
748                               sfrac[*frac_len] = bws_get_iter_value(s);
749                               s = bws_iterator_inc(s, 1);
750                               *frac_len += 1;
751                     }
752                     sfrac[*frac_len] = 0;
753 
754                     while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
755                               --(*frac_len);
756                               sfrac[*frac_len] = L'\0';
757                     }
758           }
759 
760           setsuffix(bws_get_iter_value(s),si);
761 
762           if ((*main_len + *frac_len) == 0)
763                     *sign = 0;
764 
765           return (0);
766 }
767 
768 /*
769  * Implements string sort.
770  */
771 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
773 {
774 
775           if (debug_sort) {
776                     if (offset)
777                               printf("; offset=%d\n", (int) offset);
778                     bwsprintf(stdout, kv1->k, "; k1=<", ">");
779                     printf("(%zu)", BWSLEN(kv1->k));
780                     bwsprintf(stdout, kv2->k, ", k2=<", ">");
781                     printf("(%zu)", BWSLEN(kv2->k));
782           }
783 
784           return (bwscoll(kv1->k, kv2->k, offset));
785 }
786 
787 /*
788  * Compare two suffixes
789  */
790 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)791 cmpsuffix(unsigned char si1, unsigned char si2)
792 {
793 
794           return ((char)si1 - (char)si2);
795 }
796 
797 /*
798  * Implements numeric sort for -n and -h.
799  */
800 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)801 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
802     size_t offset __unused, bool use_suffix)
803 {
804           struct bwstring *s1, *s2;
805           wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
806           wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
807           int cmp_res, sign1, sign2;
808           size_t frac1, frac2, main1, main2;
809           unsigned char SI1, SI2;
810           bool e1, e2, key1_read, key2_read;
811 
812           s1 = kv1->k;
813           s2 = kv2->k;
814           sign1 = sign2 = 0;
815           main1 = main2 = 0;
816           frac1 = frac2 = 0;
817 
818           cmp_res = 0;
819           key1_read = key2_read = false;
820 
821           if (debug_sort) {
822                     bwsprintf(stdout, s1, "; k1=<", ">");
823                     bwsprintf(stdout, s2, ", k2=<", ">");
824           }
825 
826           if (s1 == s2)
827                     return (0);
828 
829           if (kv1->hint->status == HS_UNINITIALIZED) {
830                     /* read the number from the string */
831                     read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
832                     key1_read = true;
833                     kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
834                     if(main1 < 1 && frac1 < 1)
835                               kv1->hint->v.nh.empty=true;
836                     kv1->hint->v.nh.si = SI1;
837                     kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
838                         HS_INITIALIZED : HS_ERROR;
839                     kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
840           }
841 
842           if (kv2->hint->status == HS_UNINITIALIZED) {
843                     /* read the number from the string */
844                     read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
845                     key2_read = true;
846                     kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
847                     if(main2 < 1 && frac2 < 1)
848                               kv2->hint->v.nh.empty=true;
849                     kv2->hint->v.nh.si = SI2;
850                     kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
851                         HS_INITIALIZED : HS_ERROR;
852                     kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
853           }
854 
855           if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
856               HS_INITIALIZED) {
857                     unsigned long long n1, n2;
858                     bool neg1, neg2;
859 
860                     e1 = kv1->hint->v.nh.empty;
861                     e2 = kv2->hint->v.nh.empty;
862 
863                     if (e1 && e2)
864                               return (0);
865 
866                     neg1 = kv1->hint->v.nh.neg;
867                     neg2 = kv2->hint->v.nh.neg;
868 
869                     if (neg1 && !neg2)
870                               return (-1);
871                     if (neg2 && !neg1)
872                               return (+1);
873 
874                     if (e1)
875                               return (neg2 ? +1 : -1);
876                     else if (e2)
877                               return (neg1 ? -1 : +1);
878 
879 
880                     if (use_suffix) {
881                               cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
882                               if (cmp_res)
883                                         return (neg1 ? -cmp_res : cmp_res);
884                     }
885 
886                     n1 = kv1->hint->v.nh.n1;
887                     n2 = kv2->hint->v.nh.n1;
888                     if (n1 < n2)
889                               return (neg1 ? +1 : -1);
890                     else if (n1 > n2)
891                               return (neg1 ? -1 : +1);
892           }
893 
894           /* read the numbers from the strings */
895           if (!key1_read)
896                     read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
897           if (!key2_read)
898                     read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
899 
900           e1 = ((main1 + frac1) == 0);
901           e2 = ((main2 + frac2) == 0);
902 
903           if (e1 && e2)
904                     return (0);
905 
906           /* we know the result if the signs are different */
907           if (sign1 < 0 && sign2 >= 0)
908                     return (-1);
909           if (sign1 >= 0 && sign2 < 0)
910                     return (+1);
911 
912           if (e1)
913                     return ((sign2 < 0) ? +1 : -1);
914           else if (e2)
915                     return ((sign1 < 0) ? -1 : +1);
916 
917           if (use_suffix) {
918                     cmp_res = cmpsuffix(SI1, SI2);
919                     if (cmp_res)
920                               return ((sign1 < 0) ? -cmp_res : cmp_res);
921           }
922 
923           /* if both numbers are empty assume that the strings are equal */
924           if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
925                     return (0);
926 
927           /*
928            * if the main part is of different size, we know the result
929            * (because the leading zeros are removed)
930            */
931           if (main1 < main2)
932                     cmp_res = -1;
933           else if (main1 > main2)
934                     cmp_res = +1;
935           /* if the sizes are equal then simple non-collate string compare gives the correct result */
936           else
937                     cmp_res = wcscmp(smain1, smain2);
938 
939           /* check fraction */
940           if (!cmp_res)
941                     cmp_res = wcscmp(sfrac1, sfrac2);
942 
943           if (!cmp_res)
944                     return (0);
945 
946           /* reverse result if the signs are negative */
947           if (sign1 < 0 && sign2 < 0)
948                     cmp_res = -cmp_res;
949 
950           return (cmp_res);
951 }
952 
953 /*
954  * Implements numeric sort (-n).
955  */
956 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
958 {
959 
960           return (numcoll_impl(kv1, kv2, offset, false));
961 }
962 
963 /*
964  * Implements 'human' numeric sort (-h).
965  */
966 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
968 {
969 
970           return (numcoll_impl(kv1, kv2, offset, true));
971 }
972 
973 /*
974  * Implements random sort (-R).
975  */
976 #if defined(SORT_RANDOM)
977 static char *
randomcollend(MD5_CTX * ctx)978 randomcollend(MD5_CTX *ctx)
979 {
980           unsigned char digest[MD5_DIGEST_LENGTH];
981           static const char hex[]="0123456789abcdef";
982           char *buf;
983           int i;
984 
985           buf = malloc(MD5_DIGEST_LENGTH * 2 + 1);
986           if (!buf)
987                     return NULL;
988           MD5_Final(digest, ctx);
989           for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
990                     buf[2*i] = hex[digest[i] >> 4];
991                     buf[2*i+1] = hex[digest[i] & 0x0f];
992           }
993           buf[MD5_DIGEST_LENGTH * 2] = '\0';
994           return buf;
995 }
996 
997 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)998 randomcoll(struct key_value *kv1, struct key_value *kv2,
999     size_t offset __unused)
1000 {
1001           struct bwstring *s1, *s2;
1002           MD5_CTX ctx1, ctx2;
1003           char *b1, *b2;
1004 
1005           s1 = kv1->k;
1006           s2 = kv2->k;
1007 
1008           if (debug_sort) {
1009                     bwsprintf(stdout, s1, "; k1=<", ">");
1010                     bwsprintf(stdout, s2, ", k2=<", ">");
1011           }
1012 
1013           if (s1 == s2)
1014                     return (0);
1015 
1016           memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1017           memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1018 
1019           MD5_Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1020           MD5_Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1021           b1 = randomcollend(&ctx1);
1022           b2 = randomcollend(&ctx2);
1023           if (b1 == NULL) {
1024                     if (b2 == NULL)
1025                               return (0);
1026                     else {
1027                               sort_free(b2);
1028                               return (-1);
1029                     }
1030           } else if (b2 == NULL) {
1031                     sort_free(b1);
1032                     return (+1);
1033           } else {
1034                     int cmp_res;
1035 
1036                     cmp_res = strcmp(b1,b2);
1037                     sort_free(b1);
1038                     sort_free(b2);
1039 
1040                     if (!cmp_res)
1041                               cmp_res = bwscoll(s1, s2, 0);
1042 
1043                     return (cmp_res);
1044           }
1045 }
1046 #endif
1047 
1048 /*
1049  * Implements version sort (-V).
1050  */
1051 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1052 versioncoll(struct key_value *kv1, struct key_value *kv2,
1053     size_t offset __unused)
1054 {
1055           struct bwstring *s1, *s2;
1056 
1057           s1 = kv1->k;
1058           s2 = kv2->k;
1059 
1060           if (debug_sort) {
1061                     bwsprintf(stdout, s1, "; k1=<", ">");
1062                     bwsprintf(stdout, s2, ", k2=<", ">");
1063           }
1064 
1065           if (s1 == s2)
1066                     return (0);
1067 
1068           return (vcmp(s1, s2));
1069 }
1070 
1071 /*
1072  * Check for minus infinity
1073  */
1074 static inline bool
huge_minus(double d,int err1)1075 huge_minus(double d, int err1)
1076 {
1077 
1078           if (err1 == ERANGE)
1079                     if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1080                               return (+1);
1081 
1082           return (0);
1083 }
1084 
1085 /*
1086  * Check for plus infinity
1087  */
1088 static inline bool
huge_plus(double d,int err1)1089 huge_plus(double d, int err1)
1090 {
1091 
1092           if (err1 == ERANGE)
1093                     if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1094                               return (+1);
1095 
1096           return (0);
1097 }
1098 
1099 /*
1100  * Check whether a function is a NAN
1101  */
1102 static bool
is_nan(double d)1103 is_nan(double d)
1104 {
1105           return ((d == NAN) || (isnan(d)));
1106 }
1107 
1108 /*
1109  * Compare two NANs
1110  */
1111 static int
cmp_nans(double d1,double d2)1112 cmp_nans(double d1, double d2)
1113 {
1114 
1115           if (d1 < d2)
1116                     return (-1);
1117           if (d1 > d2)
1118                     return (+1);
1119           return (0);
1120 }
1121 
1122 /*
1123  * Implements general numeric sort (-g).
1124  */
1125 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1126 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1127     size_t offset __unused)
1128 {
1129           double d1, d2;
1130           int err1, err2;
1131           bool empty1, empty2, key1_read, key2_read;
1132 
1133           d1 = d2 = 0;
1134           err1 = err2 = 0;
1135           key1_read = key2_read = false;
1136 
1137           if (debug_sort) {
1138                     bwsprintf(stdout, kv1->k, "; k1=<", ">");
1139                     bwsprintf(stdout, kv2->k, "; k2=<", ">");
1140           }
1141 
1142           if (kv1->hint->status == HS_UNINITIALIZED) {
1143                     errno = 0;
1144                     d1 = bwstod(kv1->k, &empty1);
1145                     err1 = errno;
1146 
1147                     if (empty1)
1148                               kv1->hint->v.gh.notnum = true;
1149                     else if (err1 == 0) {
1150                               kv1->hint->v.gh.d = d1;
1151                               kv1->hint->v.gh.nan = is_nan(d1);
1152                               kv1->hint->status = HS_INITIALIZED;
1153                     } else
1154                               kv1->hint->status = HS_ERROR;
1155 
1156                     key1_read = true;
1157           }
1158 
1159           if (kv2->hint->status == HS_UNINITIALIZED) {
1160                     errno = 0;
1161                     d2 = bwstod(kv2->k, &empty2);
1162                     err2 = errno;
1163 
1164                     if (empty2)
1165                               kv2->hint->v.gh.notnum = true;
1166                     else if (err2 == 0) {
1167                               kv2->hint->v.gh.d = d2;
1168                               kv2->hint->v.gh.nan = is_nan(d2);
1169                               kv2->hint->status = HS_INITIALIZED;
1170                     } else
1171                               kv2->hint->status = HS_ERROR;
1172 
1173                     key2_read = true;
1174           }
1175 
1176           if (kv1->hint->status == HS_INITIALIZED &&
1177               kv2->hint->status == HS_INITIALIZED) {
1178                     if (kv1->hint->v.gh.notnum)
1179                               return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1180                     else if (kv2->hint->v.gh.notnum)
1181                               return (+1);
1182 
1183                     if (kv1->hint->v.gh.nan)
1184                               return ((kv2->hint->v.gh.nan) ?
1185                                   cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1186                                   -1);
1187                     else if (kv2->hint->v.gh.nan)
1188                               return (+1);
1189 
1190                     d1 = kv1->hint->v.gh.d;
1191                     d2 = kv2->hint->v.gh.d;
1192 
1193                     if (d1 < d2)
1194                               return (-1);
1195                     else if (d1 > d2)
1196                               return (+1);
1197                     else
1198                               return (0);
1199           }
1200 
1201           if (!key1_read) {
1202                     errno = 0;
1203                     d1 = bwstod(kv1->k, &empty1);
1204                     err1 = errno;
1205           }
1206 
1207           if (!key2_read) {
1208                     errno = 0;
1209                     d2 = bwstod(kv2->k, &empty2);
1210                     err2 = errno;
1211           }
1212 
1213           /* Non-value case: */
1214           if (empty1)
1215                     return (empty2 ? 0 : -1);
1216           else if (empty2)
1217                     return (+1);
1218 
1219           /* NAN case */
1220           if (is_nan(d1))
1221                     return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1222           else if (is_nan(d2))
1223                     return (+1);
1224 
1225           /* Infinities */
1226           if (err1 == ERANGE || err2 == ERANGE) {
1227                     /* Minus infinity case */
1228                     if (huge_minus(d1, err1)) {
1229                               if (huge_minus(d2, err2)) {
1230                                         if (d1 < d2)
1231                                                   return (-1);
1232                                         if (d1 > d2)
1233                                                   return (+1);
1234                                         return (0);
1235                               } else
1236                                         return (-1);
1237 
1238                     } else if (huge_minus(d2, err2)) {
1239                               if (huge_minus(d1, err1)) {
1240                                         if (d1 < d2)
1241                                                   return (-1);
1242                                         if (d1 > d2)
1243                                                   return (+1);
1244                                         return (0);
1245                               } else
1246                                         return (+1);
1247                     }
1248 
1249                     /* Plus infinity case */
1250                     if (huge_plus(d1, err1)) {
1251                               if (huge_plus(d2, err2)) {
1252                                         if (d1 < d2)
1253                                                   return (-1);
1254                                         if (d1 > d2)
1255                                                   return (+1);
1256                                         return (0);
1257                               } else
1258                                         return (+1);
1259                     } else if (huge_plus(d2, err2)) {
1260                               if (huge_plus(d1, err1)) {
1261                                         if (d1 < d2)
1262                                                   return (-1);
1263                                         if (d1 > d2)
1264                                                   return (+1);
1265                                         return (0);
1266                               } else
1267                                         return (-1);
1268                     }
1269           }
1270 
1271           if (d1 < d2)
1272                     return (-1);
1273           if (d1 > d2)
1274                     return (+1);
1275 
1276           return (0);
1277 }
1278 
1279 /*
1280  * Implements month sort (-M).
1281  */
1282 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1283 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1284 {
1285           int val1, val2;
1286           bool key1_read, key2_read;
1287 
1288           val1 = val2 = 0;
1289           key1_read = key2_read = false;
1290 
1291           if (debug_sort) {
1292                     bwsprintf(stdout, kv1->k, "; k1=<", ">");
1293                     bwsprintf(stdout, kv2->k, "; k2=<", ">");
1294           }
1295 
1296           if (kv1->hint->status == HS_UNINITIALIZED) {
1297                     kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1298                     key1_read = true;
1299                     kv1->hint->status = HS_INITIALIZED;
1300           }
1301 
1302           if (kv2->hint->status == HS_UNINITIALIZED) {
1303                     kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1304                     key2_read = true;
1305                     kv2->hint->status = HS_INITIALIZED;
1306           }
1307 
1308           if (kv1->hint->status == HS_INITIALIZED) {
1309                     val1 = kv1->hint->v.Mh.m;
1310                     key1_read = true;
1311           }
1312 
1313           if (kv2->hint->status == HS_INITIALIZED) {
1314                     val2 = kv2->hint->v.Mh.m;
1315                     key2_read = true;
1316           }
1317 
1318           if (!key1_read)
1319                     val1 = bws_month_score(kv1->k);
1320           if (!key2_read)
1321                     val2 = bws_month_score(kv2->k);
1322 
1323           if (val1 == val2) {
1324                     return (0);
1325           }
1326           if (val1 < val2)
1327                     return (-1);
1328           return (+1);
1329 }
1330