1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: stable/10/usr.bin/sort/coll.c 318152 2017-05-10 20:29:01Z marius $");
30
31 #include <sys/types.h>
32
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43
44 #include "coll.h"
45 #include "vsort.h"
46
47 struct key_specs *keys;
48 size_t keys_num = 0;
49
50 wint_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wint_t symbol_thousands_sep = 0;
53 wint_t symbol_negative_sign = L'-';
54 wint_t symbol_positive_sign = L'+';
55
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63
64 /*
65 * Allocate keys array
66 */
67 struct keys_array *
keys_array_alloc(void)68 keys_array_alloc(void)
69 {
70 struct keys_array *ka;
71 size_t sz;
72
73 sz = keys_array_size();
74 ka = sort_malloc(sz);
75 memset(ka, 0, sz);
76
77 return (ka);
78 }
79
80 /*
81 * Calculate whether we need key hint space
82 */
83 static size_t
key_hint_size(void)84 key_hint_size(void)
85 {
86
87 return (need_hint ? sizeof(struct key_hint) : 0);
88 }
89
90 /*
91 * Calculate keys array size
92 */
93 size_t
keys_array_size(void)94 keys_array_size(void)
95 {
96
97 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98 }
99
100 /*
101 * Clean data of keys array
102 */
103 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)104 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105 {
106
107 if (ka) {
108 for (size_t i = 0; i < keys_num; ++i) {
109 const struct key_value *kv;
110
111 kv = get_key_from_keys_array(ka, i);
112 if (kv->k && kv->k != s)
113 bwsfree(kv->k);
114 }
115 memset(ka, 0, keys_array_size());
116 }
117 }
118
119 /*
120 * Get pointer to a key value in the keys set
121 */
122 struct key_value *
get_key_from_keys_array(struct keys_array * ka,size_t ind)123 get_key_from_keys_array(struct keys_array *ka, size_t ind)
124 {
125
126 return ((struct key_value *)((caddr_t)ka->key +
127 ind * (sizeof(struct key_value) + key_hint_size())));
128 }
129
130 /*
131 * Set value of a key in the keys set
132 */
133 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)134 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
135 {
136
137 if (ka && keys_num > ind) {
138 struct key_value *kv;
139
140 kv = get_key_from_keys_array(ka, ind);
141
142 if (kv->k && kv->k != s)
143 bwsfree(kv->k);
144 kv->k = s;
145 }
146 }
147
148 /*
149 * Initialize a sort list item
150 */
151 struct sort_list_item *
sort_list_item_alloc(void)152 sort_list_item_alloc(void)
153 {
154 struct sort_list_item *si;
155 size_t sz;
156
157 sz = sizeof(struct sort_list_item) + keys_array_size();
158 si = sort_malloc(sz);
159 memset(si, 0, sz);
160
161 return (si);
162 }
163
164 size_t
sort_list_item_size(struct sort_list_item * si)165 sort_list_item_size(struct sort_list_item *si)
166 {
167 size_t ret = 0;
168
169 if (si) {
170 ret = sizeof(struct sort_list_item) + keys_array_size();
171 if (si->str)
172 ret += bws_memsize(si->str);
173 for (size_t i = 0; i < keys_num; ++i) {
174 const struct key_value *kv;
175
176 kv = get_key_from_keys_array(&si->ka, i);
177
178 if (kv->k != si->str)
179 ret += bws_memsize(kv->k);
180 }
181 }
182 return (ret);
183 }
184
185 /*
186 * Calculate key for a sort list item
187 */
188 static void
sort_list_item_make_key(struct sort_list_item * si)189 sort_list_item_make_key(struct sort_list_item *si)
190 {
191
192 preproc(si->str, &(si->ka));
193 }
194
195 /*
196 * Set value of a sort list item.
197 * Return combined string and keys memory size.
198 */
199 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)200 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
201 {
202
203 if (si) {
204 clean_keys_array(si->str, &(si->ka));
205 if (si->str) {
206 if (si->str == str) {
207 /* we are trying to reset the same string */
208 return;
209 } else {
210 bwsfree(si->str);
211 si->str = NULL;
212 }
213 }
214 si->str = str;
215 sort_list_item_make_key(si);
216 }
217 }
218
219 /*
220 * De-allocate a sort list item object memory
221 */
222 void
sort_list_item_clean(struct sort_list_item * si)223 sort_list_item_clean(struct sort_list_item *si)
224 {
225
226 if (si) {
227 clean_keys_array(si->str, &(si->ka));
228 if (si->str) {
229 bwsfree(si->str);
230 si->str = NULL;
231 }
232 }
233 }
234
235 /*
236 * Skip columns according to specs
237 */
238 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)239 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
240 bool skip_blanks, bool *empty_key)
241 {
242 if (cols < 1)
243 return (BWSLEN(s) + 1);
244
245 if (skip_blanks)
246 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
247 ++start;
248
249 while (start < BWSLEN(s) && cols > 1) {
250 --cols;
251 ++start;
252 }
253
254 if (start >= BWSLEN(s))
255 *empty_key = true;
256
257 return (start);
258 }
259
260 /*
261 * Skip fields according to specs
262 */
263 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)264 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
265 {
266
267 if (fields < 2) {
268 if (BWSLEN(s) == 0)
269 *empty_field = true;
270 return (0);
271 } else if (!(sort_opts_vals.tflag)) {
272 size_t cpos = 0;
273 bool pb = true;
274
275 while (cpos < BWSLEN(s)) {
276 bool isblank;
277
278 isblank = iswblank(BWS_GET(s, cpos));
279
280 if (isblank && !pb) {
281 --fields;
282 if (fields <= 1)
283 return (cpos);
284 }
285 pb = isblank;
286 ++cpos;
287 }
288 if (fields > 1)
289 *empty_field = true;
290 return (cpos);
291 } else {
292 size_t cpos = 0;
293
294 while (cpos < BWSLEN(s)) {
295 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
296 --fields;
297 if (fields <= 1)
298 return (cpos + 1);
299 }
300 ++cpos;
301 }
302 if (fields > 1)
303 *empty_field = true;
304 return (cpos);
305 }
306 }
307
308 /*
309 * Find fields start
310 */
311 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)312 find_field_start(const struct bwstring *s, struct key_specs *ks,
313 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
314 {
315
316 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
317 if (!*empty_field)
318 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
319 ks->pos1b, empty_key);
320 else
321 *empty_key = true;
322 }
323
324 /*
325 * Find end key position
326 */
327 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)328 find_field_end(const struct bwstring *s, struct key_specs *ks)
329 {
330 size_t f2, next_field_start, pos_end;
331 bool empty_field, empty_key;
332
333 pos_end = 0;
334 next_field_start = 0;
335 empty_field = false;
336 empty_key = false;
337 f2 = ks->f2;
338
339 if (f2 == 0)
340 return (BWSLEN(s) + 1);
341 else {
342 if (ks->c2 == 0) {
343 next_field_start = skip_fields_to_start(s, f2 + 1,
344 &empty_field);
345 if ((next_field_start > 0) && sort_opts_vals.tflag &&
346 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
347 next_field_start - 1)))
348 --next_field_start;
349 } else
350 next_field_start = skip_fields_to_start(s, f2,
351 &empty_field);
352 }
353
354 if (empty_field || (next_field_start >= BWSLEN(s)))
355 return (BWSLEN(s) + 1);
356
357 if (ks->c2) {
358 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
359 ks->pos2b, &empty_key);
360 if (pos_end < BWSLEN(s))
361 ++pos_end;
362 } else
363 pos_end = next_field_start;
364
365 return (pos_end);
366 }
367
368 /*
369 * Cut a field according to the key specs
370 */
371 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)372 cut_field(const struct bwstring *s, struct key_specs *ks)
373 {
374 struct bwstring *ret = NULL;
375
376 if (s && ks) {
377 size_t field_start, key_end, key_start, sz;
378 bool empty_field, empty_key;
379
380 field_start = 0;
381 key_start = 0;
382 empty_field = false;
383 empty_key = false;
384
385 find_field_start(s, ks, &field_start, &key_start,
386 &empty_field, &empty_key);
387
388 if (empty_key)
389 sz = 0;
390 else {
391 key_end = find_field_end(s, ks);
392 sz = (key_end < key_start) ? 0 : (key_end - key_start);
393 }
394
395 ret = bwsalloc(sz);
396 if (sz)
397 bwsnocpy(ret, s, key_start, sz);
398 } else
399 ret = bwsalloc(0);
400
401 return (ret);
402 }
403
404 /*
405 * Preprocesses a line applying the necessary transformations
406 * specified by command line options and returns the preprocessed
407 * string, which can be used to compare.
408 */
409 int
preproc(struct bwstring * s,struct keys_array * ka)410 preproc(struct bwstring *s, struct keys_array *ka)
411 {
412
413 if (sort_opts_vals.kflag)
414 for (size_t i = 0; i < keys_num; i++) {
415 struct bwstring *key;
416 struct key_specs *kspecs;
417 struct sort_mods *sm;
418
419 kspecs = &(keys[i]);
420 key = cut_field(s, kspecs);
421
422 sm = &(kspecs->sm);
423 if (sm->dflag)
424 key = dictionary_order(key);
425 else if (sm->iflag)
426 key = ignore_nonprinting(key);
427 if (sm->fflag || sm->Mflag)
428 key = ignore_case(key);
429
430 set_key_on_keys_array(ka, key, i);
431 }
432 else {
433 struct bwstring *ret = NULL;
434 struct sort_mods *sm = default_sort_mods;
435
436 if (sm->bflag) {
437 if (ret == NULL)
438 ret = bwsdup(s);
439 ret = ignore_leading_blanks(ret);
440 }
441 if (sm->dflag) {
442 if (ret == NULL)
443 ret = bwsdup(s);
444 ret = dictionary_order(ret);
445 } else if (sm->iflag) {
446 if (ret == NULL)
447 ret = bwsdup(s);
448 ret = ignore_nonprinting(ret);
449 }
450 if (sm->fflag || sm->Mflag) {
451 if (ret == NULL)
452 ret = bwsdup(s);
453 ret = ignore_case(ret);
454 }
455 if (ret == NULL)
456 set_key_on_keys_array(ka, s, 0);
457 else
458 set_key_on_keys_array(ka, ret, 0);
459 }
460
461 return 0;
462 }
463
464 cmpcoll_t
get_sort_func(struct sort_mods * sm)465 get_sort_func(struct sort_mods *sm)
466 {
467
468 if (sm->nflag)
469 return (numcoll);
470 else if (sm->hflag)
471 return (hnumcoll);
472 else if (sm->gflag)
473 return (gnumcoll);
474 else if (sm->Mflag)
475 return (monthcoll);
476 else if (sm->Rflag)
477 return (randomcoll);
478 else if (sm->Vflag)
479 return (versioncoll);
480 else
481 return (wstrcoll);
482 }
483
484 /*
485 * Compares the given strings. Returns a positive number if
486 * the first precedes the second, a negative number if the second is
487 * the preceding one, and zero if they are equal. This function calls
488 * the underlying collate functions, which done the actual comparison.
489 */
490 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)491 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
492 {
493 struct key_value *kv1, *kv2;
494 struct sort_mods *sm;
495 int res = 0;
496
497 for (size_t i = 0; i < keys_num; ++i) {
498 kv1 = get_key_from_keys_array(ps1, i);
499 kv2 = get_key_from_keys_array(ps2, i);
500 sm = &(keys[i].sm);
501
502 if (sm->rflag)
503 res = sm->func(kv2, kv1, offset);
504 else
505 res = sm->func(kv1, kv2, offset);
506
507 if (res)
508 break;
509
510 /* offset applies to only the first key */
511 offset = 0;
512 }
513 return (res);
514 }
515
516 /*
517 * Compare two strings.
518 * Plain symbol-by-symbol comparison.
519 */
520 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)521 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
522 {
523
524 if (default_sort_mods->rflag) {
525 const struct bwstring *tmp;
526
527 tmp = s1;
528 s1 = s2;
529 s2 = tmp;
530 }
531
532 return (bwscoll(s1, s2, 0));
533 }
534
535 /*
536 * Compare a string and a sort list item, according to the sort specs.
537 */
538 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)539 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
540 {
541 struct keys_array *ka1;
542 int ret = 0;
543
544 ka1 = keys_array_alloc();
545
546 preproc(str1, ka1);
547
548 sort_list_item_make_key(*ss2);
549
550 if (debug_sort) {
551 bwsprintf(stdout, str1, "; s1=<", ">");
552 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
553 }
554
555 ret = key_coll(ka1, &((*ss2)->ka), 0);
556
557 if (debug_sort)
558 printf("; cmp1=%d", ret);
559
560 clean_keys_array(str1, ka1);
561 sort_free(ka1);
562
563 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
564 ret = top_level_str_coll(str1, ((*ss2)->str));
565 if (debug_sort)
566 printf("; cmp2=%d", ret);
567 }
568
569 if (debug_sort)
570 printf("\n");
571
572 return (ret);
573 }
574
575 /*
576 * Compare two sort list items, according to the sort specs.
577 */
578 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)579 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
580 size_t offset)
581 {
582 int ret;
583
584 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
585
586 if (debug_sort) {
587 if (offset)
588 printf("; offset=%d", (int) offset);
589 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
590 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
591 printf("; cmp1=%d\n", ret);
592 }
593
594 if (ret)
595 return (ret);
596
597 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
598 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
599 if (debug_sort)
600 printf("; cmp2=%d\n", ret);
601 }
602
603 return (ret);
604 }
605
606 /*
607 * Compare two sort list items, according to the sort specs.
608 */
609 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)610 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
611 {
612
613 return (list_coll_offset(ss1, ss2, 0));
614 }
615
616 #define LSCDEF(N) \
617 static int \
618 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
619 { \
620 \
621 return (list_coll_offset(ss1, ss2, N)); \
622 }
623
624 LSCDEF(1)
625 LSCDEF(2)
626 LSCDEF(3)
627 LSCDEF(4)
628 LSCDEF(5)
629 LSCDEF(6)
630 LSCDEF(7)
631 LSCDEF(8)
632 LSCDEF(9)
633 LSCDEF(10)
634 LSCDEF(11)
635 LSCDEF(12)
636 LSCDEF(13)
637 LSCDEF(14)
638 LSCDEF(15)
639 LSCDEF(16)
640 LSCDEF(17)
641 LSCDEF(18)
642 LSCDEF(19)
643 LSCDEF(20)
644
645 listcoll_t
get_list_call_func(size_t offset)646 get_list_call_func(size_t offset)
647 {
648 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
649 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
650 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
651 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
652 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
653 list_coll_18, list_coll_19, list_coll_20 };
654
655 if (offset <= 20)
656 return (lsarray[offset]);
657
658 return (list_coll);
659 }
660
661 /*
662 * Compare two sort list items, only by their original string.
663 */
664 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)665 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
666 {
667
668 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
669 }
670
671 /*
672 * Maximum size of a number in the string (before or after decimal point)
673 */
674 #define MAX_NUM_SIZE (128)
675
676 /*
677 * Set suffix value
678 */
setsuffix(wchar_t c,unsigned char * si)679 static void setsuffix(wchar_t c, unsigned char *si)
680 {
681 switch (c){
682 case L'k':
683 case L'K':
684 *si = 1;
685 break;
686 case L'M':
687 *si = 2;
688 break;
689 case L'G':
690 *si = 3;
691 break;
692 case L'T':
693 *si = 4;
694 break;
695 case L'P':
696 *si = 5;
697 break;
698 case L'E':
699 *si = 6;
700 break;
701 case L'Z':
702 *si = 7;
703 break;
704 case L'Y':
705 *si = 8;
706 break;
707 default:
708 *si = 0;
709 };
710 }
711
712 /*
713 * Read string s and parse the string into a fixed-decimal-point number.
714 * sign equals -1 if the number is negative (explicit plus is not allowed,
715 * according to GNU sort's "info sort".
716 * The number part before decimal point is in the smain, after the decimal
717 * point is in sfrac, tail is the pointer to the remainder of the string.
718 */
719 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)720 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
721 {
722 bwstring_iterator s;
723
724 s = bws_begin(s0);
725
726 /* always end the fraction with zero, even if we have no fraction */
727 sfrac[0] = 0;
728
729 while (iswblank(bws_get_iter_value(s)))
730 s = bws_iterator_inc(s, 1);
731
732 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
733 *sign = -1;
734 s = bws_iterator_inc(s, 1);
735 }
736
737 // This is '0', not '\0', do not change this
738 while (iswdigit(bws_get_iter_value(s)) &&
739 (bws_get_iter_value(s) == L'0'))
740 s = bws_iterator_inc(s, 1);
741
742 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
743 if (iswdigit(bws_get_iter_value(s))) {
744 smain[*main_len] = bws_get_iter_value(s);
745 s = bws_iterator_inc(s, 1);
746 *main_len += 1;
747 } else if (symbol_thousands_sep &&
748 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
749 s = bws_iterator_inc(s, 1);
750 else
751 break;
752 }
753
754 smain[*main_len] = 0;
755
756 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
757 s = bws_iterator_inc(s, 1);
758 while (iswdigit(bws_get_iter_value(s)) &&
759 *frac_len < MAX_NUM_SIZE) {
760 sfrac[*frac_len] = bws_get_iter_value(s);
761 s = bws_iterator_inc(s, 1);
762 *frac_len += 1;
763 }
764 sfrac[*frac_len] = 0;
765
766 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
767 --(*frac_len);
768 sfrac[*frac_len] = L'\0';
769 }
770 }
771
772 setsuffix(bws_get_iter_value(s),si);
773
774 if ((*main_len + *frac_len) == 0)
775 *sign = 0;
776
777 return (0);
778 }
779
780 /*
781 * Implements string sort.
782 */
783 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)784 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
785 {
786
787 if (debug_sort) {
788 if (offset)
789 printf("; offset=%d\n", (int) offset);
790 bwsprintf(stdout, kv1->k, "; k1=<", ">");
791 printf("(%zu)", BWSLEN(kv1->k));
792 bwsprintf(stdout, kv2->k, ", k2=<", ">");
793 printf("(%zu)", BWSLEN(kv2->k));
794 }
795
796 return (bwscoll(kv1->k, kv2->k, offset));
797 }
798
799 /*
800 * Compare two suffixes
801 */
802 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)803 cmpsuffix(unsigned char si1, unsigned char si2)
804 {
805
806 return ((char)si1 - (char)si2);
807 }
808
809 /*
810 * Implements numeric sort for -n and -h.
811 */
812 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)813 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
814 size_t offset __unused, bool use_suffix)
815 {
816 struct bwstring *s1, *s2;
817 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
818 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
819 int cmp_res, sign1, sign2;
820 size_t frac1, frac2, main1, main2;
821 unsigned char SI1, SI2;
822 bool e1, e2, key1_read, key2_read;
823
824 s1 = kv1->k;
825 s2 = kv2->k;
826 sign1 = sign2 = 0;
827 main1 = main2 = 0;
828 frac1 = frac2 = 0;
829
830 cmp_res = 0;
831 key1_read = key2_read = false;
832
833 if (debug_sort) {
834 bwsprintf(stdout, s1, "; k1=<", ">");
835 bwsprintf(stdout, s2, ", k2=<", ">");
836 }
837
838 if (s1 == s2)
839 return (0);
840
841 if (kv1->hint->status == HS_UNINITIALIZED) {
842 /* read the number from the string */
843 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
844 key1_read = true;
845 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
846 if(main1 < 1 && frac1 < 1)
847 kv1->hint->v.nh.empty=true;
848 kv1->hint->v.nh.si = SI1;
849 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
850 HS_INITIALIZED : HS_ERROR;
851 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
852 }
853
854 if (kv2->hint->status == HS_UNINITIALIZED) {
855 /* read the number from the string */
856 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
857 key2_read = true;
858 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
859 if(main2 < 1 && frac2 < 1)
860 kv2->hint->v.nh.empty=true;
861 kv2->hint->v.nh.si = SI2;
862 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
863 HS_INITIALIZED : HS_ERROR;
864 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
865 }
866
867 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
868 HS_INITIALIZED) {
869 unsigned long long n1, n2;
870 bool neg1, neg2;
871
872 e1 = kv1->hint->v.nh.empty;
873 e2 = kv2->hint->v.nh.empty;
874
875 if (e1 && e2)
876 return (0);
877
878 neg1 = kv1->hint->v.nh.neg;
879 neg2 = kv2->hint->v.nh.neg;
880
881 if (neg1 && !neg2)
882 return (-1);
883 if (neg2 && !neg1)
884 return (+1);
885
886 if (e1)
887 return (neg2 ? +1 : -1);
888 else if (e2)
889 return (neg1 ? -1 : +1);
890
891
892 if (use_suffix) {
893 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
894 if (cmp_res)
895 return (neg1 ? -cmp_res : cmp_res);
896 }
897
898 n1 = kv1->hint->v.nh.n1;
899 n2 = kv2->hint->v.nh.n1;
900 if (n1 < n2)
901 return (neg1 ? +1 : -1);
902 else if (n1 > n2)
903 return (neg1 ? -1 : +1);
904 }
905
906 /* read the numbers from the strings */
907 if (!key1_read)
908 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
909 if (!key2_read)
910 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
911
912 e1 = ((main1 + frac1) == 0);
913 e2 = ((main2 + frac2) == 0);
914
915 if (e1 && e2)
916 return (0);
917
918 /* we know the result if the signs are different */
919 if (sign1 < 0 && sign2 >= 0)
920 return (-1);
921 if (sign1 >= 0 && sign2 < 0)
922 return (+1);
923
924 if (e1)
925 return ((sign2 < 0) ? +1 : -1);
926 else if (e2)
927 return ((sign1 < 0) ? -1 : +1);
928
929 if (use_suffix) {
930 cmp_res = cmpsuffix(SI1, SI2);
931 if (cmp_res)
932 return ((sign1 < 0) ? -cmp_res : cmp_res);
933 }
934
935 /* if both numbers are empty assume that the strings are equal */
936 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
937 return (0);
938
939 /*
940 * if the main part is of different size, we know the result
941 * (because the leading zeros are removed)
942 */
943 if (main1 < main2)
944 cmp_res = -1;
945 else if (main1 > main2)
946 cmp_res = +1;
947 /* if the sizes are equal then simple non-collate string compare gives the correct result */
948 else
949 cmp_res = wcscmp(smain1, smain2);
950
951 /* check fraction */
952 if (!cmp_res)
953 cmp_res = wcscmp(sfrac1, sfrac2);
954
955 if (!cmp_res)
956 return (0);
957
958 /* reverse result if the signs are negative */
959 if (sign1 < 0 && sign2 < 0)
960 cmp_res = -cmp_res;
961
962 return (cmp_res);
963 }
964
965 /*
966 * Implements numeric sort (-n).
967 */
968 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)969 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
970 {
971
972 return (numcoll_impl(kv1, kv2, offset, false));
973 }
974
975 /*
976 * Implements 'human' numeric sort (-h).
977 */
978 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)979 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
980 {
981
982 return (numcoll_impl(kv1, kv2, offset, true));
983 }
984
985 /*
986 * Implements random sort (-R).
987 */
988 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)989 randomcoll(struct key_value *kv1, struct key_value *kv2,
990 size_t offset __unused)
991 {
992 struct bwstring *s1, *s2;
993 MD5_CTX ctx1, ctx2;
994 char *b1, *b2;
995
996 s1 = kv1->k;
997 s2 = kv2->k;
998
999 if (debug_sort) {
1000 bwsprintf(stdout, s1, "; k1=<", ">");
1001 bwsprintf(stdout, s2, ", k2=<", ">");
1002 }
1003
1004 if (s1 == s2)
1005 return (0);
1006
1007 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1008 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1009
1010 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1011 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1012 b1 = MD5End(&ctx1, NULL);
1013 b2 = MD5End(&ctx2, NULL);
1014 if (b1 == NULL) {
1015 if (b2 == NULL)
1016 return (0);
1017 else {
1018 sort_free(b2);
1019 return (-1);
1020 }
1021 } else if (b2 == NULL) {
1022 sort_free(b1);
1023 return (+1);
1024 } else {
1025 int cmp_res;
1026
1027 cmp_res = strcmp(b1,b2);
1028 sort_free(b1);
1029 sort_free(b2);
1030
1031 if (!cmp_res)
1032 cmp_res = bwscoll(s1, s2, 0);
1033
1034 return (cmp_res);
1035 }
1036 }
1037
1038 /*
1039 * Implements version sort (-V).
1040 */
1041 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1042 versioncoll(struct key_value *kv1, struct key_value *kv2,
1043 size_t offset __unused)
1044 {
1045 struct bwstring *s1, *s2;
1046
1047 s1 = kv1->k;
1048 s2 = kv2->k;
1049
1050 if (debug_sort) {
1051 bwsprintf(stdout, s1, "; k1=<", ">");
1052 bwsprintf(stdout, s2, ", k2=<", ">");
1053 }
1054
1055 if (s1 == s2)
1056 return (0);
1057
1058 return (vcmp(s1, s2));
1059 }
1060
1061 /*
1062 * Check for minus infinity
1063 */
1064 static inline bool
huge_minus(double d,int err1)1065 huge_minus(double d, int err1)
1066 {
1067
1068 if (err1 == ERANGE)
1069 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1070 return (+1);
1071
1072 return (0);
1073 }
1074
1075 /*
1076 * Check for plus infinity
1077 */
1078 static inline bool
huge_plus(double d,int err1)1079 huge_plus(double d, int err1)
1080 {
1081
1082 if (err1 == ERANGE)
1083 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1084 return (+1);
1085
1086 return (0);
1087 }
1088
1089 /*
1090 * Check whether a function is a NAN
1091 */
1092 static bool
is_nan(double d)1093 is_nan(double d)
1094 {
1095
1096 return ((d == NAN) || (isnan(d)));
1097 }
1098
1099 /*
1100 * Compare two NANs
1101 */
1102 static int
cmp_nans(double d1,double d2)1103 cmp_nans(double d1, double d2)
1104 {
1105
1106 if (d1 < d2)
1107 return (-1);
1108 if (d1 > d2)
1109 return (+1);
1110 return (0);
1111 }
1112
1113 /*
1114 * Implements general numeric sort (-g).
1115 */
1116 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1117 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1118 size_t offset __unused)
1119 {
1120 double d1, d2;
1121 int err1, err2;
1122 bool empty1, empty2, key1_read, key2_read;
1123
1124 d1 = d2 = 0;
1125 err1 = err2 = 0;
1126 key1_read = key2_read = false;
1127
1128 if (debug_sort) {
1129 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1130 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1131 }
1132
1133 if (kv1->hint->status == HS_UNINITIALIZED) {
1134 errno = 0;
1135 d1 = bwstod(kv1->k, &empty1);
1136 err1 = errno;
1137
1138 if (empty1)
1139 kv1->hint->v.gh.notnum = true;
1140 else if (err1 == 0) {
1141 kv1->hint->v.gh.d = d1;
1142 kv1->hint->v.gh.nan = is_nan(d1);
1143 kv1->hint->status = HS_INITIALIZED;
1144 } else
1145 kv1->hint->status = HS_ERROR;
1146
1147 key1_read = true;
1148 }
1149
1150 if (kv2->hint->status == HS_UNINITIALIZED) {
1151 errno = 0;
1152 d2 = bwstod(kv2->k, &empty2);
1153 err2 = errno;
1154
1155 if (empty2)
1156 kv2->hint->v.gh.notnum = true;
1157 else if (err2 == 0) {
1158 kv2->hint->v.gh.d = d2;
1159 kv2->hint->v.gh.nan = is_nan(d2);
1160 kv2->hint->status = HS_INITIALIZED;
1161 } else
1162 kv2->hint->status = HS_ERROR;
1163
1164 key2_read = true;
1165 }
1166
1167 if (kv1->hint->status == HS_INITIALIZED &&
1168 kv2->hint->status == HS_INITIALIZED) {
1169 if (kv1->hint->v.gh.notnum)
1170 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1171 else if (kv2->hint->v.gh.notnum)
1172 return (+1);
1173
1174 if (kv1->hint->v.gh.nan)
1175 return ((kv2->hint->v.gh.nan) ?
1176 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1177 -1);
1178 else if (kv2->hint->v.gh.nan)
1179 return (+1);
1180
1181 d1 = kv1->hint->v.gh.d;
1182 d2 = kv2->hint->v.gh.d;
1183
1184 if (d1 < d2)
1185 return (-1);
1186 else if (d1 > d2)
1187 return (+1);
1188 else
1189 return (0);
1190 }
1191
1192 if (!key1_read) {
1193 errno = 0;
1194 d1 = bwstod(kv1->k, &empty1);
1195 err1 = errno;
1196 }
1197
1198 if (!key2_read) {
1199 errno = 0;
1200 d2 = bwstod(kv2->k, &empty2);
1201 err2 = errno;
1202 }
1203
1204 /* Non-value case: */
1205 if (empty1)
1206 return (empty2 ? 0 : -1);
1207 else if (empty2)
1208 return (+1);
1209
1210 /* NAN case */
1211 if (is_nan(d1))
1212 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1213 else if (is_nan(d2))
1214 return (+1);
1215
1216 /* Infinities */
1217 if (err1 == ERANGE || err2 == ERANGE) {
1218 /* Minus infinity case */
1219 if (huge_minus(d1, err1)) {
1220 if (huge_minus(d2, err2)) {
1221 if (d1 < d2)
1222 return (-1);
1223 if (d1 > d2)
1224 return (+1);
1225 return (0);
1226 } else
1227 return (-1);
1228
1229 } else if (huge_minus(d2, err2)) {
1230 if (huge_minus(d1, err1)) {
1231 if (d1 < d2)
1232 return (-1);
1233 if (d1 > d2)
1234 return (+1);
1235 return (0);
1236 } else
1237 return (+1);
1238 }
1239
1240 /* Plus infinity case */
1241 if (huge_plus(d1, err1)) {
1242 if (huge_plus(d2, err2)) {
1243 if (d1 < d2)
1244 return (-1);
1245 if (d1 > d2)
1246 return (+1);
1247 return (0);
1248 } else
1249 return (+1);
1250 } else if (huge_plus(d2, err2)) {
1251 if (huge_plus(d1, err1)) {
1252 if (d1 < d2)
1253 return (-1);
1254 if (d1 > d2)
1255 return (+1);
1256 return (0);
1257 } else
1258 return (-1);
1259 }
1260 }
1261
1262 if (d1 < d2)
1263 return (-1);
1264 if (d1 > d2)
1265 return (+1);
1266
1267 return (0);
1268 }
1269
1270 /*
1271 * Implements month sort (-M).
1272 */
1273 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1274 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1275 {
1276 int val1, val2;
1277 bool key1_read, key2_read;
1278
1279 val1 = val2 = 0;
1280 key1_read = key2_read = false;
1281
1282 if (debug_sort) {
1283 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1284 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1285 }
1286
1287 if (kv1->hint->status == HS_UNINITIALIZED) {
1288 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1289 key1_read = true;
1290 kv1->hint->status = HS_INITIALIZED;
1291 }
1292
1293 if (kv2->hint->status == HS_UNINITIALIZED) {
1294 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1295 key2_read = true;
1296 kv2->hint->status = HS_INITIALIZED;
1297 }
1298
1299 if (kv1->hint->status == HS_INITIALIZED) {
1300 val1 = kv1->hint->v.Mh.m;
1301 key1_read = true;
1302 }
1303
1304 if (kv2->hint->status == HS_INITIALIZED) {
1305 val2 = kv2->hint->v.Mh.m;
1306 key2_read = true;
1307 }
1308
1309 if (!key1_read)
1310 val1 = bws_month_score(kv1->k);
1311 if (!key2_read)
1312 val2 = bws_month_score(kv2->k);
1313
1314 if (val1 == val2) {
1315 return (0);
1316 }
1317 if (val1 < val2)
1318 return (-1);
1319 return (+1);
1320 }
1321