1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/types.h>
32
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <md5.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <wctype.h>
43
44 #include "coll.h"
45 #include "vsort.h"
46
47 struct key_specs *keys;
48 size_t keys_num = 0;
49
50 wint_t symbol_decimal_point = L'.';
51 /* there is no default thousands separator in collate rules: */
52 wint_t symbol_thousands_sep = 0;
53 wint_t symbol_negative_sign = L'-';
54 wint_t symbol_positive_sign = L'+';
55
56 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59 static int numcoll(struct key_value*, struct key_value *, size_t offset);
60 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63
64 /*
65 * Allocate keys array
66 */
67 struct keys_array *
keys_array_alloc(void)68 keys_array_alloc(void)
69 {
70 struct keys_array *ka;
71 size_t sz;
72
73 sz = keys_array_size();
74 ka = sort_malloc(sz);
75 memset(ka, 0, sz);
76
77 return (ka);
78 }
79
80 /*
81 * Calculate whether we need key hint space
82 */
83 static size_t
key_hint_size(void)84 key_hint_size(void)
85 {
86
87 return (need_hint ? sizeof(struct key_hint) : 0);
88 }
89
90 /*
91 * Calculate keys array size
92 */
93 size_t
keys_array_size(void)94 keys_array_size(void)
95 {
96
97 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98 }
99
100 /*
101 * Clean data of keys array
102 */
103 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)104 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105 {
106
107 if (ka) {
108 for (size_t i = 0; i < keys_num; ++i)
109 if (ka->key[i].k && ka->key[i].k != s)
110 bwsfree(ka->key[i].k);
111 memset(ka, 0, keys_array_size());
112 }
113 }
114
115 /*
116 * Set value of a key in the keys set
117 */
118 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)119 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120 {
121
122 if (ka && keys_num > ind) {
123 struct key_value *kv;
124
125 kv = &(ka->key[ind]);
126
127 if (kv->k && kv->k != s)
128 bwsfree(kv->k);
129 kv->k = s;
130 }
131 }
132
133 /*
134 * Initialize a sort list item
135 */
136 struct sort_list_item *
sort_list_item_alloc(void)137 sort_list_item_alloc(void)
138 {
139 struct sort_list_item *si;
140 size_t sz;
141
142 sz = sizeof(struct sort_list_item) + keys_array_size();
143 si = sort_malloc(sz);
144 memset(si, 0, sz);
145
146 return (si);
147 }
148
149 size_t
sort_list_item_size(struct sort_list_item * si)150 sort_list_item_size(struct sort_list_item *si)
151 {
152 size_t ret = 0;
153
154 if (si) {
155 ret = sizeof(struct sort_list_item) + keys_array_size();
156 if (si->str)
157 ret += bws_memsize(si->str);
158 for (size_t i = 0; i < keys_num; ++i) {
159 struct key_value *kv;
160
161 kv = &(si->ka.key[i]);
162
163 if (kv->k != si->str)
164 ret += bws_memsize(kv->k);
165 }
166 }
167 return (ret);
168 }
169
170 /*
171 * Calculate key for a sort list item
172 */
173 static void
sort_list_item_make_key(struct sort_list_item * si)174 sort_list_item_make_key(struct sort_list_item *si)
175 {
176
177 preproc(si->str, &(si->ka));
178 }
179
180 /*
181 * Set value of a sort list item.
182 * Return combined string and keys memory size.
183 */
184 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)185 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
186 {
187
188 if (si) {
189 clean_keys_array(si->str, &(si->ka));
190 if (si->str) {
191 if (si->str == str) {
192 /* we are trying to reset the same string */
193 return;
194 } else {
195 bwsfree(si->str);
196 si->str = NULL;
197 }
198 }
199 si->str = str;
200 sort_list_item_make_key(si);
201 }
202 }
203
204 /*
205 * De-allocate a sort list item object memory
206 */
207 void
sort_list_item_clean(struct sort_list_item * si)208 sort_list_item_clean(struct sort_list_item *si)
209 {
210
211 if (si) {
212 clean_keys_array(si->str, &(si->ka));
213 if (si->str) {
214 bwsfree(si->str);
215 si->str = NULL;
216 }
217 }
218 }
219
220 /*
221 * Skip columns according to specs
222 */
223 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)224 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
225 bool skip_blanks, bool *empty_key)
226 {
227 if (cols < 1)
228 return (BWSLEN(s) + 1);
229
230 if (skip_blanks)
231 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
232 ++start;
233
234 while (start < BWSLEN(s) && cols > 1) {
235 --cols;
236 ++start;
237 }
238
239 if (start >= BWSLEN(s))
240 *empty_key = true;
241
242 return (start);
243 }
244
245 /*
246 * Skip fields according to specs
247 */
248 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)249 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
250 {
251
252 if (fields < 2) {
253 if (BWSLEN(s) == 0)
254 *empty_field = true;
255 return (0);
256 } else if (!(sort_opts_vals.tflag)) {
257 size_t cpos = 0;
258 bool pb = true;
259
260 while (cpos < BWSLEN(s)) {
261 bool isblank;
262
263 isblank = iswblank(BWS_GET(s, cpos));
264
265 if (isblank && !pb) {
266 --fields;
267 if (fields <= 1)
268 return (cpos);
269 }
270 pb = isblank;
271 ++cpos;
272 }
273 if (fields > 1)
274 *empty_field = true;
275 return (cpos);
276 } else {
277 size_t cpos = 0;
278
279 while (cpos < BWSLEN(s)) {
280 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
281 --fields;
282 if (fields <= 1)
283 return (cpos + 1);
284 }
285 ++cpos;
286 }
287 if (fields > 1)
288 *empty_field = true;
289 return (cpos);
290 }
291 }
292
293 /*
294 * Find fields start
295 */
296 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)297 find_field_start(const struct bwstring *s, struct key_specs *ks,
298 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
299 {
300
301 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
302 if (!*empty_field)
303 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
304 ks->pos1b, empty_key);
305 else
306 *empty_key = true;
307 }
308
309 /*
310 * Find end key position
311 */
312 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)313 find_field_end(const struct bwstring *s, struct key_specs *ks)
314 {
315 size_t f2, next_field_start, pos_end;
316 bool empty_field, empty_key;
317
318 pos_end = 0;
319 next_field_start = 0;
320 empty_field = false;
321 empty_key = false;
322 f2 = ks->f2;
323
324 if (f2 == 0)
325 return (BWSLEN(s) + 1);
326 else {
327 if (ks->c2 == 0) {
328 next_field_start = skip_fields_to_start(s, f2 + 1,
329 &empty_field);
330 if ((next_field_start > 0) && sort_opts_vals.tflag &&
331 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
332 next_field_start - 1)))
333 --next_field_start;
334 } else
335 next_field_start = skip_fields_to_start(s, f2,
336 &empty_field);
337 }
338
339 if (empty_field || (next_field_start >= BWSLEN(s)))
340 return (BWSLEN(s) + 1);
341
342 if (ks->c2) {
343 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
344 ks->pos2b, &empty_key);
345 if (pos_end < BWSLEN(s))
346 ++pos_end;
347 } else
348 pos_end = next_field_start;
349
350 return (pos_end);
351 }
352
353 /*
354 * Cut a field according to the key specs
355 */
356 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)357 cut_field(const struct bwstring *s, struct key_specs *ks)
358 {
359 struct bwstring *ret = NULL;
360
361 if (s && ks) {
362 size_t field_start, key_end, key_start, sz;
363 bool empty_field, empty_key;
364
365 field_start = 0;
366 key_start = 0;
367 empty_field = false;
368 empty_key = false;
369
370 find_field_start(s, ks, &field_start, &key_start,
371 &empty_field, &empty_key);
372
373 if (empty_key)
374 sz = 0;
375 else {
376 key_end = find_field_end(s, ks);
377 sz = (key_end < key_start) ? 0 : (key_end - key_start);
378 }
379
380 ret = bwsalloc(sz);
381 if (sz)
382 bwsnocpy(ret, s, key_start, sz);
383 } else
384 ret = bwsalloc(0);
385
386 return (ret);
387 }
388
389 /*
390 * Preprocesses a line applying the necessary transformations
391 * specified by command line options and returns the preprocessed
392 * string, which can be used to compare.
393 */
394 int
preproc(struct bwstring * s,struct keys_array * ka)395 preproc(struct bwstring *s, struct keys_array *ka)
396 {
397
398 if (sort_opts_vals.kflag)
399 for (size_t i = 0; i < keys_num; i++) {
400 struct bwstring *key;
401 struct key_specs *kspecs;
402 struct sort_mods *sm;
403
404 kspecs = &(keys[i]);
405 key = cut_field(s, kspecs);
406
407 sm = &(kspecs->sm);
408 if (sm->dflag)
409 key = dictionary_order(key);
410 else if (sm->iflag)
411 key = ignore_nonprinting(key);
412 if (sm->fflag || sm->Mflag)
413 key = ignore_case(key);
414
415 set_key_on_keys_array(ka, key, i);
416 }
417 else {
418 struct bwstring *ret = NULL;
419 struct sort_mods *sm = default_sort_mods;
420
421 if (sm->bflag) {
422 if (ret == NULL)
423 ret = bwsdup(s);
424 ret = ignore_leading_blanks(ret);
425 }
426 if (sm->dflag) {
427 if (ret == NULL)
428 ret = bwsdup(s);
429 ret = dictionary_order(ret);
430 } else if (sm->iflag) {
431 if (ret == NULL)
432 ret = bwsdup(s);
433 ret = ignore_nonprinting(ret);
434 }
435 if (sm->fflag || sm->Mflag) {
436 if (ret == NULL)
437 ret = bwsdup(s);
438 ret = ignore_case(ret);
439 }
440 if (ret == NULL)
441 set_key_on_keys_array(ka, s, 0);
442 else
443 set_key_on_keys_array(ka, ret, 0);
444 }
445
446 return 0;
447 }
448
449 cmpcoll_t
get_sort_func(struct sort_mods * sm)450 get_sort_func(struct sort_mods *sm)
451 {
452
453 if (sm->nflag)
454 return (numcoll);
455 else if (sm->hflag)
456 return (hnumcoll);
457 else if (sm->gflag)
458 return (gnumcoll);
459 else if (sm->Mflag)
460 return (monthcoll);
461 else if (sm->Rflag)
462 return (randomcoll);
463 else if (sm->Vflag)
464 return (versioncoll);
465 else
466 return (wstrcoll);
467 }
468
469 /*
470 * Compares the given strings. Returns a positive number if
471 * the first precedes the second, a negative number if the second is
472 * the preceding one, and zero if they are equal. This function calls
473 * the underlying collate functions, which done the actual comparison.
474 */
475 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)476 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
477 {
478 struct sort_mods *sm;
479 int res = 0;
480
481 for (size_t i = 0; i < keys_num; ++i) {
482 sm = &(keys[i].sm);
483
484 if (sm->rflag)
485 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
486 else
487 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
488
489 if (res)
490 break;
491
492 /* offset applies to only the first key */
493 offset = 0;
494 }
495 return (res);
496 }
497
498 /*
499 * Compare two strings.
500 * Plain symbol-by-symbol comparison.
501 */
502 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)503 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
504 {
505
506 if (default_sort_mods->rflag) {
507 const struct bwstring *tmp;
508
509 tmp = s1;
510 s1 = s2;
511 s2 = tmp;
512 }
513
514 return (bwscoll(s1, s2, 0));
515 }
516
517 /*
518 * Compare a string and a sort list item, according to the sort specs.
519 */
520 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)521 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
522 {
523 struct keys_array *ka1;
524 int ret = 0;
525
526 ka1 = keys_array_alloc();
527
528 preproc(str1, ka1);
529
530 sort_list_item_make_key(*ss2);
531
532 if (debug_sort) {
533 bwsprintf(stdout, str1, "; s1=<", ">");
534 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
535 }
536
537 ret = key_coll(ka1, &((*ss2)->ka), 0);
538
539 if (debug_sort)
540 printf("; cmp1=%d", ret);
541
542 clean_keys_array(str1, ka1);
543 sort_free(ka1);
544
545 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
546 ret = top_level_str_coll(str1, ((*ss2)->str));
547 if (debug_sort)
548 printf("; cmp2=%d", ret);
549 }
550
551 if (debug_sort)
552 printf("\n");
553
554 return (ret);
555 }
556
557 /*
558 * Compare two sort list items, according to the sort specs.
559 */
560 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)561 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
562 size_t offset)
563 {
564 int ret;
565
566 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
567
568 if (debug_sort) {
569 if (offset)
570 printf("; offset=%d", (int) offset);
571 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
572 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
573 printf("; cmp1=%d\n", ret);
574 }
575
576 if (ret)
577 return (ret);
578
579 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
580 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
581 if (debug_sort)
582 printf("; cmp2=%d\n", ret);
583 }
584
585 return (ret);
586 }
587
588 /*
589 * Compare two sort list items, according to the sort specs.
590 */
591 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)592 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
593 {
594
595 return (list_coll_offset(ss1, ss2, 0));
596 }
597
598 #define LSCDEF(N) \
599 static int \
600 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
601 { \
602 \
603 return (list_coll_offset(ss1, ss2, N)); \
604 }
605
606 LSCDEF(1)
607 LSCDEF(2)
608 LSCDEF(3)
609 LSCDEF(4)
610 LSCDEF(5)
611 LSCDEF(6)
612 LSCDEF(7)
613 LSCDEF(8)
614 LSCDEF(9)
615 LSCDEF(10)
616 LSCDEF(11)
617 LSCDEF(12)
618 LSCDEF(13)
619 LSCDEF(14)
620 LSCDEF(15)
621 LSCDEF(16)
622 LSCDEF(17)
623 LSCDEF(18)
624 LSCDEF(19)
625 LSCDEF(20)
626
627 listcoll_t
get_list_call_func(size_t offset)628 get_list_call_func(size_t offset)
629 {
630 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
631 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
632 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
633 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
634 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
635 list_coll_18, list_coll_19, list_coll_20 };
636
637 if (offset <= 20)
638 return (lsarray[offset]);
639
640 return (list_coll);
641 }
642
643 /*
644 * Compare two sort list items, only by their original string.
645 */
646 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)647 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
648 {
649
650 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
651 }
652
653 /*
654 * Maximum size of a number in the string (before or after decimal point)
655 */
656 #define MAX_NUM_SIZE (128)
657
658 /*
659 * Set suffix value
660 */
setsuffix(wchar_t c,unsigned char * si)661 static void setsuffix(wchar_t c, unsigned char *si)
662 {
663 switch (c){
664 case L'k':
665 case L'K':
666 *si = 1;
667 break;
668 case L'M':
669 *si = 2;
670 break;
671 case L'G':
672 *si = 3;
673 break;
674 case L'T':
675 *si = 4;
676 break;
677 case L'P':
678 *si = 5;
679 break;
680 case L'E':
681 *si = 6;
682 break;
683 case L'Z':
684 *si = 7;
685 break;
686 case L'Y':
687 *si = 8;
688 break;
689 default:
690 *si = 0;
691 };
692 }
693
694 /*
695 * Read string s and parse the string into a fixed-decimal-point number.
696 * sign equals -1 if the number is negative (explicit plus is not allowed,
697 * according to GNU sort's "info sort".
698 * The number part before decimal point is in the smain, after the decimal
699 * point is in sfrac, tail is the pointer to the remainder of the string.
700 */
701 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)702 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
703 {
704 bwstring_iterator s;
705
706 s = bws_begin(s0);
707
708 /* always end the fraction with zero, even if we have no fraction */
709 sfrac[0] = 0;
710
711 while (iswblank(bws_get_iter_value(s)))
712 s = bws_iterator_inc(s, 1);
713
714 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
715 *sign = -1;
716 s = bws_iterator_inc(s, 1);
717 }
718
719 // This is '0', not '\0', do not change this
720 while (iswdigit(bws_get_iter_value(s)) &&
721 (bws_get_iter_value(s) == L'0'))
722 s = bws_iterator_inc(s, 1);
723
724 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
725 if (iswdigit(bws_get_iter_value(s))) {
726 smain[*main_len] = bws_get_iter_value(s);
727 s = bws_iterator_inc(s, 1);
728 *main_len += 1;
729 } else if (symbol_thousands_sep &&
730 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
731 s = bws_iterator_inc(s, 1);
732 else
733 break;
734 }
735
736 smain[*main_len] = 0;
737
738 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
739 s = bws_iterator_inc(s, 1);
740 while (iswdigit(bws_get_iter_value(s)) &&
741 *frac_len < MAX_NUM_SIZE) {
742 sfrac[*frac_len] = bws_get_iter_value(s);
743 s = bws_iterator_inc(s, 1);
744 *frac_len += 1;
745 }
746 sfrac[*frac_len] = 0;
747
748 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
749 --(*frac_len);
750 sfrac[*frac_len] = L'\0';
751 }
752 }
753
754 setsuffix(bws_get_iter_value(s),si);
755
756 if ((*main_len + *frac_len) == 0)
757 *sign = 0;
758
759 return (0);
760 }
761
762 /*
763 * Implements string sort.
764 */
765 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)766 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
767 {
768
769 if (debug_sort) {
770 if (offset)
771 printf("; offset=%d\n", (int) offset);
772 bwsprintf(stdout, kv1->k, "; k1=<", ">");
773 printf("(%zu)", BWSLEN(kv1->k));
774 bwsprintf(stdout, kv2->k, ", k2=<", ">");
775 printf("(%zu)", BWSLEN(kv2->k));
776 }
777
778 return (bwscoll(kv1->k, kv2->k, offset));
779 }
780
781 /*
782 * Compare two suffixes
783 */
784 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)785 cmpsuffix(unsigned char si1, unsigned char si2)
786 {
787
788 return ((char)si1 - (char)si2);
789 }
790
791 /*
792 * Implements numeric sort for -n and -h.
793 */
794 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)795 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
796 size_t offset __unused, bool use_suffix)
797 {
798 struct bwstring *s1, *s2;
799 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
800 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
801 int cmp_res, sign1, sign2;
802 size_t frac1, frac2, main1, main2;
803 unsigned char SI1, SI2;
804 bool e1, e2, key1_read, key2_read;
805
806 s1 = kv1->k;
807 s2 = kv2->k;
808 sign1 = sign2 = 0;
809 main1 = main2 = 0;
810 frac1 = frac2 = 0;
811
812 cmp_res = 0;
813 key1_read = key2_read = false;
814
815 if (debug_sort) {
816 bwsprintf(stdout, s1, "; k1=<", ">");
817 bwsprintf(stdout, s2, ", k2=<", ">");
818 }
819
820 if (s1 == s2)
821 return (0);
822
823 if (kv1->hint->status == HS_UNINITIALIZED) {
824 /* read the number from the string */
825 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
826 key1_read = true;
827 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
828 if(main1 < 1 && frac1 < 1)
829 kv1->hint->v.nh.empty=true;
830 kv1->hint->v.nh.si = SI1;
831 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
832 HS_INITIALIZED : HS_ERROR;
833 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
834 }
835
836 if (kv2->hint->status == HS_UNINITIALIZED) {
837 /* read the number from the string */
838 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
839 key2_read = true;
840 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
841 if(main2 < 1 && frac2 < 1)
842 kv2->hint->v.nh.empty=true;
843 kv2->hint->v.nh.si = SI2;
844 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
845 HS_INITIALIZED : HS_ERROR;
846 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
847 }
848
849 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
850 HS_INITIALIZED) {
851 unsigned long long n1, n2;
852 bool neg1, neg2;
853
854 e1 = kv1->hint->v.nh.empty;
855 e2 = kv2->hint->v.nh.empty;
856
857 if (e1 && e2)
858 return (0);
859
860 neg1 = kv1->hint->v.nh.neg;
861 neg2 = kv2->hint->v.nh.neg;
862
863 if (neg1 && !neg2)
864 return (-1);
865 if (neg2 && !neg1)
866 return (+1);
867
868 if (e1)
869 return (neg2 ? +1 : -1);
870 else if (e2)
871 return (neg1 ? -1 : +1);
872
873
874 if (use_suffix) {
875 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
876 if (cmp_res)
877 return (neg1 ? -cmp_res : cmp_res);
878 }
879
880 n1 = kv1->hint->v.nh.n1;
881 n2 = kv2->hint->v.nh.n1;
882 if (n1 < n2)
883 return (neg1 ? +1 : -1);
884 else if (n1 > n2)
885 return (neg1 ? -1 : +1);
886 }
887
888 /* read the numbers from the strings */
889 if (!key1_read)
890 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
891 if (!key2_read)
892 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
893
894 e1 = ((main1 + frac1) == 0);
895 e2 = ((main2 + frac2) == 0);
896
897 if (e1 && e2)
898 return (0);
899
900 /* we know the result if the signs are different */
901 if (sign1 < 0 && sign2 >= 0)
902 return (-1);
903 if (sign1 >= 0 && sign2 < 0)
904 return (+1);
905
906 if (e1)
907 return ((sign2 < 0) ? +1 : -1);
908 else if (e2)
909 return ((sign1 < 0) ? -1 : +1);
910
911 if (use_suffix) {
912 cmp_res = cmpsuffix(SI1, SI2);
913 if (cmp_res)
914 return ((sign1 < 0) ? -cmp_res : cmp_res);
915 }
916
917 /* if both numbers are empty assume that the strings are equal */
918 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
919 return (0);
920
921 /*
922 * if the main part is of different size, we know the result
923 * (because the leading zeros are removed)
924 */
925 if (main1 < main2)
926 cmp_res = -1;
927 else if (main1 > main2)
928 cmp_res = +1;
929 /* if the sizes are equal then simple non-collate string compare gives the correct result */
930 else
931 cmp_res = wcscmp(smain1, smain2);
932
933 /* check fraction */
934 if (!cmp_res)
935 cmp_res = wcscmp(sfrac1, sfrac2);
936
937 if (!cmp_res)
938 return (0);
939
940 /* reverse result if the signs are negative */
941 if (sign1 < 0 && sign2 < 0)
942 cmp_res = -cmp_res;
943
944 return (cmp_res);
945 }
946
947 /*
948 * Implements numeric sort (-n).
949 */
950 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)951 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
952 {
953
954 return (numcoll_impl(kv1, kv2, offset, false));
955 }
956
957 /*
958 * Implements 'human' numeric sort (-h).
959 */
960 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)961 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
962 {
963
964 return (numcoll_impl(kv1, kv2, offset, true));
965 }
966
967 /*
968 * Implements random sort (-R).
969 */
970 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)971 randomcoll(struct key_value *kv1, struct key_value *kv2,
972 size_t offset __unused)
973 {
974 struct bwstring *s1, *s2;
975 MD5_CTX ctx1, ctx2;
976 char *b1, *b2;
977
978 s1 = kv1->k;
979 s2 = kv2->k;
980
981 if (debug_sort) {
982 bwsprintf(stdout, s1, "; k1=<", ">");
983 bwsprintf(stdout, s2, ", k2=<", ">");
984 }
985
986 if (s1 == s2)
987 return (0);
988
989 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
990 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
991
992 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
993 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
994 b1 = MD5End(&ctx1, NULL);
995 b2 = MD5End(&ctx2, NULL);
996 if (b1 == NULL) {
997 if (b2 == NULL)
998 return (0);
999 else {
1000 sort_free(b2);
1001 return (-1);
1002 }
1003 } else if (b2 == NULL) {
1004 sort_free(b1);
1005 return (+1);
1006 } else {
1007 int cmp_res;
1008
1009 cmp_res = strcmp(b1,b2);
1010 sort_free(b1);
1011 sort_free(b2);
1012
1013 if (!cmp_res)
1014 cmp_res = bwscoll(s1, s2, 0);
1015
1016 return (cmp_res);
1017 }
1018 }
1019
1020 /*
1021 * Implements version sort (-V).
1022 */
1023 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1024 versioncoll(struct key_value *kv1, struct key_value *kv2,
1025 size_t offset __unused)
1026 {
1027 struct bwstring *s1, *s2;
1028
1029 s1 = kv1->k;
1030 s2 = kv2->k;
1031
1032 if (debug_sort) {
1033 bwsprintf(stdout, s1, "; k1=<", ">");
1034 bwsprintf(stdout, s2, ", k2=<", ">");
1035 }
1036
1037 if (s1 == s2)
1038 return (0);
1039
1040 return (vcmp(s1, s2));
1041 }
1042
1043 /*
1044 * Check for minus infinity
1045 */
1046 static inline bool
huge_minus(double d,int err1)1047 huge_minus(double d, int err1)
1048 {
1049
1050 if (err1 == ERANGE)
1051 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1052 return (+1);
1053
1054 return (0);
1055 }
1056
1057 /*
1058 * Check for plus infinity
1059 */
1060 static inline bool
huge_plus(double d,int err1)1061 huge_plus(double d, int err1)
1062 {
1063
1064 if (err1 == ERANGE)
1065 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1066 return (+1);
1067
1068 return (0);
1069 }
1070
1071 /*
1072 * Check whether a function is a NAN
1073 */
1074 static bool
is_nan(double d)1075 is_nan(double d)
1076 {
1077
1078 return ((d == NAN) || (isnan(d)));
1079 }
1080
1081 /*
1082 * Compare two NANs
1083 */
1084 static int
cmp_nans(double d1,double d2)1085 cmp_nans(double d1, double d2)
1086 {
1087
1088 if (d1 < d2)
1089 return (-1);
1090 if (d2 > d2)
1091 return (+1);
1092 return (0);
1093 }
1094
1095 /*
1096 * Implements general numeric sort (-g).
1097 */
1098 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1099 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1100 size_t offset __unused)
1101 {
1102 double d1, d2;
1103 int err1, err2;
1104 bool empty1, empty2, key1_read, key2_read;
1105
1106 d1 = d2 = 0;
1107 err1 = err2 = 0;
1108 key1_read = key2_read = false;
1109
1110 if (debug_sort) {
1111 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1112 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1113 }
1114
1115 if (kv1->hint->status == HS_UNINITIALIZED) {
1116 errno = 0;
1117 d1 = bwstod(kv1->k, &empty1);
1118 err1 = errno;
1119
1120 if (empty1)
1121 kv1->hint->v.gh.notnum = true;
1122 else if (err1 == 0) {
1123 kv1->hint->v.gh.d = d1;
1124 kv1->hint->v.gh.nan = is_nan(d1);
1125 kv1->hint->status = HS_INITIALIZED;
1126 } else
1127 kv1->hint->status = HS_ERROR;
1128
1129 key1_read = true;
1130 }
1131
1132 if (kv2->hint->status == HS_UNINITIALIZED) {
1133 errno = 0;
1134 d2 = bwstod(kv2->k, &empty2);
1135 err2 = errno;
1136
1137 if (empty2)
1138 kv2->hint->v.gh.notnum = true;
1139 else if (err2 == 0) {
1140 kv2->hint->v.gh.d = d2;
1141 kv2->hint->v.gh.nan = is_nan(d2);
1142 kv2->hint->status = HS_INITIALIZED;
1143 } else
1144 kv2->hint->status = HS_ERROR;
1145
1146 key2_read = true;
1147 }
1148
1149 if (kv1->hint->status == HS_INITIALIZED &&
1150 kv2->hint->status == HS_INITIALIZED) {
1151 if (kv1->hint->v.gh.notnum)
1152 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1153 else if (kv2->hint->v.gh.notnum)
1154 return (+1);
1155
1156 if (kv1->hint->v.gh.nan)
1157 return ((kv2->hint->v.gh.nan) ?
1158 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1159 -1);
1160 else if (kv2->hint->v.gh.nan)
1161 return (+1);
1162
1163 d1 = kv1->hint->v.gh.d;
1164 d2 = kv2->hint->v.gh.d;
1165
1166 if (d1 < d2)
1167 return (-1);
1168 else if (d1 > d2)
1169 return (+1);
1170 else
1171 return (0);
1172 }
1173
1174 if (!key1_read) {
1175 errno = 0;
1176 d1 = bwstod(kv1->k, &empty1);
1177 err1 = errno;
1178 }
1179
1180 if (!key2_read) {
1181 errno = 0;
1182 d2 = bwstod(kv2->k, &empty2);
1183 err2 = errno;
1184 }
1185
1186 /* Non-value case: */
1187 if (empty1)
1188 return (empty2 ? 0 : -1);
1189 else if (empty2)
1190 return (+1);
1191
1192 /* NAN case */
1193 if (is_nan(d1))
1194 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1195 else if (is_nan(d2))
1196 return (+1);
1197
1198 /* Infinities */
1199 if (err1 == ERANGE || err2 == ERANGE) {
1200 /* Minus infinity case */
1201 if (huge_minus(d1, err1)) {
1202 if (huge_minus(d2, err2)) {
1203 if (d1 < d2)
1204 return (-1);
1205 if (d1 > d2)
1206 return (+1);
1207 return (0);
1208 } else
1209 return (-1);
1210
1211 } else if (huge_minus(d2, err2)) {
1212 if (huge_minus(d1, err1)) {
1213 if (d1 < d2)
1214 return (-1);
1215 if (d1 > d2)
1216 return (+1);
1217 return (0);
1218 } else
1219 return (+1);
1220 }
1221
1222 /* Plus infinity case */
1223 if (huge_plus(d1, err1)) {
1224 if (huge_plus(d2, err2)) {
1225 if (d1 < d2)
1226 return (-1);
1227 if (d1 > d2)
1228 return (+1);
1229 return (0);
1230 } else
1231 return (+1);
1232 } else if (huge_plus(d2, err2)) {
1233 if (huge_plus(d1, err1)) {
1234 if (d1 < d2)
1235 return (-1);
1236 if (d1 > d2)
1237 return (+1);
1238 return (0);
1239 } else
1240 return (-1);
1241 }
1242 }
1243
1244 if (d1 < d2)
1245 return (-1);
1246 if (d1 > d2)
1247 return (+1);
1248
1249 return (0);
1250 }
1251
1252 /*
1253 * Implements month sort (-M).
1254 */
1255 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1256 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1257 {
1258 int val1, val2;
1259 bool key1_read, key2_read;
1260
1261 val1 = val2 = 0;
1262 key1_read = key2_read = false;
1263
1264 if (debug_sort) {
1265 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1266 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1267 }
1268
1269 if (kv1->hint->status == HS_UNINITIALIZED) {
1270 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1271 key1_read = true;
1272 kv1->hint->status = HS_INITIALIZED;
1273 }
1274
1275 if (kv2->hint->status == HS_UNINITIALIZED) {
1276 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1277 key2_read = true;
1278 kv2->hint->status = HS_INITIALIZED;
1279 }
1280
1281 if (kv1->hint->status == HS_INITIALIZED) {
1282 val1 = kv1->hint->v.Mh.m;
1283 key1_read = true;
1284 }
1285
1286 if (kv2->hint->status == HS_INITIALIZED) {
1287 val2 = kv2->hint->v.Mh.m;
1288 key2_read = true;
1289 }
1290
1291 if (!key1_read)
1292 val1 = bws_month_score(kv1->k);
1293 if (!key2_read)
1294 val2 = bws_month_score(kv2->k);
1295
1296 if (val1 == val2) {
1297 return (0);
1298 }
1299 if (val1 < val2)
1300 return (-1);
1301 return (+1);
1302 }
1303