1 /*        $NetBSD: split.c,v 1.33 2024/02/09 22:08:38 andvar Exp $    */
2 
3 /*
4  * Copyright (c) 1987, 1993, 1994
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
35  The Regents of the University of California.  All rights reserved.");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)split.c     8.3 (Berkeley) 4/25/94";
41 #endif
42 __RCSID("$NetBSD: split.c,v 1.33 2024/02/09 22:08:38 andvar Exp $");
43 #endif /* not lint */
44 
45 #include <sys/param.h>
46 #include <sys/stat.h>
47 
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <unistd.h>
56 
/* Number of lines written to each output file when no count is given. */
#define DEFLINE	1000

static int file_open;		/* Nonzero while an output file is open. */
static int ifd = STDIN_FILENO;	/* Input descriptor; standard input by default. */
static int ofd = -1;		/* Output descriptor; -1 until the first file. */
static char *fname;		/* Output file name: prefix, then suffix. */
static size_t sfxlen = 2;	/* Suffix length. */
static int autosfx = 1;		/* Whether to auto-extend the suffix length. */
64 
65 static void newfile(void);
66 static void split1(off_t, int) __dead;
67 static void split2(off_t) __dead;
68 static void split3(off_t) __dead;
69 static void usage(void) __dead;
70 static size_t bigwrite(int, void const *, size_t);
71 
72 int
main(int argc,char * argv[])73 main(int argc, char *argv[])
74 {
75           int ch;
76           char *ep, *p;
77           char const *base;
78           off_t bytecnt = 0;  /* Byte count to split on. */
79           off_t numlines = 0; /* Line count to split on. */
80           off_t chunks = 0;   /* Number of chunks to split into. */
81 
82           while ((ch = getopt(argc, argv, "0123456789a:b:l:n:")) != -1)
83                     switch (ch) {
84                     case '0': case '1': case '2': case '3': case '4':
85                     case '5': case '6': case '7': case '8': case '9':
86                               /*
87                                * Undocumented kludge: split was originally designed
88                                * to take a number after a dash.
89                                */
90                               if (numlines == 0) {
91                                         p = argv[optind - 1];
92                                         if (p[0] == '-' && p[1] == ch && !p[2])
93                                                   p++;
94                                         else
95                                                   p = argv[optind] + 1;
96                                         numlines = strtoull(p, &ep, 10);
97                                         if (numlines == 0 || *ep != '\0')
98                                                   errx(EXIT_FAILURE, "%s: illegal line count.", p);
99                               }
100                               break;
101                     case 'a':           /* Suffix length. */
102                               if (!isdigit((unsigned char)optarg[0]) ||
103                                   (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
104                                   *ep != '\0')
105                                         errx(EXIT_FAILURE, "%s: illegal suffix length.", optarg);
106                               autosfx = 0;
107                               break;
108                     case 'b':           /* Byte count. */
109                               if (!isdigit((unsigned char)optarg[0]) ||
110                                   (bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
111                                   (*ep != '\0' && *ep != 'k' && *ep != 'm'))
112                                         errx(EXIT_FAILURE, "%s: illegal byte count.", optarg);
113                               if (*ep == 'k')
114                                         bytecnt *= 1024;
115                               else if (*ep == 'm')
116                                         bytecnt *= 1024 * 1024;
117                               break;
118                     case 'l':           /* Line count. */
119                               if (numlines != 0)
120                                         usage();
121                               if (!isdigit((unsigned char)optarg[0]) ||
122                                   (numlines = strtoull(optarg, &ep, 10)) == 0 ||
123                                   *ep != '\0')
124                                         errx(EXIT_FAILURE, "%s: illegal line count.", optarg);
125                               break;
126                     case 'n':           /* Chunks. */
127                               if (!isdigit((unsigned char)optarg[0]) ||
128                                   (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
129                                   *ep != '\0')
130                                         errx(EXIT_FAILURE, "%s: illegal number of chunks.", optarg);
131                               break;
132                     default:
133                               usage();
134                     }
135           argv += optind;
136           argc -= optind;
137 
138           if (*argv != NULL) {
139                     if (strcmp(*argv, "-") != 0 &&
140                         (ifd = open(*argv, O_RDONLY, 0)) < 0)
141                               err(EXIT_FAILURE, "%s", *argv);
142                     ++argv;
143           }
144 
145 
146           base = (*argv != NULL) ? *argv++ : "x";
147           if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
148                     err(EXIT_FAILURE, NULL);
149           (void)strcpy(fname, base);              /* File name prefix. */
150 
151           if (*argv != NULL)
152                     usage();
153 
154           if (numlines == 0)
155                     numlines = DEFLINE;
156           else if (bytecnt || chunks)
157                     usage();
158 
159           if (bytecnt && chunks)
160                     usage();
161 
162           if (bytecnt)
163                     split1(bytecnt, 0);
164           else if (chunks)
165                     split3(chunks);
166           else
167                     split2(numlines);
168 
169           return 0;
170 }
171 
172 /*
173  * split1 --
174  *        Split the input by bytes.
175  */
176 static void
split1(off_t bytecnt,int maxcnt)177 split1(off_t bytecnt, int maxcnt)
178 {
179           off_t bcnt;
180           ssize_t dist, len;
181           char *C;
182           char bfr[MAXBSIZE];
183           int nfiles;
184 
185           nfiles = 0;
186 
187           for (bcnt = 0;;)
188                     switch (len = read(ifd, bfr, MAXBSIZE)) {
189                     case 0:
190                               exit(EXIT_SUCCESS);
191                               /* NOTREACHED */
192                     case -1:
193                               err(EXIT_FAILURE, "read");
194                               /* NOTREACHED */
195                     default:
196                               if (!file_open) {
197                                         if (!maxcnt || (nfiles < maxcnt)) {
198                                                   newfile();
199                                                   nfiles++;
200                                                   file_open = 1;
201                                         }
202                               }
203                               if (bcnt + len >= bytecnt) {
204                                         /* LINTED: bytecnt - bcnt <= len */
205                                         dist = bytecnt - bcnt;
206                                         if (bigwrite(ofd, bfr, dist) != (size_t)dist)
207                                                   err(EXIT_FAILURE, "write");
208                                         len -= dist;
209                                         for (C = bfr + dist; len >= bytecnt;
210                                             /* LINTED: bytecnt <= len */
211                                             len -= bytecnt, C += bytecnt) {
212                                                   if (!maxcnt || (nfiles < maxcnt)) {
213                                                             newfile();
214                                                             nfiles++;
215                                                   }
216                                                   /* LINTED: as above */
217                                                   if (bigwrite(ofd,
218                                                       C, bytecnt) != (size_t)bytecnt)
219                                                             err(EXIT_FAILURE, "write");
220                                         }
221                                         if (len) {
222                                                   if (!maxcnt || (nfiles < maxcnt)) {
223                                                             newfile();
224                                                             nfiles++;
225                                                   }
226                                                   /* LINTED: len >= 0 */
227                                                   if (bigwrite(ofd, C, len) != (size_t)len)
228                                                             err(EXIT_FAILURE, "write");
229                                         } else
230                                                   file_open = 0;
231                                         bcnt = len;
232                               } else {
233                                         bcnt += len;
234                                         /* LINTED: len >= 0 */
235                                         if (bigwrite(ofd, bfr, len) != (size_t)len)
236                                                   err(EXIT_FAILURE, "write");
237                               }
238                     }
239 }
240 
241 /*
242  * split2 --
243  *        Split the input by lines.
244  */
245 static void
split2(off_t numlines)246 split2(off_t numlines)
247 {
248           off_t lcnt;
249           size_t bcnt;
250           ssize_t len;
251           char *Ce, *Cs;
252           char bfr[MAXBSIZE];
253 
254           for (lcnt = 0;;)
255                     switch (len = read(ifd, bfr, MAXBSIZE)) {
256                     case 0:
257                               exit(EXIT_SUCCESS);
258                               /* NOTREACHED */
259                     case -1:
260                               err(EXIT_FAILURE, "read");
261                               /* NOTREACHED */
262                     default:
263                               if (!file_open) {
264                                         newfile();
265                                         file_open = 1;
266                               }
267                               for (Cs = Ce = bfr; len--; Ce++)
268                                         if (*Ce == '\n' && ++lcnt == numlines) {
269                                                   bcnt = Ce - Cs + 1;
270                                                   if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
271                                                             err(EXIT_FAILURE, "write");
272                                                   lcnt = 0;
273                                                   Cs = Ce + 1;
274                                                   if (len)
275                                                             newfile();
276                                                   else
277                                                             file_open = 0;
278                                         }
279                               if (Cs < Ce) {
280                                         bcnt = Ce - Cs;
281                                         if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
282                                                   err(EXIT_FAILURE, "write");
283                               }
284                     }
285 }
286 
287 /*
288  * split3 --
289  *        Split the input into specified number of chunks
290  */
291 static void
split3(off_t chunks)292 split3(off_t chunks)
293 {
294           struct stat sb;
295 
296           if (fstat(ifd, &sb) == -1) {
297                     err(EXIT_FAILURE, "stat");
298                     /* NOTREACHED */
299           }
300 
301           if (chunks > sb.st_size) {
302                     errx(EXIT_FAILURE, "can't split into more than %d files",
303                                         (int)sb.st_size);
304                     /* NOTREACHED */
305           }
306 
307           split1(sb.st_size/chunks, chunks);
308 }
309 
310 /*
311  * newfile --
312  *        Open a new output file.
313  */
314 static void
newfile(void)315 newfile(void)
316 {
317           static int fnum;
318           static char *fpnt;
319           int quot, i;
320 
321           if (ofd == -1) {
322                     fpnt = fname + strlen(fname);
323                     fpnt[sfxlen] = '\0';
324           } else if (close(ofd) != 0)
325                     err(EXIT_FAILURE, "%s", fname);
326 
327           quot = fnum;
328 
329           /* If '-a' is not specified, then we automatically expand the
330            * suffix length to accommodate splitting all input.  We do this
331            * by moving the suffix pointer (fpnt) forward and incrementing
332            * sfxlen by one, thereby yielding an additional two characters
333            * and allowing all output files to sort such that 'cat *' yields
334            * the input in order.  I.e., the order is '... xyy xyz xzaaa
335            * xzaab ... xzyzy, xzyzz, xzzaaaa, xzzaaab' and so on. */
336           if (autosfx && (fpnt[0] == 'y') && (strspn(fpnt+1, "z") == strlen(fpnt+1))) {
337                     if ((fname = realloc(fname, strlen(fname) + sfxlen + 2 + 1)) == NULL)
338                               err(EXIT_FAILURE, NULL);
339                               /* NOTREACHED */
340 
341                     fpnt = fname + strlen(fname) - sfxlen;
342                     fpnt[sfxlen + 2] = '\0';
343 
344                     fpnt[0] = 'z';
345                     fpnt[1] = 'a';
346 
347                     /*  Basename | Suffix
348                      *  before:
349                      *  x        | yz
350                      *  after:
351                      *  xz       | a.. */
352                     fpnt++;
353                     sfxlen++;
354 
355                     /* Reset so we start back at all 'a's in our extended suffix. */
356                     quot = 0;
357                     fnum = 0;
358           }
359 
360           for (i = sfxlen - 1; i >= 0; i--) {
361                     fpnt[i] = quot % 26 + 'a';
362                     quot = quot / 26;
363           }
364           if (quot > 0)
365                     errx(EXIT_FAILURE, "too many files.");
366           ++fnum;
367           if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
368                     err(EXIT_FAILURE, "%s", fname);
369 }
370 
/*
 * bigwrite --
 *	Write len bytes from buf to fd, calling write(2) again after each
 *	short write until everything has been written or a call fails.
 *	Returns the number of bytes written, which is less than len only
 *	if write(2) returned an error.
 */
static size_t
bigwrite(int fd, const void *buf, size_t len)
{
	const char *cursor = buf;
	size_t remaining = len;
	ssize_t n;

	while (remaining > 0) {
		n = write(fd, cursor, remaining);
		if (n == -1)
			break;
		cursor += n;
		remaining -= n;
	}
	return len - remaining;
}
387 
388 
/*
 * usage --
 *	Print the command synopsis to standard error and exit with
 *	EXIT_FAILURE.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-b byte_count] [-l line_count] [-n chunk_count] "
	    "[-a suffix_length] [file [prefix]]\n", prog);
	exit(EXIT_FAILURE);
}
397