1 /* $OpenBSD: cut.c,v 1.11 2003/06/10 22:20:45 deraadt Exp $ */
2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */
3
4 /*
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
38 The Regents of the University of California. All rights reserved.\n");
39 __SCCSID("@(#)cut.c 8.3 (Berkeley) 5/4/95");
40 __RCSID("$MirOS: src/usr.bin/cut/cut.c,v 1.2 2007/07/05 23:09:39 tg Exp $");
41
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51
/* Command-line state, filled in by main() while parsing options. */
int cflag;	/* set when -b or -c was given (byte/character mode) */
char dchar;	/* field delimiter for -f; main() defaults it to '\t' */
int dflag;	/* set when -d was given */
int fflag;	/* set when -f was given (field mode) */
int sflag;	/* set when -s was given; f_cut() then drops lines without a delimiter */

/* Per-stream workers; main() picks one of the two according to the mode. */
void c_cut(FILE *, char *);
void f_cut(FILE *, char *);
/* Parses the list argument of -b/-c/-f. */
void get_list(char *);
/* Prints the synopsis to stderr and exits with status 1. */
void usage(void);
62
63 int
main(int argc,char * argv[])64 main(int argc, char *argv[])
65 {
66 FILE *fp;
67 void (*fcn)(FILE *, char *);
68 int ch;
69
70 #ifndef __MirBSD__
71 setlocale (LC_ALL, "");
72 #endif
73
74 dchar = '\t'; /* default delimiter is \t */
75
76 /* Since we don't support multi-byte characters, the -c and -b
77 options are equivalent, and the -n option is meaningless. */
78 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
79 switch(ch) {
80 case 'b':
81 case 'c':
82 fcn = c_cut;
83 get_list(optarg);
84 cflag = 1;
85 break;
86 case 'd':
87 dchar = *optarg;
88 dflag = 1;
89 break;
90 case 'f':
91 get_list(optarg);
92 fcn = f_cut;
93 fflag = 1;
94 break;
95 case 's':
96 sflag = 1;
97 break;
98 case 'n':
99 break;
100 case '?':
101 default:
102 usage();
103 }
104 argc -= optind;
105 argv += optind;
106
107 if (fflag) {
108 if (cflag)
109 usage();
110 } else if (!cflag || dflag || sflag)
111 usage();
112
113 if (*argv)
114 for (; *argv; ++argv) {
115 if (!(fp = fopen(*argv, "r")))
116 err(1, "%s", *argv);
117 fcn(fp, *argv);
118 (void)fclose(fp);
119 }
120 else
121 fcn(stdin, "stdin");
122 exit(0);
123 }
124
/*
 * Selection state produced by get_list():
 *   autostart - largest N seen in a "-N" item; positions 1..N are selected
 *   autostop  - smallest N seen in an "N-" item; everything after it is kept
 *   maxval    - highest position the per-line loops have to look at
 */
int autostart, autostop, maxval;

/* positions[i] is non-zero when column/field i (1-based) is selected. */
char positions[_POSIX2_LINE_MAX + 1];

/*
 * Parse a -b/-c/-f list and record the selection in the globals above.
 *
 * list is split in place (strsep) on commas, blanks and tabs.  Each item
 * is "N", "N-M", "N-" or "-N"; any other form, a zero value, or a value
 * above _POSIX2_LINE_MAX terminates the program through errx(1, ...).
 */
void
get_list(char *list)
{
	long start, stop;
	int open_ended, setautostart;
	char *p;

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	while ((p = strsep(&list, ", \t")) != NULL) {
		setautostart = open_ended = 0;
		start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		/* <ctype.h> functions need an unsigned char value */
		if (isdigit((unsigned char)*p))
			start = stop = strtol(p, &p, 10);
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				++p;
				open_ended = 1;
			}
		}
		if (*p)
			errx(1, "[-cf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-cf] list: values may not include zero");
		/*
		 * Validate both ends while they are still long: strtol()
		 * returns LONG_MAX on overflow, and narrowing to int first
		 * could turn a huge value into a small or negative index.
		 */
		if (start > _POSIX2_LINE_MAX || stop > _POSIX2_LINE_MAX)
			errx(1, "[-cf] list: %ld too large (max %d)",
			    start > stop ? start : stop, _POSIX2_LINE_MAX);

		/* from here on 1 <= start, stop <= _POSIX2_LINE_MAX */
		if (setautostart && start > autostart)
			autostart = (int)start;
		if (open_ended && (!autostop || autostop > stop))
			autostop = (int)stop;
		if (maxval < stop)
			maxval = (int)stop;
		/* a reversed range such as "5-3" selects nothing */
		if (start <= stop)
			memset(positions + start, 1,
			    (size_t)(stop - start + 1));
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop)
		maxval = autostop;

	/* set autostart; any non-zero byte marks a selected position */
	if (autostart)
		memset(positions + 1, '1', autostart);
}
185
186 /* ARGSUSED */
187 void
c_cut(FILE * fp,char * fname)188 c_cut(FILE *fp, char *fname)
189 {
190 int ch, col;
191 char *pos;
192
193 for (;;) {
194 pos = positions + 1;
195 for (col = maxval; col; --col) {
196 if ((ch = getc(fp)) == EOF)
197 return;
198 if (ch == '\n')
199 break;
200 if (*pos++)
201 (void)putchar(ch);
202 }
203 if (ch != '\n') {
204 if (autostop)
205 while ((ch = getc(fp)) != EOF && ch != '\n')
206 (void)putchar(ch);
207 else
208 while ((ch = getc(fp)) != EOF && ch != '\n')
209 ;
210 }
211 (void)putchar('\n');
212 }
213 }
214
215 void
f_cut(FILE * fp,char * fname)216 f_cut(FILE *fp, char *fname)
217 {
218 int ch, field, isdelim;
219 char *pos, *p, sep;
220 int output;
221 size_t len;
222 char *lbuf, *tbuf;
223
224 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len));) {
225 output = 0;
226 if (lbuf[len - 1] != '\n') {
227 /* no newline at the end of the last line so add one */
228 if ((tbuf = (char *)malloc(len + 1)) == NULL)
229 err(1, NULL);
230 memcpy(tbuf, lbuf, len);
231 tbuf[len] = '\n';
232 lbuf = tbuf;
233 }
234 for (isdelim = 0, p = lbuf;; ++p) {
235 ch = *p;
236 /* this should work if newline is delimiter */
237 if (ch == sep)
238 isdelim = 1;
239 if (ch == '\n') {
240 if (!isdelim && !sflag)
241 (void)fwrite(lbuf, len, 1, stdout);
242 break;
243 }
244 }
245 if (!isdelim)
246 continue;
247
248 pos = positions + 1;
249 for (field = maxval, p = lbuf; field; --field, ++pos) {
250 if (*pos) {
251 if (output++)
252 (void)putchar(sep);
253 while ((ch = *p++) != '\n' && ch != sep)
254 (void)putchar(ch);
255 } else
256 while ((ch = *p++) != '\n' && ch != sep)
257 ;
258 if (ch == '\n')
259 break;
260 }
261 if (ch != '\n') {
262 if (autostop) {
263 if (output)
264 (void)putchar(sep);
265 for (; (ch = *p) != '\n'; ++p)
266 (void)putchar(ch);
267 } else
268 for (; (ch = *p) != '\n'; ++p)
269 ;
270 }
271 (void)putchar('\n');
272 }
273 if (tbuf)
274 free(tbuf);
275 }
276
/*
 * Write the command synopsis to standard error and terminate the
 * program with exit status 1.  Does not return.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage:\tcut -c list [file1 ...]\n"
	    "\tcut -f list [-s] [-d delim] [file ...]\n"
	    "\tcut -b list [-n] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
286