source: trunk/coreutils/src/wc.c@ 2603

Last change on this file since 2603 was 2554, checked in by bird, 20 years ago

coreutils-5.94

File size: 16.1 KB
Line 
1/* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 85, 91, 1995-2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu. */
20
21
22#include <config.h>
23
24#include <stdio.h>
25#include <getopt.h>
26#include <sys/types.h>
27
28/* Get mbstate_t, mbrtowc(), wcwidth(). */
29#if HAVE_WCHAR_H
30# include <wchar.h>
31#endif
32
33/* Get iswprint(), iswspace(). */
34#if HAVE_WCTYPE_H
35# include <wctype.h>
36#endif
37#if !defined iswprint && !HAVE_ISWPRINT
38# define iswprint(wc) 1
39#endif
40#if !defined iswspace && !HAVE_ISWSPACE
41# define iswspace(wc) \
42 ((wc) == to_uchar (wc) && ISSPACE (to_uchar (wc)))
43#endif
44
45/* Include this after wctype.h so that we `#undef' ISPRINT
46 (from Solaris's euc.h, from widec.h, from wctype.h) before
47 redefining and using it. */
48#include "system.h"
49
50#include "error.h"
51#include "inttostr.h"
52#include "safe-read.h"
53
54#ifndef HAVE_DECL_WCWIDTH
55"this configure-time declaration test was not run"
56#endif
57#if !HAVE_DECL_WCWIDTH
58extern int wcwidth ();
59#endif
60
61/* If wcwidth() doesn't exist, assume all printable characters have
62 width 1. */
63#if !defined wcwidth && !HAVE_WCWIDTH
64# define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1)
65#endif
66
/* Canonical name of this program, without any installation prefix
   such as `g'.  Passed to the --version handler in main.  */
#define PROGRAM_NAME "wc"

/* Author list, also passed to the --version handler in main.  */
#define AUTHORS "Paul Rubin", "David MacKenzie"

/* Number of bytes requested from each read of the input.  */
#define BUFFER_SIZE (16 * 1024)

/* Name under which this program was invoked (set from argv[0]).  */
char *program_name;

/* Running totals over every file counted so far.  max_line_length is
   not a sum: it is the largest line length seen in any file.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
static uintmax_t max_line_length;

/* One flag per count that may be printed.  */
static bool print_lines;
static bool print_words;
static bool print_chars;
static bool print_bytes;
static bool print_linelength;

/* Field width used when printing each count.  */
static int number_width;

/* Set once standard input has been used as an input file.  */
static bool have_read_stdin;
95
/* Outcome of an fstat or stat call on one input, together with the
   status data when the call succeeded.  */
struct fstatus
{
  /* Positive: fstat/stat has not been called yet for this input.
     Otherwise: the value that fstat or stat returned.  */
  int failed;

  /* The file's status; meaningful only when FAILED is zero.  */
  struct stat st;
};
106
107
108static struct option const longopts[] =
109{
110 {"bytes", no_argument, NULL, 'c'},
111 {"chars", no_argument, NULL, 'm'},
112 {"lines", no_argument, NULL, 'l'},
113 {"words", no_argument, NULL, 'w'},
114 {"max-line-length", no_argument, NULL, 'L'},
115 {GETOPT_HELP_OPTION_DECL},
116 {GETOPT_VERSION_OPTION_DECL},
117 {NULL, 0, NULL, 0}
118};
119
120void
121usage (int status)
122{
123 if (status != EXIT_SUCCESS)
124 fprintf (stderr, _("Try `%s --help' for more information.\n"),
125 program_name);
126 else
127 {
128 printf (_("\
129Usage: %s [OPTION]... [FILE]...\n\
130"),
131 program_name);
132 fputs (_("\
133Print newline, word, and byte counts for each FILE, and a total line if\n\
134more than one FILE is specified. With no FILE, or when FILE is -,\n\
135read standard input.\n\
136 -c, --bytes print the byte counts\n\
137 -m, --chars print the character counts\n\
138 -l, --lines print the newline counts\n\
139"), stdout);
140 fputs (_("\
141 -L, --max-line-length print the length of the longest line\n\
142 -w, --words print the word counts\n\
143"), stdout);
144 fputs (HELP_OPTION_DESCRIPTION, stdout);
145 fputs (VERSION_OPTION_DESCRIPTION, stdout);
146 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
147 }
148 exit (status);
149}
150
151/* FILE is the name of the file (or NULL for standard input)
152 associated with the specified counters. */
153static void
154write_counts (uintmax_t lines,
155 uintmax_t words,
156 uintmax_t chars,
157 uintmax_t bytes,
158 uintmax_t linelength,
159 const char *file)
160{
161 static char const format_sp_int[] = " %*s";
162 char const *format_int = format_sp_int + 1;
163 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
164
165 if (print_lines)
166 {
167 printf (format_int, number_width, umaxtostr (lines, buf));
168 format_int = format_sp_int;
169 }
170 if (print_words)
171 {
172 printf (format_int, number_width, umaxtostr (words, buf));
173 format_int = format_sp_int;
174 }
175 if (print_chars)
176 {
177 printf (format_int, number_width, umaxtostr (chars, buf));
178 format_int = format_sp_int;
179 }
180 if (print_bytes)
181 {
182 printf (format_int, number_width, umaxtostr (bytes, buf));
183 format_int = format_sp_int;
184 }
185 if (print_linelength)
186 {
187 printf (format_int, number_width, umaxtostr (linelength, buf));
188 }
189 if (file)
190 printf (" %s", file);
191 putchar ('\n');
192}
193
194/* Count words. FILE_X is the name of the file (or NULL for standard
195 input) that is open on descriptor FD. *FSTATUS is its status.
196 Return true if successful. */
197static bool
198wc (int fd, char const *file_x, struct fstatus *fstatus)
199{
200 bool ok = true;
201 char buf[BUFFER_SIZE + 1];
202 size_t bytes_read;
203 uintmax_t lines, words, chars, bytes, linelength;
204 bool count_bytes, count_chars, count_complicated;
205 char const *file = file_x ? file_x : _("standard input");
206
207 lines = words = chars = bytes = linelength = 0;
208
209 /* If in the current locale, chars are equivalent to bytes, we prefer
210 counting bytes, because that's easier. */
211#if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
212 if (MB_CUR_MAX > 1)
213 {
214 count_bytes = print_bytes;
215 count_chars = print_chars;
216 }
217 else
218#endif
219 {
220 count_bytes = print_bytes | print_chars;
221 count_chars = false;
222 }
223 count_complicated = print_words | print_linelength;
224
225 /* When counting only bytes, save some line- and word-counting
226 overhead. If FD is a `regular' Unix file, using lseek is enough
227 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
228 bytes at a time until EOF. Note that the `size' (number of bytes)
229 that wc reports is smaller than stats.st_size when the file is not
230 positioned at its beginning. That's why the lseek calls below are
231 necessary. For example the command
232 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
233 should make wc report `0' bytes. */
234
235 if (count_bytes & !count_chars & !print_lines & !count_complicated)
236 {
237 off_t current_pos, end_pos;
238
239 if (0 < fstatus->failed)
240 fstatus->failed = fstat (fd, &fstatus->st);
241
242 if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
243 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
244 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
245 {
246 /* Be careful here. The current position may actually be
247 beyond the end of the file. As in the example above. */
248 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
249 }
250 else
251 {
252 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
253 {
254 if (bytes_read == SAFE_READ_ERROR)
255 {
256 error (0, errno, "%s", file);
257 ok = false;
258 break;
259 }
260 bytes += bytes_read;
261 }
262 }
263 }
264 else if (!count_chars & !count_complicated)
265 {
266 /* Use a separate loop when counting only lines or lines and bytes --
267 but not chars or words. */
268 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
269 {
270 char *p = buf;
271
272 if (bytes_read == SAFE_READ_ERROR)
273 {
274 error (0, errno, "%s", file);
275 ok = false;
276 break;
277 }
278
279 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
280 {
281 ++p;
282 ++lines;
283 }
284 bytes += bytes_read;
285 }
286 }
287#if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
288# define SUPPORT_OLD_MBRTOWC 1
289 else if (MB_CUR_MAX > 1)
290 {
291 bool in_word = false;
292 uintmax_t linepos = 0;
293 mbstate_t state;
294 uintmax_t last_error_line = 0;
295 int last_error_errno = 0;
296# if SUPPORT_OLD_MBRTOWC
297 /* Back-up the state before each multibyte character conversion and
298 move the last incomplete character of the buffer to the front
299 of the buffer. This is needed because we don't know whether
300 the `mbrtowc' function updates the state when it returns -2, -
301 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
302 ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
303 autoconf test for this, yet. */
304 size_t prev = 0; /* number of bytes carried over from previous round */
305# else
306 const size_t prev = 0;
307# endif
308
309 memset (&state, 0, sizeof (mbstate_t));
310 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
311 {
312 const char *p;
313# if SUPPORT_OLD_MBRTOWC
314 mbstate_t backup_state;
315# endif
316 if (bytes_read == SAFE_READ_ERROR)
317 {
318 error (0, errno, "%s", file);
319 ok = false;
320 break;
321 }
322
323 bytes += bytes_read;
324 p = buf;
325 bytes_read += prev;
326 do
327 {
328 wchar_t wide_char;
329 size_t n;
330
331# if SUPPORT_OLD_MBRTOWC
332 backup_state = state;
333# endif
334 n = mbrtowc (&wide_char, p, bytes_read, &state);
335 if (n == (size_t) -2)
336 {
337# if SUPPORT_OLD_MBRTOWC
338 state = backup_state;
339# endif
340 break;
341 }
342 if (n == (size_t) -1)
343 {
344 /* Signal repeated errors only once per line. */
345 if (!(lines + 1 == last_error_line
346 && errno == last_error_errno))
347 {
348 char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
349 last_error_line = lines + 1;
350 last_error_errno = errno;
351 error (0, errno, "%s:%s", file,
352 umaxtostr (last_error_line, line_number_buf));
353 ok = false;
354 }
355 p++;
356 bytes_read--;
357 }
358 else
359 {
360 if (n == 0)
361 {
362 wide_char = 0;
363 n = 1;
364 }
365 p += n;
366 bytes_read -= n;
367 chars++;
368 switch (wide_char)
369 {
370 case '\n':
371 lines++;
372 /* Fall through. */
373 case '\r':
374 case '\f':
375 if (linepos > linelength)
376 linelength = linepos;
377 linepos = 0;
378 goto mb_word_separator;
379 case '\t':
380 linepos += 8 - (linepos % 8);
381 goto mb_word_separator;
382 case ' ':
383 linepos++;
384 /* Fall through. */
385 case '\v':
386 mb_word_separator:
387 words += in_word;
388 in_word = false;
389 break;
390 default:
391 if (iswprint (wide_char))
392 {
393 int width = wcwidth (wide_char);
394 if (width > 0)
395 linepos += width;
396 if (iswspace (wide_char))
397 goto mb_word_separator;
398 in_word = true;
399 }
400 break;
401 }
402 }
403 }
404 while (bytes_read > 0);
405
406# if SUPPORT_OLD_MBRTOWC
407 if (bytes_read > 0)
408 {
409 if (bytes_read == BUFFER_SIZE)
410 {
411 /* Encountered a very long redundant shift sequence. */
412 p++;
413 bytes_read--;
414 }
415 memmove (buf, p, bytes_read);
416 }
417 prev = bytes_read;
418# endif
419 }
420 if (linepos > linelength)
421 linelength = linepos;
422 words += in_word;
423 }
424#endif
425 else
426 {
427 bool in_word = false;
428 uintmax_t linepos = 0;
429
430 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
431 {
432 const char *p = buf;
433 if (bytes_read == SAFE_READ_ERROR)
434 {
435 error (0, errno, "%s", file);
436 ok = false;
437 break;
438 }
439
440 bytes += bytes_read;
441 do
442 {
443 switch (*p++)
444 {
445 case '\n':
446 lines++;
447 /* Fall through. */
448 case '\r':
449 case '\f':
450 if (linepos > linelength)
451 linelength = linepos;
452 linepos = 0;
453 goto word_separator;
454 case '\t':
455 linepos += 8 - (linepos % 8);
456 goto word_separator;
457 case ' ':
458 linepos++;
459 /* Fall through. */
460 case '\v':
461 word_separator:
462 words += in_word;
463 in_word = false;
464 break;
465 default:
466 if (ISPRINT (to_uchar (p[-1])))
467 {
468 linepos++;
469 if (ISSPACE (to_uchar (p[-1])))
470 goto word_separator;
471 in_word = true;
472 }
473 break;
474 }
475 }
476 while (--bytes_read);
477 }
478 if (linepos > linelength)
479 linelength = linepos;
480 words += in_word;
481 }
482
483 if (count_chars < print_chars)
484 chars = bytes;
485
486 write_counts (lines, words, chars, bytes, linelength, file_x);
487 total_lines += lines;
488 total_words += words;
489 total_chars += chars;
490 total_bytes += bytes;
491 if (linelength > max_line_length)
492 max_line_length = linelength;
493
494 return ok;
495}
496
497static bool
498wc_file (char const *file, struct fstatus *fstatus)
499{
500 if (! file || STREQ (file, "-"))
501 {
502 have_read_stdin = true;
503 if (O_BINARY && ! isatty (STDIN_FILENO))
504 freopen (NULL, "rb", stdin);
505 return wc (STDIN_FILENO, file, fstatus);
506 }
507 else
508 {
509 int fd = open (file, O_RDONLY | O_BINARY);
510 if (fd == -1)
511 {
512 error (0, errno, "%s", file);
513 return false;
514 }
515 else
516 {
517 bool ok = wc (fd, file, fstatus);
518 if (close (fd) != 0)
519 {
520 error (0, errno, "%s", file);
521 return false;
522 }
523 return ok;
524 }
525 }
526}
527
528/* Return the file status for the NFILES files addressed by FILE.
529 Optimize the case where only one number is printed, for just one
530 file; in that case we can use a print width of 1, so we don't need
531 to stat the file. */
532
533static struct fstatus *
534get_input_fstatus (int nfiles, char * const *file)
535{
536 struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
537
538 if (nfiles == 1
539 && ((print_lines + print_words + print_chars
540 + print_bytes + print_linelength)
541 == 1))
542 fstatus[0].failed = 1;
543 else
544 {
545 int i;
546
547 for (i = 0; i < nfiles; i++)
548 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
549 ? fstat (STDIN_FILENO, &fstatus[i].st)
550 : stat (file[i], &fstatus[i].st));
551 }
552
553 return fstatus;
554}
555
556/* Return a print width suitable for the NFILES files whose status is
557 recorded in FSTATUS. Optimize the same special case that
558 get_input_fstatus optimizes. */
559
560static int
561compute_number_width (int nfiles, struct fstatus const *fstatus)
562{
563 int width = 1;
564
565 if (fstatus[0].failed <= 0)
566 {
567 int minimum_width = 1;
568 uintmax_t regular_total = 0;
569 int i;
570
571 for (i = 0; i < nfiles; i++)
572 if (! fstatus[i].failed)
573 {
574 if (S_ISREG (fstatus[i].st.st_mode))
575 regular_total += fstatus[i].st.st_size;
576 else
577 minimum_width = 7;
578 }
579
580 for (; 10 <= regular_total; regular_total /= 10)
581 width++;
582 if (width < minimum_width)
583 width = minimum_width;
584 }
585
586 return width;
587}
588
589
590int
591main (int argc, char **argv)
592{
593 int i;
594 bool ok;
595 int optc;
596 int nfiles;
597 struct fstatus *fstatus;
598
599 initialize_main (&argc, &argv);
600 program_name = argv[0];
601 setlocale (LC_ALL, "");
602 bindtextdomain (PACKAGE, LOCALEDIR);
603 textdomain (PACKAGE);
604
605 atexit (close_stdout);
606
607 print_lines = print_words = print_chars = print_bytes = false;
608 print_linelength = false;
609 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
610
611 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
612 switch (optc)
613 {
614 case 'c':
615 print_bytes = true;
616 break;
617
618 case 'm':
619 print_chars = true;
620 break;
621
622 case 'l':
623 print_lines = true;
624 break;
625
626 case 'w':
627 print_words = true;
628 break;
629
630 case 'L':
631 print_linelength = true;
632 break;
633
634 case_GETOPT_HELP_CHAR;
635
636 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
637
638 default:
639 usage (EXIT_FAILURE);
640 }
641
642 if (! (print_lines | print_words | print_chars | print_bytes
643 | print_linelength))
644 print_lines = print_words = print_bytes = true;
645
646 nfiles = argc - optind;
647 nfiles += (nfiles == 0);
648
649 fstatus = get_input_fstatus (nfiles, argv + optind);
650 number_width = compute_number_width (nfiles, fstatus);
651
652 ok = true;
653 for (i = 0; i < nfiles; i++)
654 ok &= wc_file (argv[optind + i], &fstatus[i]);
655
656 if (1 < nfiles)
657 write_counts (total_lines, total_words, total_chars, total_bytes,
658 max_line_length, _("total"));
659
660 free (fstatus);
661
662 if (have_read_stdin && close (STDIN_FILENO) != 0)
663 error (EXIT_FAILURE, errno, "-");
664
665 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
666}
Note: See TracBrowser for help on using the repository browser.