source: trunk/coreutils/src/csplit.c@ 2746

Last change on this file since 2746 was 2554, checked in by bird, 20 years ago

coreutils-5.94

File size: 35.7 KB
Line 
1/* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2005 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* Written by Stuart Kemp, [email protected].
19 Modified by David MacKenzie, [email protected]. */
20
21#include <config.h>
22
23#include <getopt.h>
24#include <sys/types.h>
25#include <signal.h>
26
27#include "system.h"
28
29#include <regex.h>
30
31#include "error.h"
32#include "fd-reopen.h"
33#include "inttostr.h"
34#include "quote.h"
35#include "safe-read.h"
36#include "stdio--.h"
37#include "xstrtol.h"
38
/* SA_NOCLDSTOP serves as a proxy for the presence of the sigaction
   machinery.  When it is not defined, define it to 0 and replace
   sigprocmask, sigset_t and (unless HAVE_SIGINTERRUPT is set)
   siginterrupt with harmless stand-ins.  */
#if !defined SA_NOCLDSTOP
# define SA_NOCLDSTOP 0
# define sigprocmask(How, Set, Oset) /* empty */
# define sigset_t int
# if ! HAVE_SIGINTERRUPT
#  define siginterrupt(sig, flag) /* empty */
# endif
#endif
49
/* Official program name (e.g., without any `g' prefix).  */
#define PROGRAM_NAME "csplit"

#define AUTHORS "Stuart Kemp", "David MacKenzie"

/* Growth increment for the area holding control records.  */
#define ALLOC_SIZE 20

/* Prefix used for output file names when none is given.  */
#define DEFAULT_PREFIX "xx"
60
/* One pattern argument from the command line, in compiled form.  */
struct control
{
  char *regexpr;		/* Regexp source text; NULL for a line count.  */
  struct re_pattern_buffer re_compiled;	/* Regexp after compilation.  */
  intmax_t offset;		/* Signed line offset from the match to the split.  */
  uintmax_t lines_required;	/* Line count requested by a numeric pattern.  */
  uintmax_t repeat;		/* How many extra times to apply the pattern.  */
  int argnum;			/* Index of the pattern in ARGV.  */
  bool repeat_forever;		/* True when the repeat count was `*'.  */
  bool ignore;			/* True to discard the section (regexp only).  */
};
73
/* Buffer tuning parameters:
     START_SIZE  initial size of data area in buffers;
     INCR_SIZE   increment size for data area;
     CTRL_SIZE   number of lines kept in each node in line list.
   Building with DEBUG selects some small values to test the
   algorithms.  The two sets are mutually exclusive, so no macro is
   ever defined twice with different replacement lists.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
89
/* A counted string: LEN bytes starting at STR.  */
struct cstring
{
  size_t len;
  char *str;
};
96
97/* Pointers to the beginnings of lines in the buffer area.
98 These structures are linked together if needed. */
99struct line
100{
101 size_t used; /* Number of offsets used in this struct. */
102 size_t insert_index; /* Next offset to use when inserting line. */
103 size_t retrieve_index; /* Next index to use when retrieving line. */
104 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
105 struct line *next; /* Next in linked list. */
106};
107
/* One chunk of input: a data area plus the list of line descriptors
   that index the individual lines stored in it.  */
struct buffer_record
{
  size_t bytes_alloc;		/* Capacity of the data area.  */
  size_t bytes_used;		/* Bytes of the data area holding input.  */
  uintmax_t start_line;		/* Number of the first line in this buffer.  */
  uintmax_t first_available;	/* Number of the first line not yet retrieved.  */
  size_t num_lines;		/* Count of complete lines in this buffer.  */
  char *buffer;			/* The data area.  */
  struct line *line_start;	/* First node of the line descriptor list.  */
  struct line *curr_line;	/* Line descriptor node currently in use.  */
  struct buffer_record *next;
};
123
124static void close_output_file (void);
125static void create_output_file (void);
126static void delete_all_files (bool);
127static void save_line_to_file (const struct cstring *line);
128void usage (int status);
129
130/* The name this program was run with. */
131char *program_name;
132
133/* Start of buffer list. */
134static struct buffer_record *head = NULL;
135
136/* Partially read line. */
137static char *hold_area = NULL;
138
139/* Number of bytes in `hold_area'. */
140static size_t hold_count = 0;
141
142/* Number of the last line in the buffers. */
143static uintmax_t last_line_number = 0;
144
145/* Number of the line currently being examined. */
146static uintmax_t current_line = 0;
147
148/* If true, we have read EOF. */
149static bool have_read_eof = false;
150
151/* Name of output files. */
152static char * volatile filename_space = NULL;
153
154/* Prefix part of output file names. */
155static char * volatile prefix = NULL;
156
157/* Suffix part of output file names. */
158static char * volatile suffix = NULL;
159
160/* Number of digits to use in output file names. */
161static int volatile digits = 2;
162
163/* Number of files created so far. */
164static unsigned int volatile files_created = 0;
165
166/* Number of bytes written to current file. */
167static uintmax_t bytes_written;
168
169/* Output file pointer. */
170static FILE *output_stream = NULL;
171
172/* Output file name. */
173static char *output_filename = NULL;
174
175/* Perhaps it would be cleaner to pass arg values instead of indexes. */
176static char **global_argv;
177
178/* If true, do not print the count of bytes in each output file. */
179static bool suppress_count;
180
181/* If true, remove output files on error. */
182static bool volatile remove_files;
183
184/* If true, remove all output files which have a zero length. */
185static bool elide_empty_files;
186
187/* The compiled pattern arguments, which determine how to split
188 the input file. */
189static struct control *controls;
190
191/* Number of elements in `controls'. */
192static size_t control_used;
193
194/* The set of signals that are caught. */
195static sigset_t caught_signals;
196
197static struct option const longopts[] =
198{
199 {"digits", required_argument, NULL, 'n'},
200 {"quiet", no_argument, NULL, 'q'},
201 {"silent", no_argument, NULL, 's'},
202 {"keep-files", no_argument, NULL, 'k'},
203 {"elide-empty-files", no_argument, NULL, 'z'},
204 {"prefix", required_argument, NULL, 'f'},
205 {"suffix-format", required_argument, NULL, 'b'},
206 {GETOPT_HELP_OPTION_DECL},
207 {GETOPT_VERSION_OPTION_DECL},
208 {NULL, 0, NULL, 0}
209};
210
211/* Optionally remove files created so far; then exit.
212 Called when an error detected. */
213
214static void
215cleanup (void)
216{
217 sigset_t oldset;
218
219 close_output_file ();
220
221 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
222 delete_all_files (false);
223 sigprocmask (SIG_SETMASK, &oldset, NULL);
224}
225
226static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
227static void
228cleanup_fatal (void)
229{
230 cleanup ();
231 exit (EXIT_FAILURE);
232}
233
/* Report memory exhaustion and terminate through cleanup_fatal, so
   that the usual cleanup is performed first.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
240
241static void
242interrupt_handler (int sig)
243{
244 if (! SA_NOCLDSTOP)
245 signal (sig, SIG_IGN);
246
247 delete_all_files (true);
248
249 signal (sig, SIG_DFL);
250 raise (sig);
251}
252
253/* Keep track of NUM bytes of a partial line in buffer START.
254 These bytes will be retrieved later when another large buffer is read. */
255
256static void
257save_to_hold_area (char *start, size_t num)
258{
259 free (hold_area);
260 hold_area = start;
261 hold_count = num;
262}
263
264/* Read up to MAX_N_BYTES bytes from the input stream into DEST.
265 Return the number of bytes read. */
266
267static size_t
268read_input (char *dest, size_t max_n_bytes)
269{
270 size_t bytes_read;
271
272 if (max_n_bytes == 0)
273 return 0;
274
275 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
276
277 if (bytes_read == 0)
278 have_read_eof = true;
279
280 if (bytes_read == SAFE_READ_ERROR)
281 {
282 error (0, errno, _("read error"));
283 cleanup_fatal ();
284 }
285
286 return bytes_read;
287}
288
289/* Initialize existing line record P. */
290
291static void
292clear_line_control (struct line *p)
293{
294 p->used = 0;
295 p->insert_index = 0;
296 p->retrieve_index = 0;
297}
298
299/* Return a new, initialized line record. */
300
301static struct line *
302new_line_control (void)
303{
304 struct line *p = xmalloc (sizeof *p);
305
306 p->next = NULL;
307 clear_line_control (p);
308
309 return p;
310}
311
312/* Record LINE_START, which is the address of the start of a line
313 of length LINE_LEN in the large buffer, in the lines buffer of B. */
314
315static void
316keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
317{
318 struct line *l;
319
320 /* If there is no existing area to keep line info, get some. */
321 if (b->line_start == NULL)
322 b->line_start = b->curr_line = new_line_control ();
323
324 /* If existing area for lines is full, get more. */
325 if (b->curr_line->used == CTRL_SIZE)
326 {
327 b->curr_line->next = new_line_control ();
328 b->curr_line = b->curr_line->next;
329 }
330
331 l = b->curr_line;
332
333 /* Record the start of the line, and update counters. */
334 l->starts[l->insert_index].str = line_start;
335 l->starts[l->insert_index].len = line_len;
336 l->used++;
337 l->insert_index++;
338}
339
340/* Scan the buffer in B for newline characters
341 and record the line start locations and lengths in B.
342 Return the number of lines found in this buffer.
343
344 There may be an incomplete line at the end of the buffer;
345 a pointer is kept to this area, which will be used when
346 the next buffer is filled. */
347
348static size_t
349record_line_starts (struct buffer_record *b)
350{
351 char *line_start; /* Start of current line. */
352 char *line_end; /* End of each line found. */
353 size_t bytes_left; /* Length of incomplete last line. */
354 size_t lines; /* Number of lines found. */
355 size_t line_length; /* Length of each line found. */
356
357 if (b->bytes_used == 0)
358 return 0;
359
360 lines = 0;
361 line_start = b->buffer;
362 bytes_left = b->bytes_used;
363
364 for (;;)
365 {
366 line_end = memchr (line_start, '\n', bytes_left);
367 if (line_end == NULL)
368 break;
369 line_length = line_end - line_start + 1;
370 keep_new_line (b, line_start, line_length);
371 bytes_left -= line_length;
372 line_start = line_end + 1;
373 lines++;
374 }
375
376 /* Check for an incomplete last line. */
377 if (bytes_left)
378 {
379 if (have_read_eof)
380 {
381 keep_new_line (b, line_start, bytes_left);
382 lines++;
383 }
384 else
385 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
386 }
387
388 b->num_lines = lines;
389 b->first_available = b->start_line = last_line_number + 1;
390 last_line_number += lines;
391
392 return lines;
393}
394
395/* Return a new buffer with room to store SIZE bytes, plus
396 an extra byte for safety. */
397
398static struct buffer_record *
399create_new_buffer (size_t size)
400{
401 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
402
403 new_buffer->buffer = xmalloc (size + 1);
404
405 new_buffer->bytes_alloc = size;
406 new_buffer->line_start = new_buffer->curr_line = NULL;
407
408 return new_buffer;
409}
410
411/* Return a new buffer of at least MINSIZE bytes. If a buffer of at
412 least that size is currently free, use it, otherwise create a new one. */
413
414static struct buffer_record *
415get_new_buffer (size_t min_size)
416{
417 struct buffer_record *new_buffer; /* Buffer to return. */
418 size_t alloc_size; /* Actual size that will be requested. */
419
420 alloc_size = START_SIZE;
421 if (alloc_size < min_size)
422 {
423 size_t s = min_size - alloc_size + INCR_SIZE - 1;
424 alloc_size += s - s % INCR_SIZE;
425 }
426
427 new_buffer = create_new_buffer (alloc_size);
428
429 new_buffer->num_lines = 0;
430 new_buffer->bytes_used = 0;
431 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
432 new_buffer->next = NULL;
433
434 return new_buffer;
435}
436
437static void
438free_buffer (struct buffer_record *buf)
439{
440 free (buf->buffer);
441 buf->buffer = NULL;
442}
443
444/* Append buffer BUF to the linked list of buffers that contain
445 some data yet to be processed. */
446
447static void
448save_buffer (struct buffer_record *buf)
449{
450 struct buffer_record *p;
451
452 buf->next = NULL;
453 buf->curr_line = buf->line_start;
454
455 if (head == NULL)
456 head = buf;
457 else
458 {
459 for (p = head; p->next; p = p->next)
460 /* Do nothing. */ ;
461 p->next = buf;
462 }
463}
464
465/* Fill a buffer of input.
466
467 Set the initial size of the buffer to a default.
468 Fill the buffer (from the hold area and input stream)
469 and find the individual lines.
470 If no lines are found (the buffer is too small to hold the next line),
471 release the current buffer (whose contents would have been put in the
472 hold area) and repeat the process with another large buffer until at least
473 one entire line has been read.
474
475 Return true if a new buffer was obtained, otherwise false
476 (in which case end-of-file must have been encountered). */
477
478static bool
479load_buffer (void)
480{
481 struct buffer_record *b;
482 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
483 size_t bytes_avail; /* Size of new buffer created. */
484 size_t lines_found; /* Number of lines in this new buffer. */
485 char *p; /* Place to load into buffer. */
486
487 if (have_read_eof)
488 return false;
489
490 /* We must make the buffer at least as large as the amount of data
491 in the partial line left over from the last call. */
492 if (bytes_wanted < hold_count)
493 bytes_wanted = hold_count;
494
495 while (1)
496 {
497 b = get_new_buffer (bytes_wanted);
498 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
499 p = b->buffer;
500
501 /* First check the `holding' area for a partial line. */
502 if (hold_count)
503 {
504 memcpy (p, hold_area, hold_count);
505 p += hold_count;
506 b->bytes_used += hold_count;
507 bytes_avail -= hold_count;
508 hold_count = 0;
509 }
510
511 b->bytes_used += read_input (p, bytes_avail);
512
513 lines_found = record_line_starts (b);
514 if (!lines_found)
515 free_buffer (b);
516
517 if (lines_found || have_read_eof)
518 break;
519
520 if (xalloc_oversized (2, b->bytes_alloc))
521 xalloc_die ();
522 bytes_wanted = 2 * b->bytes_alloc;
523 free_buffer (b);
524 free (b);
525 }
526
527 if (lines_found)
528 save_buffer (b);
529
530 return lines_found != 0;
531}
532
533/* Return the line number of the first line that has not yet been retrieved. */
534
535static uintmax_t
536get_first_line_in_buffer (void)
537{
538 if (head == NULL && !load_buffer ())
539 error (EXIT_FAILURE, errno, _("input disappeared"));
540
541 return head->first_available;
542}
543
544/* Return a pointer to the logical first line in the buffer and make the
545 next line the logical first line.
546 Return NULL if there is no more input. */
547
548static struct cstring *
549remove_line (void)
550{
551 /* If non-NULL, this is the buffer for which the previous call
552 returned the final line. So now, presuming that line has been
553 processed, we can free the buffer and reset this pointer. */
554 static struct buffer_record *prev_buf = NULL;
555
556 struct cstring *line; /* Return value. */
557 struct line *l; /* For convenience. */
558
559 if (prev_buf)
560 {
561 free_buffer (prev_buf);
562 prev_buf = NULL;
563 }
564
565 if (head == NULL && !load_buffer ())
566 return NULL;
567
568 if (current_line < head->first_available)
569 current_line = head->first_available;
570
571 ++(head->first_available);
572
573 l = head->curr_line;
574
575 line = &l->starts[l->retrieve_index];
576
577 /* Advance index to next line. */
578 if (++l->retrieve_index == l->used)
579 {
580 /* Go on to the next line record. */
581 head->curr_line = l->next;
582 if (head->curr_line == NULL || head->curr_line->used == 0)
583 {
584 /* Go on to the next data block.
585 but first record the current one so we can free it
586 once the line we're returning has been processed. */
587 prev_buf = head;
588 head = head->next;
589 }
590 }
591
592 return line;
593}
594
595/* Search the buffers for line LINENUM, reading more input if necessary.
596 Return a pointer to the line, or NULL if it is not found in the file. */
597
598static struct cstring *
599find_line (uintmax_t linenum)
600{
601 struct buffer_record *b;
602
603 if (head == NULL && !load_buffer ())
604 return NULL;
605
606 if (linenum < head->start_line)
607 return NULL;
608
609 for (b = head;;)
610 {
611 if (linenum < b->start_line + b->num_lines)
612 {
613 /* The line is in this buffer. */
614 struct line *l;
615 size_t offset; /* How far into the buffer the line is. */
616
617 l = b->line_start;
618 offset = linenum - b->start_line;
619 /* Find the control record. */
620 while (offset >= CTRL_SIZE)
621 {
622 l = l->next;
623 offset -= CTRL_SIZE;
624 }
625 return &l->starts[offset];
626 }
627 if (b->next == NULL && !load_buffer ())
628 return NULL;
629 b = b->next; /* Try the next data block. */
630 }
631}
632
633/* Return true if at least one more line is available for input. */
634
635static bool
636no_more_lines (void)
637{
638 return find_line (current_line + 1) == NULL;
639}
640
/* Make the file NAME standard input.  The name "-" leaves standard
   input as it is.  Exit with an error if NAME cannot be opened.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
}
649
650/* Write all lines from the beginning of the buffer up to, but
651 not including, line LAST_LINE, to the current output file.
652 If IGNORE is true, do not output lines selected here.
653 ARGNUM is the index in ARGV of the current pattern. */
654
655static void
656write_to_file (uintmax_t last_line, bool ignore, int argnum)
657{
658 struct cstring *line;
659 uintmax_t first_line; /* First available input line. */
660 uintmax_t lines; /* Number of lines to output. */
661 uintmax_t i;
662
663 first_line = get_first_line_in_buffer ();
664
665 if (first_line > last_line)
666 {
667 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
668 cleanup_fatal ();
669 }
670
671 lines = last_line - first_line;
672
673 for (i = 0; i < lines; i++)
674 {
675 line = remove_line ();
676 if (line == NULL)
677 {
678 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
679 cleanup_fatal ();
680 }
681 if (!ignore)
682 save_line_to_file (line);
683 }
684}
685
/* Write every line that is still available to the current output
   file.  Used for what is left once all patterns are processed.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
	break;
      save_line_to_file (line);
    }
}
696
697/* Handle an attempt to read beyond EOF under the control of record P,
698 on iteration REPETITION if nonzero. */
699
700static void handle_line_error (const struct control *, uintmax_t)
701 ATTRIBUTE_NORETURN;
702static void
703handle_line_error (const struct control *p, uintmax_t repetition)
704{
705 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
706
707 fprintf (stderr, _("%s: %s: line number out of range"),
708 program_name, quote (umaxtostr (p->lines_required, buf)));
709 if (repetition)
710 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
711 else
712 fprintf (stderr, "\n");
713
714 cleanup_fatal ();
715}
716
717/* Determine the line number that marks the end of this file,
718 then get those lines and save them to the output file.
719 P is the control record.
720 REPETITION is the repetition number. */
721
722static void
723process_line_count (const struct control *p, uintmax_t repetition)
724{
725 uintmax_t linenum;
726 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
727 struct cstring *line;
728
729 create_output_file ();
730
731 linenum = get_first_line_in_buffer ();
732
733 while (linenum++ < last_line_to_save)
734 {
735 line = remove_line ();
736 if (line == NULL)
737 handle_line_error (p, repetition);
738 save_line_to_file (line);
739 }
740
741 close_output_file ();
742
743 /* Ensure that the line number specified is not 1 greater than
744 the number of lines in the file. */
745 if (no_more_lines ())
746 handle_line_error (p, repetition);
747}
748
749static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
750static void
751regexp_error (struct control *p, uintmax_t repetition, bool ignore)
752{
753 fprintf (stderr, _("%s: %s: match not found"),
754 program_name, quote (global_argv[p->argnum]));
755
756 if (repetition)
757 {
758 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
759 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
760 }
761 else
762 fprintf (stderr, "\n");
763
764 if (!ignore)
765 {
766 dump_rest_of_file ();
767 close_output_file ();
768 }
769 cleanup_fatal ();
770}
771
772/* Read the input until a line matches the regexp in P, outputting
773 it unless P->IGNORE is true.
774 REPETITION is this repeat-count; 0 means the first time. */
775
776static void
777process_regexp (struct control *p, uintmax_t repetition)
778{
779 struct cstring *line; /* From input file. */
780 size_t line_len; /* To make "$" in regexps work. */
781 uintmax_t break_line; /* First line number of next file. */
782 bool ignore = p->ignore; /* If true, skip this section. */
783 regoff_t ret;
784
785 if (!ignore)
786 create_output_file ();
787
788 /* If there is no offset for the regular expression, or
789 it is positive, then it is not necessary to buffer the lines. */
790
791 if (p->offset >= 0)
792 {
793 for (;;)
794 {
795 line = find_line (++current_line);
796 if (line == NULL)
797 {
798 if (p->repeat_forever)
799 {
800 if (!ignore)
801 {
802 dump_rest_of_file ();
803 close_output_file ();
804 }
805 exit (EXIT_SUCCESS);
806 }
807 else
808 regexp_error (p, repetition, ignore);
809 }
810 line_len = line->len;
811 if (line->str[line_len - 1] == '\n')
812 line_len--;
813 ret = re_search (&p->re_compiled, line->str, line_len,
814 0, line_len, NULL);
815 if (ret == -2)
816 {
817 error (0, 0, _("error in regular expression search"));
818 cleanup_fatal ();
819 }
820 if (ret == -1)
821 {
822 line = remove_line ();
823 if (!ignore)
824 save_line_to_file (line);
825 }
826 else
827 break;
828 }
829 }
830 else
831 {
832 /* Buffer the lines. */
833 for (;;)
834 {
835 line = find_line (++current_line);
836 if (line == NULL)
837 {
838 if (p->repeat_forever)
839 {
840 if (!ignore)
841 {
842 dump_rest_of_file ();
843 close_output_file ();
844 }
845 exit (EXIT_SUCCESS);
846 }
847 else
848 regexp_error (p, repetition, ignore);
849 }
850 line_len = line->len;
851 if (line->str[line_len - 1] == '\n')
852 line_len--;
853 ret = re_search (&p->re_compiled, line->str, line_len,
854 0, line_len, NULL);
855 if (ret == -2)
856 {
857 error (0, 0, _("error in regular expression search"));
858 cleanup_fatal ();
859 }
860 if (ret != -1)
861 break;
862 }
863 }
864
865 /* Account for any offset from this regexp. */
866 break_line = current_line + p->offset;
867
868 write_to_file (break_line, ignore, p->argnum);
869
870 if (!ignore)
871 close_output_file ();
872
873 if (p->offset > 0)
874 current_line = break_line;
875}
876
877/* Split the input file according to the control records we have built. */
878
879static void
880split_file (void)
881{
882 size_t i;
883
884 for (i = 0; i < control_used; i++)
885 {
886 uintmax_t j;
887 if (controls[i].regexpr)
888 {
889 for (j = 0; (controls[i].repeat_forever
890 || j <= controls[i].repeat); j++)
891 process_regexp (&controls[i], j);
892 }
893 else
894 {
895 for (j = 0; (controls[i].repeat_forever
896 || j <= controls[i].repeat); j++)
897 process_line_count (&controls[i], j);
898 }
899 }
900
901 create_output_file ();
902 dump_rest_of_file ();
903 close_output_file ();
904}
905
906/* Return the name of output file number NUM.
907
908 This function is called from a signal handler, so it should invoke
909 only reentrant functions that are async-signal-safe. POSIX does
910 not guarantee this for the functions called below, but we don't
911 know of any hosts where this implementation isn't safe. */
912
913static char *
914make_filename (unsigned int num)
915{
916 strcpy (filename_space, prefix);
917 if (suffix)
918 sprintf (filename_space + strlen (prefix), suffix, num);
919 else
920 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
921 return filename_space;
922}
923
924/* Create the next output file. */
925
926static void
927create_output_file (void)
928{
929 sigset_t oldset;
930 bool fopen_ok;
931 int fopen_errno;
932
933 output_filename = make_filename (files_created);
934
935 /* Create the output file in a critical section, to avoid races. */
936 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
937 output_stream = fopen (output_filename, "w");
938 fopen_ok = (output_stream != NULL);
939 fopen_errno = errno;
940 files_created += fopen_ok;
941 sigprocmask (SIG_SETMASK, &oldset, NULL);
942
943 if (! fopen_ok)
944 {
945 error (0, fopen_errno, "%s", output_filename);
946 cleanup_fatal ();
947 }
948 bytes_written = 0;
949}
950
951/* If requested, delete all the files we have created. This function
952 must be called only from critical sections. */
953
954static void
955delete_all_files (bool in_signal_handler)
956{
957 unsigned int i;
958
959 if (! remove_files)
960 return;
961
962 for (i = 0; i < files_created; i++)
963 {
964 const char *name = make_filename (i);
965 if (unlink (name) != 0 && !in_signal_handler)
966 error (0, errno, "%s", name);
967 }
968
969 files_created = 0;
970}
971
972/* Close the current output file and print the count
973 of characters in this file. */
974
975static void
976close_output_file (void)
977{
978 if (output_stream)
979 {
980 if (ferror (output_stream))
981 {
982 error (0, 0, _("write error for %s"), quote (output_filename));
983 output_stream = NULL;
984 cleanup_fatal ();
985 }
986 if (fclose (output_stream) != 0)
987 {
988 error (0, errno, "%s", output_filename);
989 output_stream = NULL;
990 cleanup_fatal ();
991 }
992 if (bytes_written == 0 && elide_empty_files)
993 {
994 sigset_t oldset;
995 bool unlink_ok;
996 int unlink_errno;
997
998 /* Remove the output file in a critical section, to avoid races. */
999 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1000 unlink_ok = (unlink (output_filename) == 0);
1001 unlink_errno = errno;
1002 files_created -= unlink_ok;
1003 sigprocmask (SIG_SETMASK, &oldset, NULL);
1004
1005 if (! unlink_ok)
1006 error (0, unlink_errno, "%s", output_filename);
1007 }
1008 else
1009 {
1010 if (!suppress_count)
1011 {
1012 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1013 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1014 }
1015 }
1016 output_stream = NULL;
1017 }
1018}
1019
1020/* Save line LINE to the output file and
1021 increment the character count for the current file. */
1022
1023static void
1024save_line_to_file (const struct cstring *line)
1025{
1026 fwrite (line->str, sizeof (char), line->len, output_stream);
1027 bytes_written += line->len;
1028}
1029
1030/* Return a new, initialized control record. */
1031
1032static struct control *
1033new_control_record (void)
1034{
1035 static size_t control_allocated = 0; /* Total space allocated. */
1036 struct control *p;
1037
1038 if (control_used == control_allocated)
1039 controls = X2NREALLOC (controls, &control_allocated);
1040 p = &controls[control_used++];
1041 p->regexpr = NULL;
1042 p->repeat = 0;
1043 p->repeat_forever = false;
1044 p->lines_required = 0;
1045 p->offset = 0;
1046 return p;
1047}
1048
1049/* Check if there is a numeric offset after a regular expression.
1050 STR is the entire command line argument.
1051 P is the control record for this regular expression.
1052 NUM is the numeric part of STR. */
1053
1054static void
1055check_for_offset (struct control *p, const char *str, const char *num)
1056{
1057 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1058 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1059}
1060
1061/* Given that the first character of command line arg STR is '{',
1062 make sure that the rest of the string is a valid repeat count
1063 and store its value in P.
1064 ARGNUM is the ARGV index of STR. */
1065
1066static void
1067parse_repeat_count (int argnum, struct control *p, char *str)
1068{
1069 uintmax_t val;
1070 char *end;
1071
1072 end = str + strlen (str) - 1;
1073 if (*end != '}')
1074 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1075 *end = '\0';
1076
1077 if (str+1 == end-1 && *(str+1) == '*')
1078 p->repeat_forever = true;
1079 else
1080 {
1081 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1082 {
1083 error (EXIT_FAILURE, 0,
1084 _("%s}: integer required between `{' and `}'"),
1085 global_argv[argnum]);
1086 }
1087 p->repeat = val;
1088 }
1089
1090 *end = '}';
1091}
1092
1093/* Extract the regular expression from STR and check for a numeric offset.
1094 STR should start with the regexp delimiter character.
1095 Return a new control record for the regular expression.
1096 ARGNUM is the ARGV index of STR.
1097 Unless IGNORE is true, mark these lines for output. */
1098
1099static struct control *
1100extract_regexp (int argnum, bool ignore, char *str)
1101{
1102 size_t len; /* Number of bytes in this regexp. */
1103 char delim = *str;
1104 char *closing_delim;
1105 struct control *p;
1106 const char *err;
1107
1108 closing_delim = strrchr (str + 1, delim);
1109 if (closing_delim == NULL)
1110 error (EXIT_FAILURE, 0,
1111 _("%s: closing delimiter `%c' missing"), str, delim);
1112
1113 len = closing_delim - str - 1;
1114 p = new_control_record ();
1115 p->argnum = argnum;
1116 p->ignore = ignore;
1117
1118 p->regexpr = xmalloc (len + 1);
1119 strncpy (p->regexpr, str + 1, len);
1120 p->re_compiled.allocated = len * 2;
1121 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1122 p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
1123 p->re_compiled.translate = NULL;
1124 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1125 if (err)
1126 {
1127 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1128 cleanup_fatal ();
1129 }
1130
1131 if (closing_delim[1])
1132 check_for_offset (p, str, closing_delim + 1);
1133
1134 return p;
1135}
1136
1137/* Extract the break patterns from args START through ARGC - 1 of ARGV.
1138 After each pattern, check if the next argument is a repeat count. */
1139
1140static void
1141parse_patterns (int argc, int start, char **argv)
1142{
1143 int i; /* Index into ARGV. */
1144 struct control *p; /* New control record created. */
1145 uintmax_t val;
1146 static uintmax_t last_val = 0;
1147
1148 for (i = start; i < argc; i++)
1149 {
1150 if (*argv[i] == '/' || *argv[i] == '%')
1151 {
1152 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1153 }
1154 else
1155 {
1156 p = new_control_record ();
1157 p->argnum = i;
1158
1159 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1160 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1161 if (val == 0)
1162 error (EXIT_FAILURE, 0,
1163 _("%s: line number must be greater than zero"),
1164 argv[i]);
1165 if (val < last_val)
1166 {
1167 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1168 error (EXIT_FAILURE, 0,
1169 _("line number %s is smaller than preceding line number, %s"),
1170 quote (argv[i]), umaxtostr (last_val, buf));
1171 }
1172
1173 if (val == last_val)
1174 error (0, 0,
1175 _("warning: line number %s is the same as preceding line number"),
1176 quote (argv[i]));
1177
1178 last_val = val;
1179
1180 p->lines_required = val;
1181 }
1182
1183 if (i + 1 < argc && *argv[i + 1] == '{')
1184 {
1185 /* We have a repeat count. */
1186 i++;
1187 parse_repeat_count (i, p, argv[i]);
1188 }
1189 }
1190}
1191
/* Skip the run of printf flag characters (`-', `+', ` ', `#') at
   *FORMAT_PTR, leaving *FORMAT_PTR on the first character that is not
   one of them.  Return the number of extra output bytes the flags may
   cost: 1 if a sign flag (`+' or ` ') was seen, plus 2 if `#' was seen
   (room for a 0x prefix preceding an `x' conversion).  */

static unsigned int
get_format_flags (char **format_ptr)
{
  unsigned int extra = 0;
  char *cursor = *format_ptr;

  for (;;)
    {
      char c = *cursor;

      if (c == '+' || c == ' ')
        extra |= 1;
      else if (c == '#')
        extra |= 2;
      else if (c != '-')
        break;                  /* Not a flag (possibly the NUL).  */

      cursor++;
    }

  *format_ptr = cursor;
  return extra;
}
1219
1220static size_t
1221get_format_width (char **format_ptr)
1222{
1223 unsigned long int val = 0;
1224
1225 if (ISDIGIT (**format_ptr)
1226 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1227 || SIZE_MAX < val))
1228 error (EXIT_FAILURE, 0, _("invalid format width"));
1229
1230 /* Allow for enough octal digits to represent the value of UINT_MAX,
1231 even if the field width is less than that. */
1232 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1233}
1234
1235static size_t
1236get_format_prec (char **format_ptr)
1237{
1238 if (**format_ptr != '.')
1239 return 0;
1240 (*format_ptr)++;
1241
1242 if (! ISDIGIT (**format_ptr))
1243 return 0;
1244 else
1245 {
1246 unsigned long int val;
1247 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1248 || SIZE_MAX < val)
1249 error (EXIT_FAILURE, 0, _("invalid format precision"));
1250 return val;
1251 }
1252}
1253
/* Consume the conversion character at *FORMAT_PTR (advancing
   *FORMAT_PTR by one) and check that it is one of the integer
   conversions d, i, o, u, x or X.  Exit with a diagnostic when the
   format ends before a conversion character or the character is
   anything else.  */

static void
get_format_conv_type (char **format_ptr)
{
  unsigned char conv = **format_ptr;
  bool is_integer_conv;

  ++*format_ptr;

  is_integer_conv = (conv == 'd' || conv == 'i' || conv == 'o'
                     || conv == 'u' || conv == 'x' || conv == 'X');
  if (is_integer_conv)
    return;

  if (conv == 0)
    error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
  else if (ISPRINT (conv))
    error (EXIT_FAILURE, 0,
           _("invalid conversion specifier in suffix: %c"), conv);
  else
    /* Show an unprintable byte as a three-digit octal escape.  */
    error (EXIT_FAILURE, 0,
           _("invalid conversion specifier in suffix: \\%.3o"), conv);
}
1282
/* Validate the suffix FORMAT and return a bound on how many bytes it
   can produce: one per literal character, one per `%%', and for the
   conversion specification the flag allowance plus the larger of its
   width bound and its precision.  Exit with a diagnostic unless FORMAT
   contains exactly one conversion specification.  */

static size_t
max_out (char *format)
{
  size_t total = 0;
  bool seen_conversion = false;
  char *cursor = format;

  while (*cursor != '\0')
    {
      size_t width;
      size_t prec;
      char c = *cursor++;

      if (c != '%')
        {
          total++;
          continue;
        }

      if (*cursor == '%')
        {
          /* `%%' emits a single literal percent sign.  */
          cursor++;
          total++;
          continue;
        }

      if (seen_conversion)
        error (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
      seen_conversion = true;

      /* The helpers advance CURSOR, so they must run in format order:
         flags, then width, then precision, then conversion type.  */
      total += get_format_flags (&cursor);
      width = get_format_width (&cursor);
      prec = get_format_prec (&cursor);
      total += width > prec ? width : prec;
      get_format_conv_type (&cursor);
    }

  if (! seen_conversion)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  return total;
}
1321
1322int
1323main (int argc, char **argv)
1324{
1325 int optc;
1326 unsigned long int val;
1327
1328 initialize_main (&argc, &argv);
1329 program_name = argv[0];
1330 setlocale (LC_ALL, "");
1331 bindtextdomain (PACKAGE, LOCALEDIR);
1332 textdomain (PACKAGE);
1333
1334 atexit (close_stdout);
1335
1336 global_argv = argv;
1337 controls = NULL;
1338 control_used = 0;
1339 suppress_count = false;
1340 remove_files = true;
1341 prefix = DEFAULT_PREFIX;
1342
1343 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1344 switch (optc)
1345 {
1346 case 'f':
1347 prefix = optarg;
1348 break;
1349
1350 case 'b':
1351 suffix = optarg;
1352 break;
1353
1354 case 'k':
1355 remove_files = false;
1356 break;
1357
1358 case 'n':
1359 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1360 || val > INT_MAX)
1361 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1362 digits = val;
1363 break;
1364
1365 case 's':
1366 case 'q':
1367 suppress_count = true;
1368 break;
1369
1370 case 'z':
1371 elide_empty_files = true;
1372 break;
1373
1374 case_GETOPT_HELP_CHAR;
1375
1376 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1377
1378 default:
1379 usage (EXIT_FAILURE);
1380 }
1381
1382 if (argc - optind < 2)
1383 {
1384 if (argc <= optind)
1385 error (0, 0, _("missing operand"));
1386 else
1387 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1388 usage (EXIT_FAILURE);
1389 }
1390
1391 if (suffix)
1392 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1393 else
1394 filename_space = xmalloc (strlen (prefix) + digits + 2);
1395
1396 set_input_file (argv[optind++]);
1397
1398 parse_patterns (argc, optind, argv);
1399
1400 {
1401 int i;
1402 static int const sig[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM };
1403 enum { nsigs = sizeof sig / sizeof sig[0] };
1404
1405#if SA_NOCLDSTOP
1406 struct sigaction act;
1407
1408 sigemptyset (&caught_signals);
1409 for (i = 0; i < nsigs; i++)
1410 {
1411 sigaction (sig[i], NULL, &act);
1412 if (act.sa_handler != SIG_IGN)
1413 sigaddset (&caught_signals, sig[i]);
1414 }
1415
1416 act.sa_handler = interrupt_handler;
1417 act.sa_mask = caught_signals;
1418 act.sa_flags = 0;
1419
1420 for (i = 0; i < nsigs; i++)
1421 if (sigismember (&caught_signals, sig[i]))
1422 sigaction (sig[i], &act, NULL);
1423#else
1424 for (i = 0; i < nsigs; i++)
1425 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1426 {
1427 signal (sig[i], interrupt_handler);
1428 siginterrupt (sig[i], 1);
1429 }
1430#endif
1431 }
1432
1433 split_file ();
1434
1435 if (close (STDIN_FILENO) != 0)
1436 {
1437 error (0, errno, _("read error"));
1438 cleanup_fatal ();
1439 }
1440
1441 exit (EXIT_SUCCESS);
1442}
1443
1444void
1445usage (int status)
1446{
1447 if (status != EXIT_SUCCESS)
1448 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1449 program_name);
1450 else
1451 {
1452 printf (_("\
1453Usage: %s [OPTION]... FILE PATTERN...\n\
1454"),
1455 program_name);
1456 fputs (_("\
1457Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1458and output byte counts of each piece to standard output.\n\
1459\n\
1460"), stdout);
1461 fputs (_("\
1462Mandatory arguments to long options are mandatory for short options too.\n\
1463"), stdout);
1464 fputs (_("\
1465 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1466 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1467 -k, --keep-files do not remove output files on errors\n\
1468"), stdout);
1469 fputs (_("\
1470 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1471 -s, --quiet, --silent do not print counts of output file sizes\n\
1472 -z, --elide-empty-files remove empty output files\n\
1473"), stdout);
1474 fputs (HELP_OPTION_DESCRIPTION, stdout);
1475 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1476 fputs (_("\
1477\n\
1478Read standard input if FILE is -. Each PATTERN may be:\n\
1479"), stdout);
1480 fputs (_("\
1481\n\
1482 INTEGER copy up to but not including specified line number\n\
1483 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1484 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1485 {INTEGER} repeat the previous pattern specified number of times\n\
1486 {*} repeat the previous pattern as many times as possible\n\
1487\n\
1488A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1489"), stdout);
1490 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1491 }
1492 exit (status);
1493}
Note: See TracBrowser for help on using the repository browser.