source: vendor/perl/5.8.8/lib/Text/ParseWords.pm

Last change on this file was 3181, checked in by bird, 19 years ago

perl 5.8.8

File size: 6.5 KB
Line 
package Text::ParseWords;

use strict;

# The code below relies on the warnings pragma ("no warnings ..."), which
# first shipped with perl 5.6, so that is the real minimum version.
require 5.006;

our $VERSION = "3.24";

use Exporter;
our @ISA       = qw(Exporter);

# Functions exported by default.
our @EXPORT    = qw(shellwords quotewords nested_quotewords parse_line);

# Functions exported only on request.
our @EXPORT_OK = qw(old_shellwords);

# When set to a true value, parse_line() (called with a false $keep) also
# removes the backslash from \\ and \' inside single-quoted text.
our $PERL_SINGLE_QUOTE;
12
13
sub shellwords {

    # Usage:
    #     @words = shellwords(@lines);
    #
    # Splits each line into whitespace-separated words the way a shell
    # would: quotes group text into a single word and are removed, and
    # backslash escapes are resolved (parse_line() is called with a false
    # $keep).  Leading and trailing whitespace on every line is ignored.
    #
    # Returns the words of all lines as one flat list.  Returns an empty
    # list if any non-empty line cannot be parsed (e.g. unbalanced quotes)
    # or if no lines were given.

    my (@lines) = @_;            # private copy; the caller's data is untouched
    my @allwords;

    foreach my $line (@lines) {
        # Leading whitespace would otherwise be seen as a delimiter that
        # follows an empty first word.
        $line =~ s/\A\s+//;

        my @words = parse_line('\s+', 0, $line);

        # parse_line() reports a trailing delimiter as a final undef
        # element; trailing whitespace is not a word, so drop it.
        pop(@words) if (@words && !defined $words[-1]);

        # No words from a non-empty line means the parse failed.
        return() unless (@words || !length($line));

        push(@allwords, @words);
    }
    return(@allwords);
}
19
20
21
sub quotewords {

    # Usage:
    #     @words = quotewords($delim, $keep, @lines);
    #
    # Runs parse_line($delim, $keep, ...) over every line and returns all
    # resulting tokens as one flat list.  If a non-empty line produces no
    # tokens, the whole call returns an empty list.

    my ($delim, $keep, @lines) = @_;
    my @collected;

    for my $text (@lines) {
        my @tokens = parse_line($delim, $keep, $text);

        # Only an empty line may legitimately yield nothing.
        if (!@tokens && length($text)) {
            return();
        }

        push @collected, @tokens;
    }

    return @collected;
}
33
34
35
sub nested_quotewords {

    # Usage:
    #     @lists = nested_quotewords($delim, $keep, @lines);
    #
    # Like quotewords(), but keeps the tokens of each line separate: the
    # result holds one array reference per input line, in input order.
    # If a non-empty line produces no tokens, the whole call returns an
    # empty list.

    my ($delim, $keep, @lines) = @_;
    my @rows;

    foreach my $text (@lines) {
        my @tokens = parse_line($delim, $keep, $text);

        # Only an empty line may legitimately yield nothing.
        return() if (!@tokens && length($text));

        push(@rows, \@tokens);
    }

    return @rows;
}
46
47
48
sub parse_line {

    # Usage:
    #     @words = parse_line($delimiter, $keep, $line);
    #
    # Tokenizes a single string.
    #
    #   $delimiter  regular expression source that separates words; it is
    #               ignored inside single- or double-quoted text.
    #   $keep       false:        quotes are removed and backslash escapes
    #                             are resolved in unquoted and double-quoted
    #                             text (single-quoted text is left alone
    #                             unless $PERL_SINGLE_QUOTE is true, in
    #                             which case \\ and \' are resolved);
    #               true:         quotes and backslashes are kept as-is;
    #               'delimiters': as for true, and every matched delimiter
    #                             is returned as a token of its own.
    #   $line       the text to split.
    #
    # Returns the list of words.  A line that ends in a delimiter yields a
    # final undef element.  Returns an empty list when the text cannot be
    # parsed, e.g. because of an unbalanced quote.

    my($delimiter, $keep, $line) = @_;
    my($word, @pieces);

    no warnings 'uninitialized';    # we will be testing undef strings

    while (length($line)) {
        # Quoted text is matched with one unrolled, atomic pattern per quote
        # character instead of a quantified alternation that advances one
        # character per iteration.  On perls with a recursive regex engine
        # (before 5.10) the latter could exhaust the C stack on long quoted
        # strings (see Perl bug 42980).  The set of accepted strings is the
        # same.  Do not refactor without testing on very long inputs.
        $line =~ s/^
                    (?:
                        (")                             # double quote
                        ((?>[^\\"]*(?:\\.[^\\"]*)*))"   # quoted text, closing quote
                    |                                   # --OR--
                        (')                             # single quote
                        ((?>[^\\']*(?:\\.[^\\']*)*))'   # quoted text, closing quote
                    |                                   # --OR--
                        ((?:\\.|[^\\"'])*?)             # unquoted text
                        (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
                                                        # plus EOL, delimiter, or quote
                    )
                  //xs or return;                       # extended layout

        # Groups 1-2 belong to the double-quote branch, 3-4 to the
        # single-quote branch, 5-6 to the unquoted branch.
        my($quote, $quoted)   = defined($1) ? ($1, $2) : ($3, $4);
        my($unquoted, $delim) = ($5, $6);

        # An empty match would never consume input; treat it as a failure.
        return() unless( defined($quote) || length($unquoted) || length($delim));

        if ($keep) {
            $quoted = "$quote$quoted$quote";
        }
        else {
            $unquoted =~ s/\\(.)/$1/sg;
            if (defined $quote) {
                $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
                $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
            }
        }
        $word .= substr($line, 0, 0);   # leave results tainted
        $word .= defined $quote ? $quoted : $unquoted;

        # A delimiter ends the current word; adjacent quoted and unquoted
        # pieces without a delimiter between them are joined into one word.
        if (length($delim)) {
            push(@pieces, $word);
            push(@pieces, $delim) if ($keep eq 'delimiters');
            undef $word;
        }
        # End of input: flush whatever is pending (undef after a trailing
        # delimiter).
        if (!length($line)) {
            push(@pieces, $word);
        }
    }
    return(@pieces);
}
91
92
93
sub old_shellwords {

    # Usage:
    #     use ParseWords;
    #     @words = old_shellwords($line);
    # or
    #     @words = old_shellwords(@lines);
    # or
    #     @words = old_shellwords();    # defaults to $_ (and clobbers it)
    #
    # Splits the concatenation of its arguments into shell-style words.
    # Backslash escapes are resolved inside both kinds of quotes and in
    # unquoted text.  On an unmatched quote a warning is issued via
    # Carp::carp and an empty list is returned.

    no warnings 'uninitialized';    # we will be testing undef strings

    # With arguments, work on a private $_ holding the joined text;
    # without arguments, consume the caller's $_ directly.
    local *_ = \join('', @_) if @_;
    my @words;

    s/\A\s+//;
    until ($_ eq '') {
        my $field = substr($_, 0, 0);   # leave results tainted

        PIECE: while (1) {
            my $piece;

            if (/\A"/) {
                # Double-quoted piece; it must be properly terminated.
                unless (s/\A"(([^"\\]|\\.)*)"//s) {
                    require Carp;
                    Carp::carp("Unmatched double quote: $_");
                    return();
                }
                $piece = $1;
                $piece =~ s#\\(.)#$1#sg;
            }
            elsif (/\A'/) {
                # Single-quoted piece; it must be properly terminated.
                unless (s/\A'(([^'\\]|\\.)*)'//s) {
                    require Carp;
                    Carp::carp("Unmatched single quote: $_");
                    return();
                }
                $piece = $1;
                $piece =~ s#\\(.)#$1#sg;
            }
            elsif (s/\A\\(.)//s) {
                # Backslash-escaped single character.
                $piece = $1;
            }
            elsif (s/\A([^\s\\'"]+)//) {
                # Run of ordinary characters.
                $piece = $1;
            }
            else {
                # Whitespace or end of input: the current word is complete.
                s/\A\s+//;
                last PIECE;
            }

            $field .= $piece;
        }

        push @words, $field;
    }

    return @words;
}
144
1451;
146
147__END__
148
=head1 NAME

Text::ParseWords - parse text into an array of tokens or array of arrays
152
=head1 SYNOPSIS

  use Text::ParseWords;
  @lists = &nested_quotewords($delim, $keep, @lines);
  @words = &quotewords($delim, $keep, @lines);
  @words = &shellwords(@lines);
  @words = &parse_line($delim, $keep, $line);
  @words = &old_shellwords(@lines); # DEPRECATED!
161
=head1 DESCRIPTION

The &nested_quotewords() and &quotewords() functions accept a delimiter
(which can be a regular expression)
and a list of lines and then break those lines up into a list of
words, ignoring delimiters that appear inside quotes.  &quotewords()
returns all of the tokens in a single long list, while &nested_quotewords()
returns a list of token lists corresponding to the elements of @lines.
&parse_line() does tokenizing on a single string.  The &*quotewords()
functions simply call &parse_line(), so if you're only splitting
one line you can call &parse_line() directly and save a function
call.