source: trunk/essentials/sys-devel/flex/parse.y@ 3285

Last change on this file since 3285 was 3043, checked in by bird, 19 years ago

-> essentials

File size: 21.1 KB
Line 
1/* parse.y - parser for flex input */
2
/* Terminal symbols used by the grammar below. */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER
%token OPT_TABLES

/* CCE_* tokens: presumably one per POSIX character-class expression
 * (they are consumed outside the rules shown here -- confirm).
 */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9
/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other POSIX
 * utilities use an Extended Regular Expression (ERE) precedence that has
 * the repeat operator higher than concatenation.  This causes ab{3} to
 * yield abbb.
 *
 * In order to support both the POSIX/AT&T precedence and the flex
 * precedence, we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses which
 * tokens to return based on whether posix_compat or lex_compat is
 * specified.  Specifying either posix_compat or lex_compat will cause
 * flex to parse scanner files as per the AT&T and POSIX-mandated
 * behavior.
 */
25
/* Two spellings of the repeat delimiters: the "series" rule consumes the
 * *_POSIX pair, the "singleton" rule consumes the *_FLEX pair.
 */
%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
27
28
29%{
30/* Copyright (c) 1990 The Regents of the University of California. */
31/* All rights reserved. */
32
33/* This code is derived from software contributed to Berkeley by */
34/* Vern Paxson. */
35
36/* The United States Government has rights in this work pursuant */
37/* to contract no. DE-AC03-76SF00098 between the United States */
38/* Department of Energy and the University of California. */
39
40/* This file is part of flex. */
41
42/* Redistribution and use in source and binary forms, with or without */
43/* modification, are permitted provided that the following conditions */
44/* are met: */
45
46/* 1. Redistributions of source code must retain the above copyright */
47/* notice, this list of conditions and the following disclaimer. */
48/* 2. Redistributions in binary form must reproduce the above copyright */
49/* notice, this list of conditions and the following disclaimer in the */
50/* documentation and/or other materials provided with the distribution. */
51
52/* Neither the name of the University nor the names of its contributors */
53/* may be used to endorse or promote products derived from this software */
54/* without specific prior written permission. */
55
56/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
57/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
58/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
59/* PURPOSE. */
60
/* Some versions of bison are broken in that they use alloca() but don't
 * declare it properly.  The following is the patented (just kidding!)
 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
 */
#ifdef YYBISON
/* AIX requires this to be the first thing in the file.  What a piece. */
#  ifdef _AIX
 #pragma alloca
#  endif
#endif
71
72#include "flexdef.h"
73#include "tables.h"
74
/* The remainder of the alloca() cruft has to come after including flexdef.h,
 * so HAVE_ALLOCA_H is (possibly) defined.
 *
 * Selection order when YYBISON is defined:
 *   GNU C          -> alloca maps to __builtin_alloca (unless already a macro)
 *   HAVE_ALLOCA_H  -> <alloca.h>
 *   HP-UX          -> void *alloca()
 *   Turbo C        -> <malloc.h>
 *   anything else  -> char *alloca()
 */
#ifdef YYBISON
#  ifdef __GNUC__
#    ifndef alloca
#      define alloca __builtin_alloca
#    endif
#  else
#    if HAVE_ALLOCA_H
#      include <alloca.h>
#    else
#      ifdef __hpux
void *alloca ();
#      else
#        ifdef __TURBOC__
#          include <malloc.h>
#        else
char *alloca ();
#        endif
#      endif
#    endif
#  endif
#endif
99
100/* Bletch, ^^^^ that was ugly! */
101
102
103int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
104int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
105
106int *scon_stk;
107int scon_stk_ptr;
108
109static int madeany = false; /* whether we've made the '.' character class */
110int previous_continued_action; /* whether the previous rule's action was '|' */
111
/* Format a two-argument warning into a local MAXLINE-sized buffer and
 * pass it to warn().  The snprintf() bound is taken from the buffer
 * itself so the two can never drift apart; over-long messages are
 * truncated.
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_msg[MAXLINE]; \
		snprintf( fw3_msg, sizeof fw3_msg, (fmt), (a1), (a2) ); \
		warn( fw3_msg ); \
	} while (0)
118
/* Expand a POSIX character class expression: every character code in
 * [0, csize) that is ASCII and for which func() is non-zero is added to
 * the class currccl.
 */
#define CCL_EXPR(func) \
	do { \
		int c; \
		for ( c = 0; c < csize; ++c ) { \
			if ( isascii(c) && func(c) ) \
				ccladd( currccl, c ); \
		} \
	} while (0)
127
/* While POSIX defines isblank(), it's not ANSI C.  Non-zero for a space
 * or a horizontal tab.  Note: the argument is evaluated up to twice.
 */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
130
/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int
138
139%}
140
141%%
/* Start symbol.  Once both sections have been parsed, append the
 * default rule and its action.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{
			int def_rule;

			/* The default rule's pattern is a negated, freshly
			 * initialized character class.
			 */
			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0 );

			/* Branch every start condition to the default rule. */
			for ( i = 1; i <= lastsc; ++i )
				{
				scset[i] = mkbranch( scset[i], def_rule );
				}

			/* Action text: fatal error when spprdflt is set,
			 * otherwise ECHO.
			 */
			if ( spprdflt )
				{
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
				}
			else
				{
				add_action( "ECHO" );
				}

			add_action( ";\n\tYY_BREAK\n" );
			}
		;
170
/* Empty marker rule reduced before section 1: initialize for
 * processing rules.
 */
initlex		:
			{
			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;
178
/* Section 1: any sequence of start-condition declarations and option
 * lines, possibly empty.  A parse error is reported through synerr().
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;
185
/* End of section 1: validate the options, then allocate the
 * start-condition stack with lastsc + 1 slots and mark it empty.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
193
/* Records in xcluflg which declaration keyword was seen; namelist1
 * passes the flag on to scinstal().
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;
200
/* One or more start-condition names in a declaration; each name (held
 * in nmstr) is installed with the current xcluflg.
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;
210
/* An option line: OPTION_OP followed by zero or more options. */
options		:  OPTION_OP optionlist
		;

optionlist	:  optionlist option
		|
		;
217
/* A single "keyword = NAME" option.  The value arrives in nmstr and is
 * duplicated with copy_string() before being stored.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}

		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }

		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }

		|  OPT_HEADER '=' NAME
			{ headerfilename = copy_string( nmstr ); }

		|  OPT_TABLES '=' NAME
			{
			tablesext = true;
			tablesfilename = copy_string( nmstr );
			}
		;
232
/* Section 2: a possibly empty list of rules, or of brace-enclosed rule
 * groups sharing a start-condition prefix.  $2 is the stack depth that
 * scon saved before it pushed anything; restoring it pops the start
 * conditions that applied only to this rule or group.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }

		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }

		|
		;
239
/* Empty marker rule: initialize for a parse of one rule. */
initforrule	:
			{
			/* No trailing context seen yet; nothing is
			 * variable-length.
			 */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;

			/* Latch whether the previous rule's action was '|'. */
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
252
/* A complete rule.  Four forms: anchored with '^' (branches are added
 * to scbol[]), unanchored (branches are added to scset[]), the EOF
 * operator, and a syntax error.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt, previous_continued_action );

			if ( scon_stk_ptr > 0 )
				{
				/* Only the start conditions on the stack. */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					{
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
							pat );
					}
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */
				for ( i = 1; i <= lastsc; ++i )
					{
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
					}
				}

			/* Note (once) that beginning-of-line tracking is
			 * required.
			 */
			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					{
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
					}
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt, previous_continued_action );

			if ( scon_stk_ptr > 0 )
				{
				/* Only the start conditions on the stack. */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					{
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
							pat );
					}
				}

			else
				{
				/* All non-exclusive start conditions. */
				for ( i = 1; i <= lastsc; ++i )
					{
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
					}
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				{
				build_eof_action();
				}

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					{
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;
					}

				if ( scon_stk_ptr == 0 )
					{
					warn(
			"all start conditions already have <<EOF>> rules" );
					}

				else
					{
					build_eof_action();
					}
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;
339
/* Empty marker rule whose value is the start-condition stack depth at
 * the moment it is reduced.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;
343
/* Optional start-condition prefix of a rule.  In every form the value
 * is the stack depth from before this prefix pushed anything.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition that is not already
			 * on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					{
					if ( scon_stk[j] == i )
						break;
					}

				/* Loop ran to completion: i was not found. */
				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
367
/* Comma-separated start-condition names inside '<' ... '>'. */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;
375
/* One start-condition name in a rule prefix.  Unknown names are
 * reported; a name already on the stack draws a warning and is not
 * pushed again.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				{
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
				}
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					{
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}
					}

				/* No duplicate found: push it. */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
398
/* A rule's pattern, with or without trailing context:
 *   re2 re     head '/' trail
 *   re2 re '$' error: trailing context used twice
 *   re '$'     head followed by end-of-line (a '\n' trail)
 *   re         plain pattern
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				{
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );
				}

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					{
					warn(
		"trailing context made variable due to preceding '|' action" );
					}

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;
				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				{
				trailcnt = rulelen;
				}

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			/* The trail is exactly one character ('\n'). */
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				/* This value is replaced by the final
				 * assignment to $$ below.
				 */
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				warn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			/* head, then an epsilon state, then '\n'. */
			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					{
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
					}
				else
					{
					trailcnt = rulelen;
					}
				}
			}
		;
517
518
/* Alternation of series.  Any '|' marks the pattern as
 * variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;
528
529
/* The head of a trailing-context rule, up to and including the '/'. */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				}
			else
				{
				trlcontxt = true;
				}

			if ( varlength )
				{
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
				}
			else
				{
				/* Fixed-length head: record its length. */
				headcnt = rulelen;
				}

			/* Start counting the trail's length from zero. */
			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
556
/* Concatenation of singletons, plus the POSIX-precedence repeat
 * operator, which applies to the whole series seen so far.  On a bad
 * count the error is reported and the series is passed through
 * unchanged.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			/* Reject min > max, a negative min, and {0,0}. */
			if ( $3 > $5 || $3 < 0 || ($3 == 0 && $5 <= 0) )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}

			else if ( $3 == 0 )
				{
				/* {0,n}: an optional {1,n}. */
				$$ = mkopt( mkrep( $1, 1, $5 ) );
				}

			else
				{
				$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				{
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				{
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
				}
			}

		;
631
/* The smallest pattern unit: a closure or repeat applied to a
 * singleton, '.', a character class, a quoted string, a parenthesized
 * re, or a single character.  On a bad repeat count the error is
 * reported and the singleton is passed through unchanged.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			/* Reject min > max, a negative min, and {0,0}. */
			if ( $3 > $5 || $3 < 0 || ($3 == 0 && $5 <= 0) )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}

			else if ( $3 == 0 )
				{
				/* {0,n}: an optional {1,n}. */
				$$ = mkopt( mkrep( $1, 1, $5 ) );
				}

			else
				{
				$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				{
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				{
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
				}
			}

		|  '.'
			{
			/* The '.' class is built once and reused. */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				anyccl = cclinit();
				ccladd( anyccl, '\n' );
				cclnegate( anyccl );

				if ( useecs )
					{
					mkeccl( ccltbl + cclmap[anyccl],
						ccllen[anyccl], nextecm,
						ecgroup, csize, csize );
					}

				madeany = true;
				}

			++rulelen;

			$$ = mkstate( -anyccl );
			}

		|  fullccl
			{
			if ( ! cclsorted )
				{
				/* Sort characters for fast searching.  We
				 * use a shell sort since this list could
				 * be large.
				 */
				cshell( ccltbl + cclmap[$1], ccllen[$1], true );
				}

			if ( useecs )
				{
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );
				}

			++rulelen;

			if ( ccl_has_nl[$1] )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if ( ccl_has_nl[$1] )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			/* Case-insensitive scanners fold 'A'..'Z' to
			 * lower case.
			 */
			if ( caseins && $1 >= 'A' && $1 <= 'Z' )
				$1 = clower( $1 );

			if ( $1 == nlch )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( $1 );
			}
		;
784
785fullccl : '[' ccl ']'
786 { $$ = $2; }