source: trunk/essentials/sys-apps/gawk/dfa.h@ 3078

Last change on this file since 3078 was 3076, checked in by bird, 19 years ago

gawk 3.1.5

File size: 16.7 KB
Line 
1/* dfa.h - declarations for GNU deterministic regexp compiler
2 Copyright (C) 1988, 1998, 2002, 2004 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
17
18/* Written June, 1988 by Mike Haertel */
19
20/* FIXME:
21 2. We should not export so much of the DFA internals.
22 In addition to clobbering modularity, we eat up valuable
23 name space. */
24
/* ptr_t is the generic pointer type declared by this header: `void *'
   when the compiler defines __STDC__, `char *' otherwise (presumably
   for pre-ANSI compilers that lack `void *').  The typedef is skipped
   entirely when _PTR_T is already defined; otherwise _PTR_T is defined
   here so that a later inclusion does not repeat the typedef. */
#ifndef _PTR_T
# define _PTR_T
# ifdef __STDC__
typedef void * ptr_t;
# else
typedef char * ptr_t;
# endif
#endif
36
/* PARAMS (args) expands to args when PROTOTYPES evaluates to nonzero
   and to an empty parameter list `()' otherwise.  It is presumably
   meant to be given a parenthesized parameter list, as in
   PARAMS ((int a)), so one declaration serves compilers with and
   without prototype support.  Any PARAMS definition already in effect
   is discarded first; #undef of a name that is not a macro is a no-op,
   so no #ifdef guard is needed around it. */
#undef PARAMS
#if PROTOTYPES
# define PARAMS(x) x
#else
# define PARAMS(x) ()
#endif
45
/* Number of bits in an unsigned char.  Defaults to 8 unless the
   includer has already defined CHARBITS. */
#ifndef CHARBITS
# define CHARBITS 8
#endif

/* First integer value that is greater than any character code. */
#define NOTCHAR (1 << CHARBITS)

/* Number of bits in an int.  INTBITS need not be exact, just a lower
   bound.  Note that the default expression involves sizeof and so has
   an unsigned type (size_t). */
#ifndef INTBITS
# define INTBITS (CHARBITS * sizeof (int))
#endif

/* Number of ints required to hold a bit for every character, i.e.
   NOTCHAR bits rounded up to a whole number of ints. */
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)

/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
typedef int charclass[CHARCLASS_INTS];
64
65/* The regexp is parsed into an array of tokens in postfix form. Some tokens
66 are operators and others are terminal symbols. Most (but not all) of these
67 codes are returned by the lexical analyzer. */
68
69typedef enum
70{
71 END = -1, /* END is a terminal symbol that matches the
72 end of input; any value of END or less in
73 the parse tree is such a symbol. Accepting
74 states of the DFA are those that would have
75 a transition on END. */
76
77 /* Ordinary character values are terminal symbols that match themselves. */
78
79 EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
80 the empty string. */
81
82 BACKREF, /* BACKREF is generated by \<digit>; it
83 is not completely handled. If the scanner
84 detects a transition on backref, it returns
85 a kind of "semi-success" indicating that
86 the match will have to be verified with
87 a backtracking matcher. */
88
89 BEGLINE, /* BEGLINE is a terminal symbol that matches
90 the empty string if it is at the beginning
91 of a line. */
92
93 ENDLINE, /* ENDLINE is a terminal symbol that matches
94 the empty string if it is at the end of