Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/essentials/dev-lang/python/Parser/parser.c@ 3506

Visit:

Last change on this file since 3506 was 3225, checked in by bird, 19 years ago
Python 2.5
File size: 9.5 KB

Line
1
2	/* Parser implementation */
3
4	/* For a description, see the comments at end of this file */
5
6	/* XXX To do: error recovery */
7
8	#include "Python.h"
9	#include "pgenheaders.h"
10	#include "token.h"
11	#include "grammar.h"
12	#include "node.h"
13	#include "parser.h"
14	#include "errcode.h"
15
16
17	#ifdef Py_DEBUG
18	extern int Py_DebugFlag;
19	#define D(x) if (!Py_DebugFlag); else x
20	#else
21	#define D(x)
22	#endif
23
24
25	/* STACK DATA TYPE */
26
27	static void s_reset(stack *);
28
29	static void
30	s_reset(stack *s)
31	{
32	s->s_top = &s->s_base[MAXSTACK];
33	}
34
35	#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
37	static int
38	s_push(register stack s, dfa d, node *parent)
39	{
40	register stackentry *top;
41	if (s->s_top == s->s_base) {
42	fprintf(stderr, "s_push: parser stack overflow\n");
43	return E_NOMEM;
44	}
45	top = --s->s_top;
46	top->s_dfa = d;
47	top->s_parent = parent;
48	top->s_state = 0;
49	return 0;
50	}
51
52	#ifdef Py_DEBUG
53
54	static void
55	s_pop(register stack *s)
56	{
57	if (s_empty(s))
58	Py_FatalError("s_pop: parser stack underflow -- FATAL");
59	s->s_top++;
60	}
61
62	#else /* !Py_DEBUG */
63
64	#define s_pop(s) (s)->s_top++
65
66	#endif
67
68
69	/* PARSER CREATION */
70
71	parser_state *
72	PyParser_New(grammar *g, int start)
73	{
74	parser_state *ps;
75
76	if (!g->g_accel)
77	PyGrammar_AddAccelerators(g);
78	ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79	if (ps == NULL)
80	return NULL;
81	ps->p_grammar = g;
82	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83	ps->p_flags = 0;
84	#endif
85	ps->p_tree = PyNode_New(start);
86	if (ps->p_tree == NULL) {
87	PyMem_FREE(ps);
88	return NULL;
89	}
90	s_reset(&ps->p_stack);
91	(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92	return ps;
93	}
94
95	void
96	PyParser_Delete(parser_state *ps)
97	{
98	/* NB If you want to save the parse tree,
99	you must set p_tree to NULL before calling delparser! */
100	PyNode_Free(ps->p_tree);
101	PyMem_FREE(ps);
102	}
103
104
105	/* PARSER STACK OPERATIONS */
106
107	static int
108	shift(register stack s, int type, char str, int newstate, int lineno, int col_offset)
109	{
110	int err;
111	assert(!s_empty(s));
112	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113	if (err)
114	return err;
115	s->s_top->s_state = newstate;
116	return 0;
117	}
118
119	static int
120	push(register stack s, int type, dfa d, int newstate, int lineno, int col_offset)
121	{
122	int err;
123	register node *n;
124	n = s->s_top->s_parent;
125	assert(!s_empty(s));
126	err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127	if (err)
128	return err;
129	s->s_top->s_state = newstate;
130	return s_push(s, d, CHILD(n, NCH(n)-1));
131	}
132
133
134	/* PARSER PROPER */
135
136	static int
137	classify(parser_state ps, int type, char str)
138	{
139	grammar *g = ps->p_grammar;
140	register int n = g->g_ll.ll_nlabels;
141
142	if (type == NAME) {
143	register char *s = str;
144	register label *l = g->g_ll.ll_label;
145	register int i;
146	for (i = n; i > 0; i--, l++) {
147	if (l->lb_type != NAME \|\| l->lb_str == NULL \|\|
148	l->lb_str[0] != s[0] \|\|
149	strcmp(l->lb_str, s) != 0)
150	continue;
151	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152	if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
153	if (s[0] == 'w' && strcmp(s, "with") == 0)
154	break; /* not a keyword yet */
155	else if (s[0] == 'a' && strcmp(s, "as") == 0)
156	break; /* not a keyword yet */
157	}
158	#endif
159	D(printf("It's a keyword\n"));
160	return n - i;
161	}
162	}
163
164	{
165	register label *l = g->g_ll.ll_label;
166	register int i;
167	for (i = n; i > 0; i--, l++) {
168	if (l->lb_type == type && l->lb_str == NULL) {
169	D(printf("It's a token we know\n"));
170	return n - i;
171	}
172	}
173	}
174
175	D(printf("Illegal token\n"));
176	return -1;
177	}
178
179	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
180	static void
181	future_hack(parser_state *ps)
182	{
183	node *n = ps->p_stack.s_top->s_parent;
184	node *ch;
185	int i;
186
187	/* from __future__ import ..., must have at least 4 children */
188	n = CHILD(n, 0);
189	if (NCH(n) < 4)
190	return;
191	ch = CHILD(n, 0);
192	if (STR(ch) == NULL \|\| strcmp(STR(ch), "from") != 0)
193	return;
194	ch = CHILD(n, 1);
195	if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
196	strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
197	return;
198	for (i = 3; i < NCH(n); i += 2) {
199	/* XXX: assume we don't have parentheses in import:
200	from __future__ import (x, y, z)
201	*/
202	ch = CHILD(n, i);
203	if (NCH(ch) == 1)
204	ch = CHILD(ch, 0);
205	if (NCH(ch) >= 1 && TYPE(CHILD(ch, 0)) == NAME &&
206	strcmp(STR(CHILD(ch, 0)), "with_statement") == 0) {
207	ps->p_flags \|= CO_FUTURE_WITH_STATEMENT;
208	break;
209	}
210	}
211	}
212	#endif /* future keyword */
213
214	int
215	PyParser_AddToken(register parser_state ps, register int type, char str,
216	int lineno, int col_offset, int *expected_ret)
217	{
218	register int ilabel;
219	int err;
220
221	D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
222
223	/* Find out which label this token is */
224	ilabel = classify(ps, type, str);
225	if (ilabel < 0)
226	return E_SYNTAX;
227
228	/* Loop until the token is shifted or an error occurred */
229	for (;;) {
230	/* Fetch the current dfa and state */
231	register dfa *d = ps->p_stack.s_top->s_dfa;
232	register state *s = &d->d_state[ps->p_stack.s_top->s_state];
233
234	D(printf(" DFA '%s', state %d:",
235	d->d_name, ps->p_stack.s_top->s_state));
236
237	/* Check accelerator */
238	if (s->s_lower <= ilabel && ilabel < s->s_upper) {
239	register int x = s->s_accel[ilabel - s->s_lower];
240	if (x != -1) {
241	if (x & (1<<7)) {
242	/* Push non-terminal */
243	int nt = (x >> 8) + NT_OFFSET;
244	int arrow = x & ((1<<7)-1);
245	dfa *d1 = PyGrammar_FindDFA(
246	ps->p_grammar, nt);
247	if ((err = push(&ps->p_stack, nt, d1,
248	arrow, lineno, col_offset)) > 0) {
249	D(printf(" MemError: push\n"));
250	return err;
251	}
252	D(printf(" Push ...\n"));
253	continue;
254	}
255
256	/* Shift the token */
257	if ((err = shift(&ps->p_stack, type, str,
258	x, lineno, col_offset)) > 0) {
259	D(printf(" MemError: shift.\n"));
260	return err;
261	}
262	D(printf(" Shift.\n"));
263	/* Pop while we are in an accept-only state */
264	while (s = &d->d_state
265	[ps->p_stack.s_top->s_state],
266	s->s_accept && s->s_narcs == 1) {
267	D(printf(" DFA '%s', state %d: "
268	"Direct pop.\n",
269	d->d_name,
270	ps->p_stack.s_top->s_state));
271	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
272	if (d->d_name[0] == 'i' &&
273	strcmp(d->d_name,
274	"import_stmt") == 0)
275	future_hack(ps);
276	#endif
277	s_pop(&ps->p_stack);
278	if (s_empty(&ps->p_stack)) {
279	D(printf(" ACCEPT.\n"));
280	return E_DONE;
281	}
282	d = ps->p_stack.s_top->s_dfa;
283	}
284	return E_OK;
285	}
286	}
287
288	if (s->s_accept) {
289	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
290	if (d->d_name[0] == 'i' &&
291	strcmp(d->d_name, "import_stmt") == 0)
292	future_hack(ps);
293	#endif
294	/* Pop this dfa and try again */
295	s_pop(&ps->p_stack);
296	D(printf(" Pop ...\n"));
297	if (s_empty(&ps->p_stack)) {
298	D(printf(" Error: bottom of stack.\n"));
299	return E_SYNTAX;
300	}
301	continue;
302	}
303
304	/* Stuck, report syntax error */
305	D(printf(" Error.\n"));
306	if (expected_ret) {
307	if (s->s_lower == s->s_upper - 1) {
308	/* Only one possible expected token */
309	*expected_ret = ps->p_grammar->
310	g_ll.ll_label[s->s_lower].lb_type;
311	}
312	else
313	*expected_ret = -1;
314	}
315	return E_SYNTAX;
316	}
317	}
318
319
320	#ifdef Py_DEBUG
321
322	/* DEBUG OUTPUT */
323
324	void
325	dumptree(grammar g, node n)
326	{
327	int i;
328
329	if (n == NULL)
330	printf("NIL");
331	else {
332	label l;
333	l.lb_type = TYPE(n);
334	l.lb_str = STR(n);
335	printf("%s", PyGrammar_LabelRepr(&l));
336	if (ISNONTERMINAL(TYPE(n))) {
337	printf("(");
338	for (i = 0; i < NCH(n); i++) {
339	if (i > 0)
340	printf(",");
341	dumptree(g, CHILD(n, i));
342	}
343	printf(")");
344	}
345	}
346	}
347
348	void
349	showtree(grammar g, node n)
350	{
351	int i;
352
353	if (n == NULL)
354	return;
355	if (ISNONTERMINAL(TYPE(n))) {
356	for (i = 0; i < NCH(n); i++)
357	showtree(g, CHILD(n, i));
358	}
359	else if (ISTERMINAL(TYPE(n))) {
360	printf("%s", _PyParser_TokenNames[TYPE(n)]);
361	if (TYPE(n) == NUMBER \|\| TYPE(n) == NAME)
362	printf("(%s)", STR(n));
363	printf(" ");
364	}
365	else
366	printf("? ");
367	}
368
369	void
370	printtree(parser_state *ps)
371	{
372	if (Py_DebugFlag) {
373	printf("Parse tree:\n");
374	dumptree(ps->p_grammar, ps->p_tree);
375	printf("\n");
376	printf("Tokens:\n");
377	showtree(ps->p_grammar, ps->p_tree);
378	printf("\n");
379	}
380	printf("Listing:\n");
381	PyNode_ListTree(ps->p_tree);
382	printf("\n");
383	}
384
385	#endif /* Py_DEBUG */
386
387	/*
388
389	Description
390	-----------
391
392	The parser's interface is different than usual: the function addtoken()
393	must be called for each token in the input. This makes it possible to
394	turn it into an incremental parsing system later. The parsing system
395	constructs a parse tree as it goes.
396
397	A parsing rule is represented as a Deterministic Finite-state Automaton
398	(DFA). A node in a DFA represents a state of the parser; an arc represents
399	a transition. Transitions are either labeled with terminal symbols or
400	with non-terminals. When the parser decides to follow an arc labeled
401	with a non-terminal, it is invoked recursively with the DFA representing
402	the parsing rule for that as its initial state; when that DFA accepts,
403	the parser that invoked it continues. The parse tree constructed by the
404	recursively called parser is inserted as a child in the current parse tree.
405
406	The DFA's can be constructed automatically from a more conventional
407	language description. An extended LL(1) grammar (ELL(1)) is suitable.
408	Certain restrictions make the parser's life easier: rules that can produce
409	the empty string should be outlawed (there are other ways to put loops
410	or optional parts in the language). To avoid the need to construct
411	FIRST sets, we can require that all but the last alternative of a rule
412	(really: arc going out of a DFA's state) must begin with a terminal
413	symbol.
414
415	As an example, consider this grammar:
416
417	expr: term (OP term)*
418	term: CONSTANT \| '(' expr ')'
419
420	The DFA corresponding to the rule for expr is:
421
422	------->.---term-->.------->
423	^ \|
424	\| \|
425	\----OP----/
426
427	The parse tree generated for the input a+b is:
428
429	(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
430
431	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: