Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

tokenizer.cpp@ 348

Last change on this file since 348 was 2, checked in by Dmitry A. Kuminov, 16 years ago
Initially imported qt-all-opensource-src-4.5.1 from Trolltech.
File size: 11.2 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4	** Contact: Qt Software Information (qt-info@nokia.com)
5	** Copyright (C) 2001-2004 Roberto Raggi
6	**
7	** This file is part of the qt3to4 porting application of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:LGPL$
10	** Commercial Usage
11	** Licensees holding valid Qt Commercial licenses may use this file in
12	** accordance with the Qt Commercial License Agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and Nokia.
15	**
16	** GNU Lesser General Public License Usage
17	** Alternatively, this file may be used under the terms of the GNU Lesser
18	** General Public License version 2.1 as published by the Free Software
19	** Foundation and appearing in the file LICENSE.LGPL included in the
20	** packaging of this file. Please review the following information to
21	** ensure the GNU Lesser General Public License version 2.1 requirements
22	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23	**
24	** In addition, as a special exception, Nokia gives you certain
25	** additional rights. These rights are described in the Nokia Qt LGPL
26	** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
27	** package.
28	**
29	** GNU General Public License Usage
30	** Alternatively, this file may be used under the terms of the GNU
31	** General Public License version 3.0 as published by the Free Software
32	** Foundation and appearing in the file LICENSE.GPL included in the
33	** packaging of this file. Please review the following information to
34	** ensure the GNU General Public License version 3.0 requirements will be
35	** met: http://www.gnu.org/copyleft/gpl.html.
36	**
37	** If you are unsure which license is appropriate for your use, please
38	** contact the sales department at qt-sales@nokia.com.
39	** $QT_END_LICENSE$
40	**
41	****************************************************************************/
42
43	#include "tokenizer.h"
44	#include "tokens.h"
45	#include <QDateTime>
46	#include <QHash>
47	#include <ctype.h>
48
49	QT_BEGIN_NAMESPACE
50
51	using TokenEngine::Token;
52
53	static QHash<QByteArray, bool> preprocessed;
54	bool Tokenizer::s_initialized = false;
55	Tokenizer::scan_fun_ptr Tokenizer::s_scan_table[128 + 1];
56	int Tokenizer::s_attr_table[256];
57
58	Tokenizer::Tokenizer()
59	: m_buffer(0), m_ptr(0)
60	{
61	if (!s_initialized)
62	setupScanTable();
63	}
64
65	Tokenizer::~Tokenizer()
66	{
67	}
68
/*
    Character-class bits stored in Tokenizer::s_attr_table.
    A_Alphanum is the union of the alpha and digit bits.
*/
enum
{
    A_Alpha      = 1 << 0,
    A_Digit      = 1 << 1,
    A_Whitespace = 1 << 2,
    A_Alphanum   = A_Alpha | A_Digit
};
76
77	void Tokenizer::setupScanTable()
78	{
79	s_initialized = true;
80
81	memset(s_attr_table, 0, 256);
82
83	for (int i=0; i<128; ++i) {
84	switch (i) {
85	case ':':
86	case '*':
87	case '%':
88	case '^':
89	case '=':
90	case '!':
91	case '&':
92	case '\|':
93	case '+':
94	case '<':
95	case '>':
96	case '-':
97	case '.':
98	s_scan_table[i] = &Tokenizer::scanOperator;
99	break;
100
101	case '\r':
102	case '\n':
103	s_scan_table[i] = &Tokenizer::scanNewline;
104	break;
105
106	case '#':
107	s_scan_table[i] = &Tokenizer::scanPreprocessor;
108	break;
109
110	case '/':
111	s_scan_table[i] = &Tokenizer::scanComment;
112	break;
113
114	case '\'':
115	s_scan_table[i] = &Tokenizer::scanCharLiteral;
116	break;
117
118	case '"':
119	s_scan_table[i] = &Tokenizer::scanStringLiteral;
120	break;
121
122	default:
123	if (isspace(i)) {
124	s_scan_table[i] = &Tokenizer::scanWhiteSpaces;
125	s_attr_table[i] \|= A_Whitespace;
126	} else if (isalpha(i) \|\| i == '_') {
127	s_scan_table[i] = &Tokenizer::scanIdentifier;
128	s_attr_table[i] \|= A_Alpha;
129	} else if (isdigit(i)) {
130	s_scan_table[i] = &Tokenizer::scanNumberLiteral;
131	s_attr_table[i] \|= A_Digit;
132	} else
133	s_scan_table[i] = &Tokenizer::scanChar;
134	}
135	}
136
137	s_scan_table[128] = &Tokenizer::scanUnicodeChar;
138	}
139
140	QVector<TokenEngine::Token> Tokenizer::tokenize(QByteArray text)
141	{
142	m_tokens.clear();
143
144	m_buffer = text;
145	m_ptr = 0;
146
147	// tokenize
148	for (;;) {
149	Token tk;
150	bool endOfFile = nextToken(tk);
151	if (endOfFile) {
152	break;
153	}
154	m_tokens.append(tk);
155	}
156
157	return m_tokens;
158	}
159
160	bool Tokenizer::nextToken(Token &tok)
161	{
162	int start = m_ptr;
163	unsigned char ch = (unsigned char)m_buffer[m_ptr];
164
165	int kind = 0;
166	(this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
167
168	tok.start = start;
169	tok.length = m_ptr - start;
170
171	return (kind == 0);
172	}
173
174	void Tokenizer::scanChar(int *kind)
175	{
176	*kind = m_buffer[m_ptr++];
177	}
178
179	void Tokenizer::scanWhiteSpaces(int *kind)
180	{
181	*kind = Token_whitespaces;
182	while (unsigned char ch = m_buffer[m_ptr]) {
183	if (s_attr_table[ch] & A_Whitespace)
184	++m_ptr;
185	else
186	break;
187	}
188	}
189
190	void Tokenizer::scanNewline(int *kind)
191	{
192	Q_UNUSED(kind);
193	const unsigned char ch = m_buffer[m_ptr++];
194	// Check for \n.
195	if (ch == '\n') {
196	*kind = '\n';
197	return;
198	}
199
200	// Check for \r\n.
201	if (ch == '\r' && m_buffer[m_ptr] == '\n') {
202	*kind = '\n';
203	++ m_ptr;
204	return;
205	}
206
207	*kind = ch;
208	}
209
210	void Tokenizer::scanUnicodeChar(int *kind)
211	{
212	*kind = m_buffer[m_ptr++];
213	}
214
215	void Tokenizer::scanCharLiteral(int *kind)
216	{
217	++m_ptr;
218	for (;;) {
219	unsigned char ch = m_buffer[m_ptr];
220	switch (ch) {
221	case '\0':
222	case '\n':
223	// ### error
224	*kind = Token_char_literal;
225	return;
226	case '\\':
227	if (m_buffer[m_ptr+1] == '\'' \|\| m_buffer[m_ptr+1] == '\\')
228	m_ptr += 2;
229	else
230	++m_ptr;
231	break;
232	case '\'':
233	++m_ptr;
234	*kind = Token_char_literal;
235	return;
236	default:
237	++m_ptr;
238	break;
239	}
240	}
241
242	// ### error
243	*kind = Token_char_literal;
244	}
245
246	void Tokenizer::scanStringLiteral(int *kind)
247	{
248	++m_ptr;
249	while (m_buffer[m_ptr]) {
250	switch (m_buffer[m_ptr]) {
251	case '\n':
252	// ### error
253	*kind = Token_string_literal;
254	return;
255	case '\\':
256	if (m_buffer[m_ptr+1] == '"' \|\| m_buffer[m_ptr+1] == '\\')
257	m_ptr += 2;
258	else
259	++m_ptr;
260	break;
261	case '"':
262	++m_ptr;
263	*kind = Token_string_literal;
264	return;
265	default:
266	++m_ptr;
267	break;
268	}
269	}
270
271	// ### error
272	*kind = Token_string_literal;
273	}
274
275	void Tokenizer::scanIdentifier(int *kind)
276	{
277	unsigned char ch;
278	for (;;) {
279	ch = m_buffer[m_ptr];
280	if (s_attr_table[ch] & A_Alphanum)
281	++m_ptr;
282	else
283	break;
284	}
285	*kind = Token_identifier;
286	}
287
288	void Tokenizer::scanNumberLiteral(int *kind)
289	{
290	unsigned char ch;
291	for (;;) {
292	ch = m_buffer[m_ptr];
293	if (s_attr_table[ch] & A_Alphanum \|\| ch == '.')
294	++m_ptr;
295	else
296	break;
297	}
298
299	// ### finish to implement me!!
300	*kind = Token_number_literal;
301	}
302
303	void Tokenizer::scanComment(int *kind)
304	{
305	if (!(m_buffer[m_ptr+1] == '/' \|\| m_buffer[m_ptr+1] == '*')) {
306	scanOperator(kind);
307	return;
308	}
309
310	++m_ptr; // skip '/'
311
312	bool multiLineComment = m_buffer[m_ptr++] == '*';
313
314	while (m_buffer[m_ptr]) {
315	switch (m_buffer[m_ptr]) {
316	case '\r':
317	case '\n':
318	if (!multiLineComment) {
319	*kind = Token_comment;
320	return;
321	}
322
323	(void) scanNewline(kind);
324	break;
325
326	case '*':
327	if (multiLineComment && m_buffer[m_ptr+1] == '/') {
328	m_ptr += 2;
329	*kind = Token_comment;
330	return;
331	}
332	++m_ptr;
333	break;
334
335	default:
336	++m_ptr;
337	}
338	}
339
340	// ### error
341	*kind = Token_comment;
342	}
343
344
345	void Tokenizer::scanPreprocessor(int *kind)
346	{
347	++m_ptr;
348	*kind = Token_preproc;
349	}
350
351
352	void Tokenizer::scanOperator(int *kind)
353	{
354	switch (m_buffer[m_ptr]) {
355	case ':':
356	if (m_buffer[m_ptr+1] == ':') {
357	m_ptr += 2;
358	*kind = Token_scope;
359	return;
360	}
361	break;
362
363	case '*':
364	case '/':
365	case '%':
366	case '^':
367	if (m_buffer[m_ptr+1] == '=') {
368	m_ptr += 2;
369	*kind = Token_assign;
370	return;
371	}
372	break;
373
374	case '=':
375	case '!':
376	if (m_buffer[m_ptr+1] == '=') {
377	m_ptr += 2;
378	*kind = Token_eq;
379	return;
380	}
381	break;
382
383	case '&':
384	if (m_buffer[m_ptr+1] == '&') {
385	m_ptr += 2;
386	*kind = Token_and;
387	return;
388	} else if (m_buffer[m_ptr+1] == '=') {
389	m_ptr += 2;
390	*kind = Token_assign;
391	return;
392	}
393	break;
394
395	case '\|':
396	if (m_buffer[m_ptr+1] == '\|' ) {
397	m_ptr += 2;
398	*kind = Token_or;
399	return;
400	} else if (m_buffer[m_ptr+1] == '=') {
401	m_ptr += 2;
402	*kind = Token_assign;
403	return;
404	}
405	break;
406
407	case '+':
408	if (m_buffer[m_ptr+1] == '+' ) {
409	m_ptr += 2;
410	*kind = Token_incr;
411	return;
412	} else if (m_buffer[m_ptr+1] == '=') {
413	m_ptr += 2;
414	*kind = Token_assign;
415	return;
416	}
417	break;
418
419	case '<':
420	if (m_buffer[m_ptr+1] == '<') {
421	if (m_buffer[m_ptr+2] == '=') {
422	m_ptr += 3;
423	*kind = Token_assign;
424	return;
425	}
426	m_ptr += 2;
427	*kind = Token_shift;
428	return;
429	} else if (m_buffer[m_ptr+1] == '=') {
430	m_ptr += 2;
431	*kind = Token_leq;
432	return;
433	}
434	break;
435
436	case '>':
437	if (m_buffer[m_ptr+1] == '>') {
438	if (m_buffer[m_ptr+2] == '=') {
439	m_ptr += 3;
440	*kind = Token_assign;
441	return;
442	}
443	m_ptr += 2;
444	*kind = Token_shift;
445	return;
446	} else if (m_buffer[m_ptr+1] == '=') {
447	m_ptr += 2;
448	*kind = Token_geq;
449	return;
450	}
451	break;
452
453	case '-':
454	if (m_buffer[m_ptr+1] == '>') {
455	if (m_buffer[m_ptr+2] == '*') {
456	m_ptr += 3;
457	*kind = Token_ptrmem;
458	return;
459	}
460	m_ptr += 2;
461	*kind = Token_arrow;
462	return;
463	} else if (m_buffer[m_ptr+1] == '-') {
464	m_ptr += 2;
465	*kind = Token_decr;
466	return;
467	} else if (m_buffer[m_ptr+1] == '=') {
468	m_ptr += 2;
469	*kind = Token_assign;
470	return;
471	}
472	break;
473
474	case '.':
475	if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') {
476	m_ptr += 3;
477	*kind = Token_ellipsis;
478	return;
479	} else if (m_buffer[m_ptr+1] == '*') {
480	m_ptr += 2;
481	*kind = Token_ptrmem;
482	return;
483	}
484	break;
485
486	}
487
488	*kind = m_buffer[m_ptr++];
489	}
490
491	QT_END_NAMESPACE

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/tools/porting/src/tokenizer.cpp@ 348

Download in other formats: