Context Navigation

tokenizer.cpp@ 1028

Last change on this file since 1028 was 846, checked in by Dmitry A. Kuminov, 14 years ago
trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.
File size: 11.2 KB

Rev	Line
[2]	1	/****************************************************************************
	2	**
[561]	3	** Copyright (C) 2001-2004 Roberto Raggi
[846]	4	** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
[561]	5	** All rights reserved.
	6	** Contact: Nokia Corporation (qt-info@nokia.com)
[2]	7	**
	8	** This file is part of the qt3to4 porting application of the Qt Toolkit.
	9	**
	10	** $QT_BEGIN_LICENSE:LGPL$
	11	** Commercial Usage
	12	** Licensees holding valid Qt Commercial licenses may use this file in
	13	** accordance with the Qt Commercial License Agreement provided with the
	14	** Software or, alternatively, in accordance with the terms contained in
	15	** a written agreement between you and Nokia.
	16	**
	17	** GNU Lesser General Public License Usage
	18	** Alternatively, this file may be used under the terms of the GNU Lesser
	19	** General Public License version 2.1 as published by the Free Software
	20	** Foundation and appearing in the file LICENSE.LGPL included in the
	21	** packaging of this file. Please review the following information to
	22	** ensure the GNU Lesser General Public License version 2.1 requirements
	23	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
	24	**
[561]	25	** In addition, as a special exception, Nokia gives you certain additional
	26	** rights. These rights are described in the Nokia Qt LGPL Exception
	27	** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
[2]	28	**
	29	** GNU General Public License Usage
	30	** Alternatively, this file may be used under the terms of the GNU
	31	** General Public License version 3.0 as published by the Free Software
	32	** Foundation and appearing in the file LICENSE.GPL included in the
	33	** packaging of this file. Please review the following information to
	34	** ensure the GNU General Public License version 3.0 requirements will be
	35	** met: http://www.gnu.org/copyleft/gpl.html.
	36	**
[561]	37	** If you have questions regarding the use of this file, please contact
	38	** Nokia at qt-info@nokia.com.
[2]	39	** $QT_END_LICENSE$
	40	**
	41	****************************************************************************/
	42
	43	#include "tokenizer.h"
	44	#include "tokens.h"
	45	#include <QDateTime>
	46	#include <QHash>
	47	#include <ctype.h>
	48
	49	QT_BEGIN_NAMESPACE
	50
	51	using TokenEngine::Token;
	52
	53	static QHash<QByteArray, bool> preprocessed;
	54	bool Tokenizer::s_initialized = false;
	55	Tokenizer::scan_fun_ptr Tokenizer::s_scan_table[128 + 1];
	56	int Tokenizer::s_attr_table[256];
	57
	58	Tokenizer::Tokenizer()
	59	: m_buffer(0), m_ptr(0)
	60	{
	61	if (!s_initialized)
	62	setupScanTable();
	63	}
	64
	65	Tokenizer::~Tokenizer()
	66	{
	67	}
	68
	69	enum
	70	{
	71	A_Alpha = 0x01,
	72	A_Digit = 0x02,
	73	A_Alphanum = A_Alpha \| A_Digit,
	74	A_Whitespace = 0x04
	75	};
	76
	77	void Tokenizer::setupScanTable()
	78	{
	79	s_initialized = true;
	80
	81	memset(s_attr_table, 0, 256);
	82
	83	for (int i=0; i<128; ++i) {
	84	switch (i) {
	85	case ':':
	86	case '*':
	87	case '%':
	88	case '^':
	89	case '=':
	90	case '!':
	91	case '&':
	92	case '\|':
	93	case '+':
	94	case '<':
	95	case '>':
	96	case '-':
	97	case '.':
	98	s_scan_table[i] = &Tokenizer::scanOperator;
	99	break;
	100
	101	case '\r':
	102	case '\n':
	103	s_scan_table[i] = &Tokenizer::scanNewline;
	104	break;
	105
	106	case '#':
	107	s_scan_table[i] = &Tokenizer::scanPreprocessor;
	108	break;
	109
	110	case '/':
	111	s_scan_table[i] = &Tokenizer::scanComment;
	112	break;
	113
	114	case '\'':
	115	s_scan_table[i] = &Tokenizer::scanCharLiteral;
	116	break;
	117
	118	case '"':
	119	s_scan_table[i] = &Tokenizer::scanStringLiteral;
	120	break;
	121
	122	default:
	123	if (isspace(i)) {
	124	s_scan_table[i] = &Tokenizer::scanWhiteSpaces;
	125	s_attr_table[i] \|= A_Whitespace;
	126	} else if (isalpha(i) \|\| i == '_') {
	127	s_scan_table[i] = &Tokenizer::scanIdentifier;
	128	s_attr_table[i] \|= A_Alpha;
	129	} else if (isdigit(i)) {
	130	s_scan_table[i] = &Tokenizer::scanNumberLiteral;
	131	s_attr_table[i] \|= A_Digit;
	132	} else
	133	s_scan_table[i] = &Tokenizer::scanChar;
	134	}
	135	}
	136
	137	s_scan_table[128] = &Tokenizer::scanUnicodeChar;
	138	}
	139
	140	QVector<TokenEngine::Token> Tokenizer::tokenize(QByteArray text)
	141	{
	142	m_tokens.clear();
	143
	144	m_buffer = text;
	145	m_ptr = 0;
	146
	147	// tokenize
	148	for (;;) {
	149	Token tk;
	150	bool endOfFile = nextToken(tk);
	151	if (endOfFile) {
	152	break;
	153	}
	154	m_tokens.append(tk);
	155	}
	156
	157	return m_tokens;
	158	}
	159
	160	bool Tokenizer::nextToken(Token &tok)
	161	{
	162	int start = m_ptr;
	163	unsigned char ch = (unsigned char)m_buffer[m_ptr];
	164
	165	int kind = 0;
	166	(this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
	167
	168	tok.start = start;
	169	tok.length = m_ptr - start;
	170
	171	return (kind == 0);
	172	}
	173
	174	void Tokenizer::scanChar(int *kind)
	175	{
	176	*kind = m_buffer[m_ptr++];
	177	}
	178
	179	void Tokenizer::scanWhiteSpaces(int *kind)
	180	{
	181	*kind = Token_whitespaces;
	182	while (unsigned char ch = m_buffer[m_ptr]) {
	183	if (s_attr_table[ch] & A_Whitespace)
	184	++m_ptr;
	185	else
	186	break;
	187	}
	188	}
	189
	190	void Tokenizer::scanNewline(int *kind)
	191	{
	192	Q_UNUSED(kind);
	193	const unsigned char ch = m_buffer[m_ptr++];
	194	// Check for \n.
	195	if (ch == '\n') {
	196	*kind = '\n';
	197	return;
	198	}
	199
	200	// Check for \r\n.
	201	if (ch == '\r' && m_buffer[m_ptr] == '\n') {
	202	*kind = '\n';
	203	++ m_ptr;
	204	return;
	205	}
	206
	207	*kind = ch;
	208	}
	209
	210	void Tokenizer::scanUnicodeChar(int *kind)
	211	{
	212	*kind = m_buffer[m_ptr++];
	213	}
	214
	215	void Tokenizer::scanCharLiteral(int *kind)
	216	{
	217	++m_ptr;
	218	for (;;) {
	219	unsigned char ch = m_buffer[m_ptr];
	220	switch (ch) {
	221	case '\0':
	222	case '\n':
	223	// ### error
	224	*kind = Token_char_literal;
	225	return;
	226	case '\\':
	227	if (m_buffer[m_ptr+1] == '\'' \|\| m_buffer[m_ptr+1] == '\\')
	228	m_ptr += 2;
	229	else
	230	++m_ptr;
	231	break;
	232	case '\'':
	233	++m_ptr;
	234	*kind = Token_char_literal;
	235	return;
	236	default:
	237	++m_ptr;
	238	break;
	239	}
	240	}
	241
	242	// ### error
	243	*kind = Token_char_literal;
	244	}
	245
	246	void Tokenizer::scanStringLiteral(int *kind)
	247	{
	248	++m_ptr;
	249	while (m_buffer[m_ptr]) {
	250	switch (m_buffer[m_ptr]) {
	251	case '\n':
	252	// ### error
	253	*kind = Token_string_literal;
	254	return;
	255	case '\\':
	256	if (m_buffer[m_ptr+1] == '"' \|\| m_buffer[m_ptr+1] == '\\')
	257	m_ptr += 2;
	258	else
	259	++m_ptr;
	260	break;
	261	case '"':
	262	++m_ptr;
	263	*kind = Token_string_literal;
	264	return;
	265	default:
	266	++m_ptr;
	267	break;
	268	}
	269	}
	270
	271	// ### error
	272	*kind = Token_string_literal;
	273	}
	274
	275	void Tokenizer::scanIdentifier(int *kind)
	276	{
	277	unsigned char ch;
	278	for (;;) {
	279	ch = m_buffer[m_ptr];
	280	if (s_attr_table[ch] & A_Alphanum)
	281	++m_ptr;
	282	else
	283	break;
	284	}
	285	*kind = Token_identifier;
	286	}
	287
	288	void Tokenizer::scanNumberLiteral(int *kind)
	289	{
	290	unsigned char ch;
	291	for (;;) {
	292	ch = m_buffer[m_ptr];
	293	if (s_attr_table[ch] & A_Alphanum \|\| ch == '.')
	294	++m_ptr;
	295	else
	296	break;
	297	}
	298
	299	// ### finish to implement me!!
	300	*kind = Token_number_literal;
	301	}
	302
	303	void Tokenizer::scanComment(int *kind)
	304	{
	305	if (!(m_buffer[m_ptr+1] == '/' \|\| m_buffer[m_ptr+1] == '*')) {
	306	scanOperator(kind);
	307	return;
	308	}
	309
	310	++m_ptr; // skip '/'
	311
	312	bool multiLineComment = m_buffer[m_ptr++] == '*';
	313
	314	while (m_buffer[m_ptr]) {
	315	switch (m_buffer[m_ptr]) {
	316	case '\r':
	317	case '\n':
	318	if (!multiLineComment) {
	319	*kind = Token_comment;
	320	return;
	321	}
	322
	323	(void) scanNewline(kind);
	324	break;
	325
	326	case '*':
	327	if (multiLineComment && m_buffer[m_ptr+1] == '/') {
	328	m_ptr += 2;
	329	*kind = Token_comment;
	330	return;
	331	}
	332	++m_ptr;
	333	break;
	334
	335	default:
	336	++m_ptr;
	337	}
	338	}
	339
	340	// ### error
	341	*kind = Token_comment;
	342	}
	343
	344
	345	void Tokenizer::scanPreprocessor(int *kind)
	346	{
	347	++m_ptr;
	348	*kind = Token_preproc;
	349	}
	350
	351
	352	void Tokenizer::scanOperator(int *kind)
	353	{
	354	switch (m_buffer[m_ptr]) {
	355	case ':':
	356	if (m_buffer[m_ptr+1] == ':') {
	357	m_ptr += 2;
	358	*kind = Token_scope;
	359	return;
	360	}
	361	break;
	362
	363	case '*':
	364	case '/':
	365	case '%':
	366	case '^':
	367	if (m_buffer[m_ptr+1] == '=') {
	368	m_ptr += 2;
	369	*kind = Token_assign;
	370	return;
	371	}
	372	break;
	373
	374	case '=':
	375	case '!':
	376	if (m_buffer[m_ptr+1] == '=') {
	377	m_ptr += 2;
	378	*kind = Token_eq;
	379	return;
	380	}
	381	break;
	382
	383	case '&':
	384	if (m_buffer[m_ptr+1] == '&') {
	385	m_ptr += 2;
	386	*kind = Token_and;
	387	return;
	388	} else if (m_buffer[m_ptr+1] == '=') {
	389	m_ptr += 2;
	390	*kind = Token_assign;
	391	return;
	392	}
	393	break;
	394
	395	case '\|':
	396	if (m_buffer[m_ptr+1] == '\|' ) {
	397	m_ptr += 2;
	398	*kind = Token_or;
	399	return;
	400	} else if (m_buffer[m_ptr+1] == '=') {
	401	m_ptr += 2;
	402	*kind = Token_assign;
	403	return;
	404	}
	405	break;
	406
	407	case '+':
	408	if (m_buffer[m_ptr+1] == '+' ) {
	409	m_ptr += 2;
	410	*kind = Token_incr;
	411	return;
	412	} else if (m_buffer[m_ptr+1] == '=') {
	413	m_ptr += 2;
	414	*kind = Token_assign;
	415	return;
	416	}
	417	break;
	418
	419	case '<':
	420	if (m_buffer[m_ptr+1] == '<') {
	421	if (m_buffer[m_ptr+2] == '=') {
	422	m_ptr += 3;
	423	*kind = Token_assign;
	424	return;
	425	}
	426	m_ptr += 2;
	427	*kind = Token_shift;
	428	return;
	429	} else if (m_buffer[m_ptr+1] == '=') {
	430	m_ptr += 2;
	431	*kind = Token_leq;
	432	return;
	433	}
	434	break;
	435
	436	case '>':
	437	if (m_buffer[m_ptr+1] == '>') {
	438	if (m_buffer[m_ptr+2] == '=') {
	439	m_ptr += 3;
	440	*kind = Token_assign;
	441	return;
	442	}
	443	m_ptr += 2;
	444	*kind = Token_shift;
	445	return;
	446	} else if (m_buffer[m_ptr+1] == '=') {
	447	m_ptr += 2;
	448	*kind = Token_geq;
	449	return;
	450	}
	451	break;
	452
	453	case '-':
	454	if (m_buffer[m_ptr+1] == '>') {
	455	if (m_buffer[m_ptr+2] == '*') {
	456	m_ptr += 3;
	457	*kind = Token_ptrmem;
	458	return;
	459	}
	460	m_ptr += 2;
	461	*kind = Token_arrow;
	462	return;
	463	} else if (m_buffer[m_ptr+1] == '-') {
	464	m_ptr += 2;
	465	*kind = Token_decr;
	466	return;
	467	} else if (m_buffer[m_ptr+1] == '=') {
	468	m_ptr += 2;
	469	*kind = Token_assign;
	470	return;
	471	}
	472	break;
	473
	474	case '.':
	475	if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') {
	476	m_ptr += 3;
	477	*kind = Token_ellipsis;
	478	return;
	479	} else if (m_buffer[m_ptr+1] == '*') {
	480	m_ptr += 2;
	481	*kind = Token_ptrmem;
	482	return;
	483	}
	484	break;
	485
	486	}
	487
	488	*kind = m_buffer[m_ptr++];
	489	}
	490
	491	QT_END_NAMESPACE

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/tools/porting/src/tokenizer.cpp@ 1028

Download in other formats: