clang
20.0.0git
lib
Format
FormatToken.h
Go to the documentation of this file.
1
//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file contains the declaration of the FormatToken, a wrapper
11
/// around Token with additional information related to formatting.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
16
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17
18
#include "
clang/Basic/IdentifierTable.h
"
19
#include "
clang/Basic/OperatorPrecedence.h
"
20
#include "
clang/Format/Format.h
"
21
#include "
clang/Lex/Lexer.h
"
22
#include <unordered_set>
23
24
namespace
clang
{
25
namespace
format {
26
27
// X-macro listing every token type known to the formatter. A user defines
// TYPE(X) and then expands LIST_TOKEN_TYPES; the entries are emitted in the
// order written below, ending with Unknown. Every line except the last must
// end in a backslash so that the whole list stays a single macro definition.
#define LIST_TOKEN_TYPES \
  TYPE(AfterPPDirective) \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeColon) \
  TYPE(AttributeLParen) \
  TYPE(AttributeMacro) \
  TYPE(AttributeRParen) \
  TYPE(AttributeSquare) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  /* l_brace of a block that is not the body of a (e.g. loop) statement. */ \
  TYPE(BlockLBrace) \
  TYPE(BracedListLBrace) \
  TYPE(CaseLabelArrow) \
  /* The colon at the end of a case label. */ \
  TYPE(CaseLabelColon) \
  TYPE(CastRParen) \
  TYPE(ClassLBrace) \
  TYPE(ClassRBrace) \
  TYPE(CompoundRequirementLBrace) \
  /* ternary ?: expression */ \
  TYPE(ConditionalExpr) \
  /* the condition in an if statement */ \
  TYPE(ConditionLParen) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  /* l_brace of if/for/while */ \
  TYPE(ControlStatementLBrace) \
  TYPE(ControlStatementRBrace) \
  TYPE(CppCastLParen) \
  TYPE(CSharpGenericTypeConstraint) \
  TYPE(CSharpGenericTypeConstraintColon) \
  TYPE(CSharpGenericTypeConstraintComma) \
  TYPE(CSharpNamedArgumentColon) \
  TYPE(CSharpNullable) \
  TYPE(CSharpNullConditionalLSquare) \
  TYPE(CSharpStringLiteral) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(CtorDtorDeclName) \
  TYPE(DesignatedInitializerLSquare) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(DoWhile) \
  TYPE(ElseLBrace) \
  TYPE(ElseRBrace) \
  TYPE(EnumLBrace) \
  TYPE(EnumRBrace) \
  TYPE(FatArrow) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionDeclarationLParen) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionLikeOrFreestandingMacro) \
  TYPE(FunctionTypeLParen) \
  /* The colons as part of a C11 _Generic selection */ \
  TYPE(GenericSelectionColon) \
  /* The colon at the end of a goto label. */ \
  TYPE(GotoLabelColon) \
  TYPE(IfMacro) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InheritanceComma) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(InlineASMSymbolicNameLSquare) \
  TYPE(JavaAnnotation) \
  TYPE(JsAndAndEqual) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsExponentiation) \
  TYPE(JsExponentiationEqual) \
  TYPE(JsPipePipeEqual) \
  TYPE(JsPrivateIdentifier) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(LambdaArrow) \
  TYPE(LambdaDefinitionLParen) \
  TYPE(LambdaLBrace) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(ModulePartitionColon) \
  TYPE(NamespaceLBrace) \
  TYPE(NamespaceMacro) \
  TYPE(NamespaceRBrace) \
  TYPE(NonNullAssertion) \
  TYPE(NullCoalescingEqual) \
  TYPE(NullCoalescingOperator) \
  TYPE(NullPropagatingOperator) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(ProtoExtensionLSquare) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RecordLBrace) \
  TYPE(RecordRBrace) \
  TYPE(RegexLiteral) \
  TYPE(RequiresClause) \
  TYPE(RequiresClauseInARequiresExpression) \
  TYPE(RequiresExpression) \
  TYPE(RequiresExpressionLBrace) \
  TYPE(RequiresExpressionLParen) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(StatementAttributeLikeMacro) \
  TYPE(StatementMacro) \
  /* A string that is part of a string concatenation. For C#, JavaScript, and \
   * Java, it is used for marking whether a string needs parentheses around it \
   * if it is to be split into parts joined by `+`. For Verilog, whether \
   * braces need to be added to split it. Not used for other languages. */ \
  TYPE(StringInConcatenation) \
  TYPE(StructLBrace) \
  TYPE(StructRBrace) \
  TYPE(StructuredBindingLSquare) \
  TYPE(SwitchExpressionLabel) \
  TYPE(SwitchExpressionLBrace) \
  TYPE(TableGenBangOperator) \
  TYPE(TableGenCondOperator) \
  TYPE(TableGenCondOperatorColon) \
  TYPE(TableGenCondOperatorComma) \
  TYPE(TableGenDAGArgCloser) \
  TYPE(TableGenDAGArgListColon) \
  TYPE(TableGenDAGArgListColonToAlign) \
  TYPE(TableGenDAGArgListComma) \
  TYPE(TableGenDAGArgListCommaToBreak) \
  TYPE(TableGenDAGArgOpener) \
  TYPE(TableGenDAGArgOpenerToBreak) \
  TYPE(TableGenDAGArgOperatorID) \
  TYPE(TableGenDAGArgOperatorToBreak) \
  TYPE(TableGenListCloser) \
  TYPE(TableGenListOpener) \
  TYPE(TableGenMultiLineString) \
  TYPE(TableGenTrailingPasteOperator) \
  TYPE(TableGenValueSuffix) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(TypeDeclarationParen) \
  TYPE(TemplateName) \
  TYPE(TypeName) \
  TYPE(TypenameMacro) \
  TYPE(UnaryOperator) \
  TYPE(UnionLBrace) \
  TYPE(UnionRBrace) \
  TYPE(UntouchableMacroFunc) \
  TYPE(VariableTemplate) \
  /* Like in 'assign x = 0, y = 1;' . */ \
  TYPE(VerilogAssignComma) \
  /* like in begin : block */ \
  TYPE(VerilogBlockLabelColon) \
  /* The square bracket for the dimension part of the type name. \
   * In 'logic [1:0] x[1:0]', only the first '['. This way we can have space \
   * before the first bracket but not the second. */ \
  TYPE(VerilogDimensionedTypeName) \
  /* list of port connections or parameters in a module instantiation */ \
  TYPE(VerilogInstancePortComma) \
  TYPE(VerilogInstancePortLParen) \
  /* A parenthesized list within which line breaks are inserted by the \
   * formatter, for example the list of ports in a module header. */ \
  TYPE(VerilogMultiLineListLParen) \
  /* for the base in a number literal, not including the quote */ \
  TYPE(VerilogNumberBase) \
  /* like `(strong1, pull0)` */ \
  TYPE(VerilogStrength) \
  /* Things inside the table in user-defined primitives. */ \
  TYPE(VerilogTableItem) \
  /* those that separate ports of different types */ \
  TYPE(VerilogTypeComma) \
  TYPE(Unknown)
215
216
/// Determines the semantic type of a syntactic token, e.g. whether "<" is a
217
/// template opener or binary operator.
218
enum
TokenType
: uint8_t {
219
#define TYPE(X) TT_##X,
220
LIST_TOKEN_TYPES
221
#undef TYPE
222
NUM_TOKEN_TYPES
223
};
224
225
/// Determines the name of a token type.
226
const
char
*
getTokenTypeName
(
TokenType
Type
);
227
228
// Represents what type of block a set of braces open.
// FormatToken keeps this value in a 2-bit field, so every enumerator has to
// stay within the range 0..3.
enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };
230
231
// The packing kind of a function's parameters.
// FormatToken keeps this value in a 2-bit field, so every enumerator has to
// stay within the range 0..3.
enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
233
234
// The formatting decision recorded for a token. A freshly constructed
// FormatToken starts out as FD_Unformatted.
enum FormatDecision {
  FD_Unformatted = 0,
  FD_Continue = 1,
  FD_Break = 2
};
235
236
/// Roles a token can take in a configured macro expansion.
enum MacroRole {
  /// The token was expanded from a macro argument when formatting the expanded
  /// token sequence.
  MR_ExpandedArg,
  /// The token is part of a macro argument that was previously formatted as
  /// expansion when formatting the unexpanded macro call.
  MR_UnexpandedArg,
  /// The token was expanded from a macro definition, and is not visible as part
  /// of the macro call.
  MR_Hidden,
};
248
249
// Forward declaration: MacroExpansion stores FormatToken pointers before the
// full definition of FormatToken appears.
struct FormatToken;
250
251
/// Contains information on the token's role in a macro expansion.
252
///
253
/// Given the following definitions:
254
/// A(X) = [ X ]
255
/// B(X) = < X >
256
/// C(X) = X
257
///
258
/// Consider the macro call:
259
/// A({B(C(C(x)))}) -> [{<x>}]
260
///
261
/// In this case, the tokens of the unexpanded macro call will have the
262
/// following relevant entries in their macro context (note that formatting
263
/// the unexpanded macro call happens *after* formatting the expanded macro
264
/// call):
265
/// A( { B( C( C(x) ) ) } )
266
/// Role: NN U NN NN NNUN N N U N (N=None, U=UnexpandedArg)
267
///
268
/// [ { < x > } ]
269
/// Role: H E H E H E H (H=Hidden, E=ExpandedArg)
270
/// ExpandedFrom[0]: A A A A A A A
271
/// ExpandedFrom[1]: B B B
272
/// ExpandedFrom[2]: C
273
/// ExpandedFrom[3]: C
274
/// StartOfExpansion: 1 0 1 2 0 0 0
275
/// EndOfExpansion: 0 0 0 2 1 0 1
276
struct
MacroExpansion
{
277
MacroExpansion
(
MacroRole
Role
) :
Role
(
Role
) {}
278
279
/// The token's role in the macro expansion.
280
/// When formatting an expanded macro, all tokens that are part of macro
281
/// arguments will be MR_ExpandedArg, while all tokens that are not visible in
282
/// the macro call will be MR_Hidden.
283
/// When formatting an unexpanded macro call, all tokens that are part of
284
/// macro arguments will be MR_UnexpandedArg.
285
MacroRole
Role
;
286
287
/// The stack of macro call identifier tokens this token was expanded from.
288
llvm::SmallVector<FormatToken *, 1>
ExpandedFrom
;
289
290
/// The number of expansions of which this macro is the first entry.
291
unsigned
StartOfExpansion
= 0;
292
293
/// The number of currently open expansions in \c ExpandedFrom this macro is
294
/// the last token in.
295
unsigned
EndOfExpansion
= 0;
296
};
297
298
// Forward declarations of classes whose full definitions are not needed at
// this point in the header.
class TokenRole;
class AnnotatedLine;
300
301
/// A wrapper around a \c Token storing information about the
302
/// whitespace characters preceding it.
303
struct
FormatToken
{
304
FormatToken
()
305
:
HasUnescapedNewline
(
false
),
IsMultiline
(
false
),
IsFirst
(
false
),
306
MustBreakBefore
(
false
),
MustBreakBeforeFinalized
(
false
),
307
IsUnterminatedLiteral
(
false
),
CanBreakBefore
(
false
),
308
ClosesTemplateDeclaration
(
false
),
StartsBinaryExpression
(
false
),
309
EndsBinaryExpression
(
false
),
PartOfMultiVariableDeclStmt
(
false
),
310
ContinuesLineCommentSection
(
false
),
Finalized
(
false
),
311
ClosesRequiresClause
(
false
),
EndsCppAttributeGroup
(
false
),
312
BlockKind(
BK_Unknown
), Decision(
FD_Unformatted
),
313
PackingKind(
PPK_Inconclusive
), TypeIsFinalized(
false
),
314
Type
(TT_Unknown) {}
315
316
/// The \c Token.
317
Token
Tok
;
318
319
/// The raw text of the token.
320
///
321
/// Contains the raw token text without leading whitespace and without leading
322
/// escaped newlines.
323
StringRef
TokenText
;
324
325
/// A token can have a special role that can carry extra information
326
/// about the token's formatting.
327
/// FIXME: Make FormatToken for parsing and AnnotatedToken two different
328
/// classes and make this a unique_ptr in the AnnotatedToken class.
329
std::shared_ptr<TokenRole>
Role
;
330
331
/// The range of the whitespace immediately preceding the \c Token.
332
SourceRange
WhitespaceRange
;
333
334
/// Whether there is at least one unescaped newline before the \c
335
/// Token.
336
unsigned
HasUnescapedNewline
: 1;
337
338
/// Whether the token text contains newlines (escaped or not).
339
unsigned
IsMultiline
: 1;
340
341
/// Indicates that this is the first token of the file.
342
unsigned
IsFirst
: 1;
343
344
/// Whether there must be a line break before this token.
345
///
346
/// This happens for example when a preprocessor directive ended directly
347
/// before the token.
348
unsigned
MustBreakBefore
: 1;
349
350
/// Whether MustBreakBefore is finalized during parsing and must not
351
/// be reset between runs.
352
unsigned
MustBreakBeforeFinalized
: 1;
353
354
/// Set to \c true if this token is an unterminated literal.
355
unsigned
IsUnterminatedLiteral
: 1;
356
357
/// \c true if it is allowed to break before this token.
358
unsigned
CanBreakBefore
: 1;
359
360
/// \c true if this is the ">" of "template<..>".
361
unsigned
ClosesTemplateDeclaration
: 1;
362
363
/// \c true if this token starts a binary expression, i.e. has at least
364
/// one fake l_paren with a precedence greater than prec::Unknown.
365
unsigned
StartsBinaryExpression
: 1;
366
/// \c true if this token ends a binary expression.
367
unsigned
EndsBinaryExpression
: 1;
368
369
/// Is this token part of a \c DeclStmt defining multiple variables?
370
///
371
/// Only set if \c Type == \c TT_StartOfName.
372
unsigned
PartOfMultiVariableDeclStmt
: 1;
373
374
/// Does this line comment continue a line comment section?
375
///
376
/// Only set to true if \c Type == \c TT_LineComment.
377
unsigned
ContinuesLineCommentSection
: 1;
378
379
/// If \c true, this token has been fully formatted (indented and
380
/// potentially re-formatted inside), and we do not allow further formatting
381
/// changes.
382
unsigned
Finalized
: 1;
383
384
/// \c true if this is the last token within requires clause.
385
unsigned
ClosesRequiresClause
: 1;
386
387
/// \c true if this token ends a group of C++ attributes.
388
unsigned
EndsCppAttributeGroup
: 1;
389
390
private
:
391
/// Contains the kind of block if this token is a brace.
392
unsigned
BlockKind : 2;
393
394
public
:
395
/// Returns the brace block kind stored in the 2-bit \c BlockKind field,
/// converted back to the enum type.
BraceBlockKind getBlockKind() const {
  return static_cast<BraceBlockKind>(BlockKind);
}
398
/// Stores \p BBK in the 2-bit \c BlockKind field.
void setBlockKind(BraceBlockKind BBK) {
  BlockKind = BBK;
  // Reading the value back catches enumerators too large for the bit-field.
  assert(getBlockKind() == BBK && "BraceBlockKind overflow!");
}
402
403
private
:
404
/// Stores the formatting decision for the token once it was made.
405
unsigned
Decision : 2;
406
407
public
:
408
/// Returns the formatting decision stored in the 2-bit \c Decision field,
/// converted back to the enum type.
FormatDecision getDecision() const {
  return static_cast<FormatDecision>(Decision);
}
411
/// Stores \p D in the 2-bit \c Decision field.
void setDecision(FormatDecision D) {
  Decision = D;
  // Reading the value back catches enumerators too large for the bit-field.
  assert(getDecision() == D && "FormatDecision overflow!");
}
415
416
private
:
417
/// If this is an opening parenthesis, how are the parameters packed?
418
unsigned
PackingKind : 2;
419
420
public
:
421
/// Returns the parameter packing kind stored in the 2-bit \c PackingKind
/// field, converted back to the enum type.
ParameterPackingKind getPackingKind() const {
  return static_cast<ParameterPackingKind>(PackingKind);
}
424
/// Stores \p K in the 2-bit \c PackingKind field.
void setPackingKind(ParameterPackingKind K) {
  PackingKind = K;
  // Reading the value back catches enumerators too large for the bit-field.
  assert(getPackingKind() == K && "ParameterPackingKind overflow!");
}
428
429
private
:
430
unsigned
TypeIsFinalized : 1;
431
TokenType
Type
;
432
433
public
:
434
/// Returns the token's type, e.g. whether "<" is a template opener or
435
/// binary operator.
436
TokenType
getType
()
const
{
return
Type
; }
437
/// Sets the token's type to \p T.
///
/// Does nothing for a token whose macro role is MR_UnexpandedArg. Asserts
/// that a finalized type is not being changed to a different value.
void setType(TokenType T) {
  // If this token is a macro argument while formatting an unexpanded macro
  // call, we do not change its type any more - the type was deduced from
  // formatting the expanded macro stream already.
  if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
    return;
  assert((!TypeIsFinalized || T == Type) &&
         "Please use overwriteFixedType to change a fixed type.");
  Type = T;
}
447
/// Sets the type and also the finalized flag. This prevents the type to be
448
/// reset in TokenAnnotator::resetTokenMetadata(). If the type needs to be set
449
/// to another one please use overwriteFixedType, or even better remove the
450
/// need to reassign the type.
451
void
setFinalizedType
(
TokenType
T
) {
452
if
(
MacroCtx
&&
MacroCtx
->Role ==
MR_UnexpandedArg
)
453
return
;
454
Type
=
T
;
455
TypeIsFinalized =
true
;
456
}
457
/// Replaces the type with \p T even if it was finalized before.
///
/// Clears the finalized flag and then goes through setType(), so the flag
/// stays cleared afterwards. Does nothing for a token whose macro role is
/// MR_UnexpandedArg.
void overwriteFixedType(TokenType T) {
  if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
    return;
  TypeIsFinalized = false;
  setType(T);
}
463
/// Returns the current value of the \c TypeIsFinalized flag, which
/// setFinalizedType() sets and overwriteFixedType() clears.
bool isTypeFinalized() const {
  return TypeIsFinalized;
}
464
465
/// Used to set an operator precedence explicitly.
466
prec::Level
ForcedPrecedence
=
prec::Unknown
;
467
468
/// The number of newlines immediately before the \c Token.
469
///
470
/// This can be used to determine what the user wrote in the original code
471
/// and thereby e.g. leave an empty line between two function definitions.
472
unsigned
NewlinesBefore
= 0;
473
474
/// The number of newlines immediately before the \c Token after formatting.
475
///
476
/// This is used to avoid overlapping whitespace replacements when \c Newlines
477
/// is recomputed for a finalized preprocessor branching directive.
478
int
Newlines
= -1;
479
480
/// The offset just past the last '\n' in this token's leading
481
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
482
unsigned
LastNewlineOffset
= 0;
483
484
/// The width of the non-whitespace parts of the token (or its first