clang 20.0.0git
FormatToken.h
Go to the documentation of this file.
1//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the declaration of the FormatToken, a wrapper
11/// around Token with additional information related to formatting.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17
20#include "clang/Format/Format.h"
21#include "clang/Lex/Lexer.h"
22#include <unordered_set>
23
24namespace clang {
25namespace format {
26
// X-macro listing the name of every token type. Each entry is written as
// TYPE(Name); a user defines TYPE(X) before expanding LIST_TOKEN_TYPES and
// #undefs it afterwards. The TokenType enum below defines TYPE(X) as
// `TT_##X,`, so an entry such as TYPE(Unknown) corresponds to the enumerator
// TT_Unknown. Every line of the list except the last must end with a
// backslash continuation, including the lines that only hold a comment.
27#define LIST_TOKEN_TYPES \
28 TYPE(AfterPPDirective) \
29 TYPE(ArrayInitializerLSquare) \
30 TYPE(ArraySubscriptLSquare) \
31 TYPE(AttributeColon) \
32 TYPE(AttributeLParen) \
33 TYPE(AttributeMacro) \
34 TYPE(AttributeRParen) \
35 TYPE(AttributeSquare) \
36 TYPE(BinaryOperator) \
37 TYPE(BitFieldColon) \
38 TYPE(BlockComment) \
39 /* l_brace of a block that is not the body of a (e.g. loop) statement. */ \
40 TYPE(BlockLBrace) \
41 TYPE(BracedListLBrace) \
42 TYPE(CaseLabelArrow) \
43 /* The colon at the end of a case label. */ \
44 TYPE(CaseLabelColon) \
45 TYPE(CastRParen) \
46 TYPE(ClassLBrace) \
47 TYPE(ClassRBrace) \
48 TYPE(CompoundRequirementLBrace) \
49 /* ternary ?: expression */ \
50 TYPE(ConditionalExpr) \
51 /* the condition in an if statement */ \
52 TYPE(ConditionLParen) \
53 TYPE(ConflictAlternative) \
54 TYPE(ConflictEnd) \
55 TYPE(ConflictStart) \
56 /* l_brace of if/for/while */ \
57 TYPE(ControlStatementLBrace) \
58 TYPE(ControlStatementRBrace) \
59 TYPE(CppCastLParen) \
60 TYPE(CSharpGenericTypeConstraint) \
61 TYPE(CSharpGenericTypeConstraintColon) \
62 TYPE(CSharpGenericTypeConstraintComma) \
63 TYPE(CSharpNamedArgumentColon) \
64 TYPE(CSharpNullable) \
65 TYPE(CSharpNullConditionalLSquare) \
66 TYPE(CSharpStringLiteral) \
67 TYPE(CtorInitializerColon) \
68 TYPE(CtorInitializerComma) \
69 TYPE(CtorDtorDeclName) \
70 TYPE(DesignatedInitializerLSquare) \
71 TYPE(DesignatedInitializerPeriod) \
72 TYPE(DictLiteral) \
73 TYPE(DoWhile) \
74 TYPE(ElseLBrace) \
75 TYPE(ElseRBrace) \
76 TYPE(EnumLBrace) \
77 TYPE(EnumRBrace) \
78 TYPE(FatArrow) \
79 TYPE(ForEachMacro) \
80 TYPE(FunctionAnnotationRParen) \
81 TYPE(FunctionDeclarationName) \
82 TYPE(FunctionDeclarationLParen) \
83 TYPE(FunctionLBrace) \
84 TYPE(FunctionLikeOrFreestandingMacro) \
85 TYPE(FunctionTypeLParen) \
86 /* The colons as part of a C11 _Generic selection */ \
87 TYPE(GenericSelectionColon) \
88 /* The colon at the end of a goto label. */ \
89 TYPE(GotoLabelColon) \
90 TYPE(IfMacro) \
91 TYPE(ImplicitStringLiteral) \
92 TYPE(InheritanceColon) \
93 TYPE(InheritanceComma) \
94 TYPE(InlineASMBrace) \
95 TYPE(InlineASMColon) \
96 TYPE(InlineASMSymbolicNameLSquare) \
97 TYPE(JavaAnnotation) \
98 TYPE(JsAndAndEqual) \
99 TYPE(JsComputedPropertyName) \
100 TYPE(JsExponentiation) \
101 TYPE(JsExponentiationEqual) \
102 TYPE(JsPipePipeEqual) \
103 TYPE(JsPrivateIdentifier) \
104 TYPE(JsTypeColon) \
105 TYPE(JsTypeOperator) \
106 TYPE(JsTypeOptionalQuestion) \
107 TYPE(LambdaArrow) \
108 TYPE(LambdaDefinitionLParen) \
109 TYPE(LambdaLBrace) \
110 TYPE(LambdaLSquare) \
111 TYPE(LeadingJavaAnnotation) \
112 TYPE(LineComment) \
113 TYPE(MacroBlockBegin) \
114 TYPE(MacroBlockEnd) \
115 TYPE(ModulePartitionColon) \
116 TYPE(NamespaceLBrace) \
117 TYPE(NamespaceMacro) \
118 TYPE(NamespaceRBrace) \
119 TYPE(NonNullAssertion) \
120 TYPE(NullCoalescingEqual) \
121 TYPE(NullCoalescingOperator) \
122 TYPE(NullPropagatingOperator) \
123 TYPE(ObjCBlockLBrace) \
124 TYPE(ObjCBlockLParen) \
125 TYPE(ObjCDecl) \
126 TYPE(ObjCForIn) \
127 TYPE(ObjCMethodExpr) \
128 TYPE(ObjCMethodSpecifier) \
129 TYPE(ObjCProperty) \
130 TYPE(ObjCStringLiteral) \
131 TYPE(OverloadedOperator) \
132 TYPE(OverloadedOperatorLParen) \
133 TYPE(PointerOrReference) \
134 TYPE(ProtoExtensionLSquare) \
135 TYPE(PureVirtualSpecifier) \
136 TYPE(RangeBasedForLoopColon) \
137 TYPE(RecordLBrace) \
138 TYPE(RecordRBrace) \
139 TYPE(RegexLiteral) \
140 TYPE(RequiresClause) \
141 TYPE(RequiresClauseInARequiresExpression) \
142 TYPE(RequiresExpression) \
143 TYPE(RequiresExpressionLBrace) \
144 TYPE(RequiresExpressionLParen) \
145 TYPE(SelectorName) \
146 TYPE(StartOfName) \
147 TYPE(StatementAttributeLikeMacro) \
148 TYPE(StatementMacro) \
149 /* A string that is part of a string concatenation. For C#, JavaScript, and \
150 * Java, it is used for marking whether a string needs parentheses around it \
151 * if it is to be split into parts joined by `+`. For Verilog, whether \
152 * braces need to be added to split it. Not used for other languages. */ \
153 TYPE(StringInConcatenation) \
154 TYPE(StructLBrace) \
155 TYPE(StructRBrace) \
156 TYPE(StructuredBindingLSquare) \
157 TYPE(SwitchExpressionLabel) \
158 TYPE(SwitchExpressionLBrace) \
159 TYPE(TableGenBangOperator) \
160 TYPE(TableGenCondOperator) \
161 TYPE(TableGenCondOperatorColon) \
162 TYPE(TableGenCondOperatorComma) \
163 TYPE(TableGenDAGArgCloser) \
164 TYPE(TableGenDAGArgListColon) \
165 TYPE(TableGenDAGArgListColonToAlign) \
166 TYPE(TableGenDAGArgListComma) \
167 TYPE(TableGenDAGArgListCommaToBreak) \
168 TYPE(TableGenDAGArgOpener) \
169 TYPE(TableGenDAGArgOpenerToBreak) \
170 TYPE(TableGenDAGArgOperatorID) \
171 TYPE(TableGenDAGArgOperatorToBreak) \
172 TYPE(TableGenListCloser) \
173 TYPE(TableGenListOpener) \
174 TYPE(TableGenMultiLineString) \
175 TYPE(TableGenTrailingPasteOperator) \
176 TYPE(TableGenValueSuffix) \
177 TYPE(TemplateCloser) \
178 TYPE(TemplateOpener) \
179 TYPE(TemplateString) \
180 TYPE(TrailingAnnotation) \
181 TYPE(TrailingReturnArrow) \
182 TYPE(TrailingUnaryOperator) \
183 TYPE(TypeDeclarationParen) \
184 TYPE(TemplateName) \
185 TYPE(TypeName) \
186 TYPE(TypenameMacro) \
187 TYPE(UnaryOperator) \
188 TYPE(UnionLBrace) \
189 TYPE(UnionRBrace) \
190 TYPE(UntouchableMacroFunc) \
191 TYPE(VariableTemplate) \
192 /* Like in 'assign x = 0, y = 1;'. */ \
193 TYPE(VerilogAssignComma) \
194 /* like in begin : block */ \
195 TYPE(VerilogBlockLabelColon) \
196 /* The square bracket for the dimension part of the type name. \
197 * In 'logic [1:0] x[1:0]', only the first '['. This way we can have space \
198 * before the first bracket but not the second. */ \
199 TYPE(VerilogDimensionedTypeName) \
200 /* list of port connections or parameters in a module instantiation */ \
201 TYPE(VerilogInstancePortComma) \
202 TYPE(VerilogInstancePortLParen) \
203 /* A parenthesized list within which line breaks are inserted by the \
204 * formatter, for example the list of ports in a module header. */ \
205 TYPE(VerilogMultiLineListLParen) \
206 /* for the base in a number literal, not including the quote */ \
207 TYPE(VerilogNumberBase) \
208 /* like `(strong1, pull0)` */ \
209 TYPE(VerilogStrength) \
210 /* Things inside the table in user-defined primitives. */ \
211 TYPE(VerilogTableItem) \
212 /* those that separate ports of different types */ \
213 TYPE(VerilogTypeComma) \
214 TYPE(Unknown)
215
216/// Determines the semantic type of a syntactic token, e.g. whether "<" is a
217/// template opener or binary operator.
218enum TokenType : uint8_t {
219#define TYPE(X) TT_##X,
221#undef TYPE
224
225/// Determines the name of a token type.
227
228// Represents what type of block a set of braces opens.
230
231// The packing kind of a function's parameters.
233
235
236/// Roles a token can take in a configured macro expansion.
238 /// The token was expanded from a macro argument when formatting the expanded
239 /// token sequence.
241 /// The token is part of a macro argument that was previously formatted as
242 /// expansion when formatting the unexpanded macro call.
244 /// The token was expanded from a macro definition, and is not visible as part
245 /// of the macro call.
246 MR_Hidden,
247};
248
249struct FormatToken;
250
251/// Contains information on the token's role in a macro expansion.
252///
253/// Given the following definitions:
254/// A(X) = [ X ]
255/// B(X) = < X >
256/// C(X) = X
257///
258/// Consider the macro call:
259/// A({B(C(C(x)))}) -> [{<x>}]
260///
261/// In this case, the tokens of the unexpanded macro call will have the
262/// following relevant entries in their macro context (note that formatting
263/// the unexpanded macro call happens *after* formatting the expanded macro
264/// call):
265/// A( { B( C( C(x) ) ) } )
266/// Role: NN U NN NN NNUN N N U N (N=None, U=UnexpandedArg)
267///
268/// [ { < x > } ]
269/// Role: H E H E H E H (H=Hidden, E=ExpandedArg)
270/// ExpandedFrom[0]: A A A A A A A
271/// ExpandedFrom[1]: B B B
272/// ExpandedFrom[2]: C
273/// ExpandedFrom[3]: C
274/// StartOfExpansion: 1 0 1 2 0 0 0
275/// EndOfExpansion: 0 0 0 2 1 0 1
276struct MacroExpansion {
278
279 /// The token's role in the macro expansion.
280 /// When formatting an expanded macro, all tokens that are part of macro
281 /// arguments will be MR_ExpandedArg, while all tokens that are not visible in
282 /// the macro call will be MR_Hidden.
283 /// When formatting an unexpanded macro call, all tokens that are part of
284 /// macro arguments will be MR_UnexpandedArg.
286
287 /// The stack of macro call identifier tokens this token was expanded from.
290 /// The number of expansions of which this token is the first entry.
291 unsigned StartOfExpansion = 0;
292
293 /// The number of currently open expansions in \c ExpandedFrom this token is
294 /// the last token in.
295 unsigned EndOfExpansion = 0;
296};
299class AnnotatedLine;
300
301/// A wrapper around a \c Token storing information about the
302/// whitespace characters preceding it.
303struct FormatToken {
312 BlockKind(BK_Unknown), Decision(FD_Unformatted),
313 PackingKind(PPK_Inconclusive), TypeIsFinalized(false),
314 Type(TT_Unknown) {}
315
316 /// The \c Token.
318
319 /// The raw text of the token.
320 ///
321 /// Contains the raw token text without leading whitespace and without leading
322 /// escaped newlines.
323 StringRef TokenText;
324
325 /// A token can have a special role that can carry extra information
326 /// about the token's formatting.
327 /// FIXME: Make FormatToken for parsing and AnnotatedToken two different
328 /// classes and make this a unique_ptr in the AnnotatedToken class.
329 std::shared_ptr<TokenRole> Role;
331 /// The range of the whitespace immediately preceding the \c Token.
334 /// Whether there is at least one unescaped newline before the \c
335 /// Token.
337
338 /// Whether the token text contains newlines (escaped or not).
339 unsigned IsMultiline : 1;
340
341 /// Indicates that this is the first token of the file.
342 unsigned IsFirst : 1;
343
344 /// Whether there must be a line break before this token.
345 ///
346 /// This happens for example when a preprocessor directive ended directly
347 /// before the token.
348 unsigned MustBreakBefore : 1;
350 /// Whether MustBreakBefore is finalized during parsing and must not
351 /// be reset between runs.
353
354 /// Set to \c true if this token is an unterminated literal.
356
357 /// \c true if it is allowed to break before this token.
358 unsigned CanBreakBefore : 1;
360 /// \c true if this is the ">" of "template<..>".
362
363 /// \c true if this token starts a binary expression, i.e. has at least
364 /// one fake l_paren with a precedence greater than prec::Unknown.
365 unsigned StartsBinaryExpression : 1;
366 /// \c true if this token ends a binary expression.
367 unsigned EndsBinaryExpression : 1;
368
369 /// Is this token part of a \c DeclStmt defining multiple variables?
370 ///
371 /// Only set if \c Type == \c TT_StartOfName.
372 unsigned PartOfMultiVariableDeclStmt : 1;
373
374 /// Does this line comment continue a line comment section?
375 ///
376 /// Only set to true if \c Type == \c TT_LineComment.
377 unsigned ContinuesLineCommentSection : 1;
378
379 /// If \c true, this token has been fully formatted (indented and
380 /// potentially re-formatted inside), and we do not allow further formatting
381 /// changes.
382 unsigned Finalized : 1;
383
384 /// \c true if this is the last token within a requires clause.
385 unsigned ClosesRequiresClause : 1;
386
387 /// \c true if this token ends a group of C++ attributes.
388 unsigned EndsCppAttributeGroup : 1;
390private:
391 /// Contains the kind of block if this token is a brace.
392 unsigned BlockKind : 2;
393
394public:
396 return static_cast<BraceBlockKind>(BlockKind);
397 }
/// Stores \p BBK in the 2-bit \c BlockKind bit-field.
///
/// When assertions are enabled, the value is read back through
/// getBlockKind() to catch a \c BraceBlockKind that does not fit in the
/// bit-field.
398 void setBlockKind(BraceBlockKind BBK) {
399 BlockKind = BBK;
400 assert(getBlockKind() == BBK && "BraceBlockKind overflow!");
401 }
403private:
404 /// Stores the formatting decision for the token once it was made.
405 unsigned Decision : 2;
406
407public:
409 return static_cast<FormatDecision>(Decision);
410 }
412 Decision = D;
413 assert(getDecision() == D && "FormatDecision overflow!");
414 }
416private:
417 /// If this is an opening parenthesis, how are the parameters packed?
418 unsigned PackingKind : 2;
419
420public:
422 return static_cast<ParameterPackingKind>(PackingKind);
423 }
425 PackingKind = K;
426 assert(getPackingKind() == K && "ParameterPackingKind overflow!");
427 }
428
429private:
430 unsigned TypeIsFinalized : 1;
432
433public:
434 /// Returns the token's type, e.g. whether "<" is a template opener or
435 /// binary operator.
///
/// This is a const read of \c Type; use setType() to change the type.
436 TokenType getType() const { return Type; }
437 void setType(TokenType T) {
438 // If this token is a macro argument while formatting an unexpanded macro
439 // call, we do not change its type any more - the type was deduced from
440 // formatting the expanded macro stream already.
441 if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
442 return;
443 assert((!TypeIsFinalized || T == Type) &&
444 "Please use overwriteFixedType to change a fixed type.");
446 }
447 /// Sets the type and also the finalized flag. This prevents the type from
448 /// being reset in TokenAnnotator::resetTokenMetadata(). If the type needs to
449 /// be set to another one please use overwriteFixedType, or even better remove
450 /// the need to reassign the type.
452 if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
453 return;
454 Type = T;
455 TypeIsFinalized = true;
456 }
458 if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
459 return;
460 TypeIsFinalized = false;
461 setType(T);
462 }
/// Returns whether the type has been marked as finalized. While this is
/// \c true, setType() asserts that the type is not changed to a different one.
463 bool isTypeFinalized() const { return TypeIsFinalized; }
464
465 /// Used to set an operator precedence explicitly.
467
468 /// The number of newlines immediately before the \c Token.
469 ///
470 /// This can be used to determine what the user wrote in the original code
471 /// and thereby e.g. leave an empty line between two function definitions.
472 unsigned NewlinesBefore = 0;
473
474 /// The number of newlines immediately before the \c Token after formatting.
475 ///
476 /// This is used to avoid overlapping whitespace replacements when \c Newlines
477 /// is recomputed for a finalized preprocessor branching directive.
478 int Newlines = -1;
479
480 /// The offset just past the last '\n' in this token's leading
481 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
482 unsigned LastNewlineOffset = 0;
483
484 /// The width of the non-whitespace parts of the token (or its first