clang 20.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20
21namespace clang {
22class SourceManager;
23
24namespace format {
25
26class AnnotatedLine;
27class BreakableToken;
28struct FormatToken;
29struct LineState;
30struct ParenState;
31struct RawStringFormatStyleManager;
32class WhitespaceManager;
33
/// String-keyed table of \c FormatStyle values. Going by the name and by
/// \c getDelimiterStyle(), the keys are presumably raw string delimiters --
/// NOTE(review): confirm against the implementation file.
35 llvm::StringMap<FormatStyle> DelimiterStyle;
/// String-keyed table of \c FormatStyle values. Going by the name and by
/// \c getEnclosingFunctionStyle(), the keys are presumably names of functions
/// enclosing a raw string -- NOTE(review): confirm against the implementation.
36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
39
/// Returns the \c FormatStyle for \p Delimiter, if there is one; the result
/// is optional, so callers must handle the empty case.
///
/// NOTE(review): the definition is not visible in this header; presumably it
/// consults \c DelimiterStyle -- confirm.
40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
/// Returns the \c FormatStyle for \p EnclosingFunction, if there is one; the
/// result is optional, so callers must handle the empty case.
///
/// NOTE(review): the definition is not visible in this header; presumably it
/// consults \c EnclosingFunctionStyle -- confirm.
42 std::optional<FormatStyle>
43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44};
45
47public:
48 /// Constructs a \c ContinuationIndenter to format \p Line starting in
49 /// column \p FirstIndent.
51 const AdditionalKeywords &Keywords,
52 const SourceManager &SourceMgr,
53 WhitespaceManager &Whitespaces,
54 encoding::Encoding Encoding,
55 bool BinPackInconclusiveFunctions);
56
57 /// Get the initial state, i.e. the state after placing \p Line's
58 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
59 /// the case of formatting inside raw string literals, \p FirstStartColumn is
60 /// the column at which the state of the parent formatter is.
61 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
62 const AnnotatedLine *Line, bool DryRun);
63
64 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
65 // better home.
66 /// Returns \c true, if a line break after \p State is allowed.
67 bool canBreak(const LineState &State);
68
69 /// Returns \c true, if a line break after \p State is mandatory.
70 bool mustBreak(const LineState &State);
71
72 /// Appends the next token to \p State and updates information
73 /// necessary for indentation.
74 ///
75 /// Puts the token on the current line if \p Newline is \c false and adds a
76 /// line break and necessary indentation otherwise.
77 ///
78 /// If \p DryRun is \c false, also creates and stores the required
79 /// \c Replacement.
///
/// \p ExtraSpaces defaults to 0 when the caller omits it.
///
/// NOTE(review): the meaning of the \c unsigned return value is not stated
/// in this header; presumably it is a penalty, as documented for
/// \c reformatRawStringLiteral -- confirm against the implementation.
80 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
81 unsigned ExtraSpaces = 0);
82
83 /// Get the column limit for this line. This is the style's column
84 /// limit, potentially reduced for preprocessor definitions.
85 unsigned getColumnLimit(const LineState &State) const;
86
87private:
88 /// Mark the next token as consumed in \p State and modify its stacks
89 /// accordingly.
90 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
91
92 /// Update 'State' according to the next token's fake left parentheses.
93 void moveStatePastFakeLParens(LineState &State, bool Newline);
94 /// Update 'State' according to the next token's fake right parentheses.
95 void moveStatePastFakeRParens(LineState &State);
96
97 /// Update 'State' according to the next token being one of "(<{[".
98 void moveStatePastScopeOpener(LineState &State, bool Newline);
99 /// Update 'State' according to the next token being one of ")>}]".
100 void moveStatePastScopeCloser(LineState &State);
101 /// Update 'State' with the next token opening a nested block.
102 void moveStateToNewBlock(LineState &State, bool NewLine);
103
104 /// Reformats a raw string literal.
105 ///
106 /// \returns An extra penalty induced by reformatting the token.
107 unsigned reformatRawStringLiteral(const FormatToken &Current,
108 LineState &State,
109 const FormatStyle &RawStringStyle,
110 bool DryRun, bool Newline);
111
112 /// If the current token is at the end of the current line, handle
113 /// the transition to the next line.
114 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
115 bool DryRun, bool AllowBreak, bool Newline);
116
117 /// If \p Current is a raw string that is configured to be reformatted,
118 /// return the style to be used.
119 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
120 const LineState &State);
121
122 /// If the current token sticks out over the end of the line, break
123 /// it if possible.
124 ///
125 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
126 /// when tokens are broken or lines exceed the column limit, and exceeded
127 /// indicates whether the algorithm purposefully left lines exceeding the
128 /// column limit.
129 ///
130 /// The returned penalty will cover the cost of the additional line breaks
131 /// and column limit violation in all lines except for the last one. The
132 /// penalty for the column limit violation in the last line (and in single
133 /// line tokens) is handled in \c addNextStateToQueue.
134 ///
135 /// \p Strict indicates whether reflowing is allowed to leave characters
136 /// protruding the column limit; if true, lines will be split strictly within
137 /// the column limit where possible; if false, words are allowed to protrude
138 /// over the column limit as long as the penalty is less than the penalty
139 /// of a break.
140 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
141 LineState &State,
142 bool AllowBreak, bool DryRun,
143 bool Strict);
144
145 /// Returns the \c BreakableToken starting at \p Current, or nullptr
146 /// if the current token cannot be broken.
147 std::unique_ptr<BreakableToken>
148 createBreakableToken(const FormatToken &Current, LineState &State,
149 bool AllowBreak);
150
151 /// Appends the next token to \p State and updates information
152 /// necessary for indentation.
153 ///
154 /// Puts the token on the current line.
155 ///
156 /// If \p DryRun is \c false, also creates and stores the required
157 /// \c Replacement.
158 void addTokenOnCurrentLine(LineState &State, bool DryRun,
159 unsigned ExtraSpaces);
160
161 /// Appends the next token to \p State and updates information
162 /// necessary for indentation.
163 ///
164 /// Adds a line break and necessary indentation.
165 ///
166 /// If \p DryRun is \c false, also creates and stores the required
167 /// \c Replacement.
///
/// NOTE(review): unlike \c addTokenOnCurrentLine, which returns \c void, this
/// returns an \c unsigned whose meaning is not stated in this header;
/// presumably it is a penalty for the break -- confirm against the
/// implementation.
168 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
169
170 /// Calculate the new column for a line wrap before the next token.
171 unsigned getNewLineColumn(const LineState &State);
172
173 /// Adds a multiline token to the \p State.
174 ///
175 /// \returns Extra penalty for the first line of the literal: last line is
176 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
177 /// matter, as we don't change them.
178 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
179
180 /// Returns \c true if the next token starts a multiline string
181 /// literal.
182 ///
183 /// This includes implicitly concatenated strings, strings that will be broken
184 /// by clang-format and string literals with escaped newlines.
185 bool nextIsMultilineString(const LineState &State);
186
/// The formatting style, held by value. \c getColumnLimit() is documented as
/// returning "the style's column limit".
187 FormatStyle Style;
/// Held by reference, not owned: the referenced object must outlive this
/// indenter. Same name as a constructor parameter.
188 const AdditionalKeywords &Keywords;
/// Held by reference, not owned: the referenced object must outlive this
/// indenter. Same name as a constructor parameter.
189 const SourceManager &SourceMgr;
/// Held by non-const reference, not owned. The \c Replacement that the
/// non-dry-run paths "create and store" presumably ends up here --
/// NOTE(review): confirm against the implementation.
190 WhitespaceManager &Whitespaces;
/// Text encoding, held by value. Same name as a constructor parameter.
191 encoding::Encoding Encoding;
/// Same name as a constructor parameter; its effect is not visible in this
/// header.
192 bool BinPackInconclusiveFunctions;
/// NOTE(review): presumably compiled from a comment-pragma pattern in
/// \c Style; the initialization is not visible here -- confirm.
193 llvm::Regex CommentPragmasRegex;
/// Immutable lookup of raw-string styles by delimiter or enclosing function
/// (see \c RawStringFormatStyleManager). Presumably consulted by
/// \c getRawStringStyle() -- NOTE(review): confirm.
194 const RawStringFormatStyleManager RawStringFormats;
195};
196
198 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199 bool AvoidBinPacking, bool NoLineBreak)
211
212 /// \brief The token opening this parenthesis level, or nullptr if this level
213 /// is opened by a fake parenthesis.
214 ///
215 /// Not considered for memoization as it will always have the same value at
216 /// the same token.
218
219 /// The position to which a specific parenthesis level needs to be
220 /// indented.
221 unsigned Indent;
222
223 /// The position of the last space on each level.
224 ///
225 /// Used e.g. to break like:
226 /// functionCall(Parameter, otherCall(
227 /// OtherParameter));
228 unsigned LastSpace;
229
230 /// If a block relative to this parenthesis level gets wrapped, indent
231 /// it this much.
233
234 /// The position of the first "<<" operator encountered on each level.
235 ///
236 /// Used to align "<<" operators. 0 if no such operator has been encountered
237 /// on a level.
238 unsigned FirstLessLess = 0;
239
240 /// The column of a \c ? in a conditional expression.
241 unsigned QuestionColumn = 0;
242
243 /// The position of the colon in an ObjC method declaration/call.
244 unsigned ColonPos = 0;
245
246 /// The start of the most recent function in a builder-type call.
248
249 /// Contains the start of array subscript expressions, so that they
250 /// can be aligned.
252
253 /// If a nested name specifier was broken over multiple lines, this
254 /// contains the start column of the second line. Otherwise 0.
256
257 /// If a call expression was broken over multiple lines, this
258 /// contains the start column of the second line. Otherwise 0.
259 unsigned CallContinuation = 0;
260
261 /// The column of the first variable name in a variable declaration.
262 ///
263 /// Used to align further variables if necessary.
264 unsigned VariablePos = 0;
265
266 /// Whether this block's indentation is used for alignment.
267 bool IsAligned : 1;
268
269 /// Whether a newline needs to be inserted before the block's closing
270 /// brace.
271 ///
272 /// We only want to insert a newline before the closing brace if there also
273 /// was a newline after the beginning left brace.
275
276 /// Whether a newline needs to be inserted before the block's closing
277 /// paren.
278 ///
279 /// We only want to insert a newline before the closing paren if there also
280 /// was a newline after the beginning left paren.
282
283 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
284 /// lines, in this context.
286
287 /// Break after the next comma (or all the commas in this context if
288 /// \c AvoidBinPacking is \c true).
290
291 /// Line breaking in this context would break a formatting rule.
292 bool NoLineBreak : 1;
293
294 /// Same as \c NoLineBreak, but is restricted until the end of the
295 /// operand (including the next ",").
297
298 /// True if the last binary operator on this level was wrapped to the
299 /// next line.
301
302 /// \c true if this \c ParenState already contains a line-break.
303 ///
304 /// The first line break in a certain \c ParenState causes extra penalty so
305 /// that clang-format prefers similar breaks, i.e. breaks in the same
306 /// parenthesis.
308
309 /// \c true if this \c ParenState contains multiple segments of a
310 /// builder-type call on one line.
312
313 /// \c true if the colons of the current ObjC method expression should
314 /// be aligned.
315 ///
316 /// Not considered for memoization as it will always have the same value at
317 /// the same token.
318 bool AlignColons : 1;
319
320 /// \c true if at least one selector name was found in the current
321 /// ObjC method expression.
322 ///
323 /// Not considered for memoization as it will always have the same value at
324 /// the same token.
326