13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
37class DiagnosticBuilder;
81 void anchor()
override;
// Lexer state members (excerpt; the listing omits the lines between them).
// First byte of the buffer: elsewhere in this listing a StringRef is built
// from BufferStart with length BufferEnd - BufferStart.
87 const char *BufferStart;
// End bound of the buffer: used as the far end of that length computation and
// compared against BufferPtr.
90 const char *BufferEnd;
// Small mode value. The visible code stores 0, 1 ("Mode ? 1 : 0", next to the
// comment-retention assert) or 2 ("Val ? 2 : 0", next to the
// whitespace-retention assert), and queries it with "> 0" and "> 1".
122 unsigned char ExtendedTokenMode;
// Current position in the buffer: the listing asserts BufferPtr >= BufferStart
// and returns BufferPtr - BufferStart as an offset.
131 const char *BufferPtr;
// Line/token state flags. Their exact semantics are not shown in this excerpt.
// NOTE(review): confirm the meaning of each flag against the full header.
135 bool IsAtStartOfLine;
137 bool IsAtPhysicalStartOfLine;
139 bool HasLeadingSpace;
141 bool HasLeadingEmptyMacro;
// Presumably backs the isFirstTimeLexingFile() accessor listed in this file's
// reference section ("first time we're lexing the input file") - confirm.
144 bool IsFirstTimeLexingFile;
// Purpose not visible in this excerpt.
148 const char *NewLinePtr;
// Starts at 0 via the in-class initializer; presumably an index into the
// dependency-directive tokens - TODO confirm.
158 unsigned NextDepDirectiveTokenIndex = 0;
// Declaration only; the definition is outside this excerpt.
// Takes three pointers into one buffer (start, position, end). Presumably they
// seed BufferStart / BufferPtr / BufferEnd - TODO confirm in the definition.
160 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
168 bool IsFirstIncludeOfFile =
true);
174 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
175 bool IsFirstIncludeOfFile =
true);
182 bool IsFirstIncludeOfFile =
true);
// Declaration only; the definition is outside this excerpt.
// Receives the output token by non-const reference and returns bool; the
// meaning of the return value is not visible here.
211 bool LexDependencyDirectiveTokenWhileSkipping(
Token &
Result);
// Returns true when DepDirectives is non-empty, false otherwise.
// Const: only queries DepDirectives (declared outside this excerpt).
215 bool isDependencyDirectivesLexer()
const {
return !DepDirectives.empty(); }
// Declaration only; the definition is outside this excerpt.
// Takes a dependency_directives_scan::Token by const reference and a Token to
// fill by non-const reference; returns a const char *. What the returned
// pointer designates is not visible here - TODO confirm.
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &
Result);
241 return BufferPtr == BufferEnd;
249 return ExtendedTokenMode > 1;
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
263 return ExtendedTokenMode > 0;
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
285 return StringRef(BufferStart, BufferEnd - BufferStart);
312 assert(BufferPtr >= BufferStart &&
"Invalid buffer state");
313 return BufferPtr - BufferStart;
// Sets the lexer's buffer pointer to Offset (per the brief for seek in this
// file's reference section). IsAtStartOfLine is supplied by the caller; how it
// is applied is in the definition, which is outside this excerpt.
317 void seek(
unsigned Offset,
bool IsAtStartOfLine);
// Converts Str into a C string by escaping '\' and '"' characters; the brief
// in this file's reference section is truncated after that first step.
// Charify defaults to false; its effect is not shown in this excerpt.
322 static std::string
Stringify(StringRef Str,
bool Charify =
false);
365 bool *invalid =
nullptr);
380 bool IgnoreWhiteSpace =
false);
436 Range.getBegin(), End);
441 return Range.isTokenRange()
550 unsigned MaxLines = 0);
558 bool IncludeComments =
false);
564 bool IncludeComments);
575 bool SkipTrailingWhitespaceAndNewLine);
597 if (isObviouslySimpleCharacter(Ptr[0])) {
601 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
// Declarations only; the definitions are outside this excerpt.
// LexTokenInternal: fills Result; TokAtPhysicalStartOfLine is passed by value.
619 bool LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine);
// CheckUnicodeWhitespace: takes the output token, a 32-bit value C
// (presumably a code point - confirm) and the current buffer pointer.
621 bool CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
const char *CurPtr);
// LexUnicodeIdentifierStart: same parameter shape as CheckUnicodeWhitespace.
623 bool LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
const char *CurPtr);
630 void FormTokenWithChars(
Token &
Result,
const char *TokEnd,
632 unsigned TokLen = TokEnd-BufferPtr;
// Declaration only. NOTE(review): the return type is unsigned rather than
// bool, so more than two outcomes may be encoded; the value meanings are not
// visible in this excerpt.
642 unsigned isNextPPTokenLParen();
// Returns true for every character except '?' and '\\'.
// Callers in this listing use it on Ptr[0] to choose between a direct read and
// the getCharAndSizeSlow* fallbacks. Presumably these are the two characters
// that can begin a trigraph or an escaped newline - confirm.
// (The closing brace falls on a listing line omitted from this excerpt.)
666 static bool isObviouslySimpleCharacter(
char C) {
667 return C !=
'?' &&
C !=
'\\';
674 inline char getAndAdvanceChar(
const char *&Ptr, Token &Tok) {
677 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
679 auto [
C,
Size] = getCharAndSizeSlow(Ptr, &Tok);
688 const char *ConsumeChar(
const char *Ptr,
unsigned Size, Token &Tok) {
695 return Ptr + getCharAndSizeSlow(Ptr, &Tok).
Size;
702 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
705 if (isObviouslySimpleCharacter(Ptr[0])) {
710 auto CharAndSize = getCharAndSizeSlow(Ptr);
711 Size = CharAndSize.Size;
712 return CharAndSize.Char;
// Declarations only; the definitions are outside this excerpt.
// getCharAndSizeSlow: the fallback the inline readers in this listing call
// when isObviouslySimpleCharacter(Ptr[0]) is false. The result exposes .Char
// and .Size. Tok is optional (defaults to nullptr).
717 SizedChar getCharAndSizeSlow(
const char *Ptr, Token *Tok =
nullptr);
// Static helper returning an unsigned size for the text at P.
722 static unsigned getEscapedNewLineSize(
const char *
P);
// Static helper returning a pointer derived from P (presumably past any
// escaped newlines - confirm).
727 static const char *SkipEscapedNewLines(
const char *
P);
// Static variant that takes LangOptions explicitly; the reference section of
// this file describes the NoWarn family as never emitting a warning.
731 static SizedChar getCharAndSizeSlowNoWarn(
const char *Ptr,
732 const LangOptions &LangOpts);
// Declarations only; the definitions are outside this excerpt.
// SetByteOffset: takes an unsigned offset and a start-of-line flag.
737 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
// Takes the token to update by non-const reference.
739 void PropagateLineStartLeadingSpaceInfo(Token &
Result);
// Returns a const char * (presumably the position after the suffix - confirm).
// IsStringLiteral tells the callee which literal kind precedes the suffix.
741 const char *LexUDSuffix(Token &
Result,
const char *CurPtr,
742 bool IsStringLiteral);
// The Lex* routines below share one shape: output token by reference, current
// buffer pointer by value, bool result.
748 bool LexIdentifierContinue(Token &
Result,
const char *CurPtr);
750 bool LexNumericConstant (Token &
Result,
const char *CurPtr);
751 bool LexStringLiteral (Token &
Result,
const char *CurPtr,
753 bool LexRawStringLiteral (Token &
Result,
const char *CurPtr,
// Declaration only; same shape as the neighbouring Lex* routines (output token
// by reference, current buffer pointer by value, bool result).
755 bool LexAngledStringLiteral(Token &
Result,
const char *CurPtr);
756 bool LexCharConstant (Token &
Result,
const char *CurPtr,
// Declarations only; the definitions are outside this excerpt.
758 bool LexEndOfFile (Token &
Result,
const char *CurPtr);
// The three Skip* routines additionally take TokAtPhysicalStartOfLine by
// non-const reference, so the callee can update the caller's flag.
759 bool SkipWhitespace (Token &
Result,
const char *CurPtr,
760 bool &TokAtPhysicalStartOfLine);
761 bool SkipLineComment (Token &
Result,
const char *CurPtr,
762 bool &TokAtPhysicalStartOfLine);
763 bool SkipBlockComment (Token &
Result,
const char *CurPtr,
764 bool &TokAtPhysicalStartOfLine);
// Unlike the Skip* routines, SaveLineComment takes no start-of-line flag.
765 bool SaveLineComment (Token &
Result,
const char *CurPtr);
// Declarations only; the definitions are outside this excerpt.
// Conflict-marker helpers: each inspects the text at CurPtr. The marker kinds
// (CMK_None, CMK_Normal, CMK_Perforce) are described in this file's reference
// section.
767 bool IsStartOfConflictMarker(
const char *CurPtr);
768 bool HandleEndOfConflictMarker(
const char *CurPtr);
// Takes the output token by reference and the current buffer pointer.
770 bool lexEditorPlaceholder(Token &
Result,
const char *CurPtr);
// Const query on the position CurPtr; does not modify the lexer.
772 bool isCodeCompletionPoint(
const char *CurPtr)
const;
// Moves the cursor to the end of the buffer, so that BufferPtr == BufferEnd
// afterwards. No other state is touched.
773 void cutOffLexing() { BufferPtr = BufferEnd; }
// Declarations only; the definitions are outside this excerpt.
// isHexaLiteral: examines the text beginning at Start under the given
// LangOptions.
775 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
// codeCompleteIncludedFile: takes two positions (PathStart, CompletionPoint)
// and a flag for the angled form.
777 void codeCompleteIncludedFile(
const char *PathStart,
778 const char *CompletionPoint,
bool IsAngled);
// Declarations only; the definitions are outside this excerpt.
// All three take StartPtr by reference (so the callee can advance it), the
// position SlashLoc, and a Token pointer named Result.
// The numeric and named variants return std::optional<uint32_t>.
780 std::optional<uint32_t>
781 tryReadNumericUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
782 std::optional<uint32_t> tryReadNamedUCN(
const char *&StartPtr,
783 const char *SlashLoc, Token *
Result);
// tryReadUCN returns a plain uint32_t instead of an optional.
// NOTE(review): confirm which value signals failure.
797 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc, Token *
Result);
810 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
// Declaration only; the definition is outside this excerpt.
// CurPtr is taken by reference so the callee can advance it; Result is the
// token being updated.
819 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr, Token &
Result);
enum clang::sema::@1727::IndirectLocalPathEntry::EntryKind Kind
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
unsigned Size
Size of the preamble in bytes.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)