From a6f134d30f0007efa339fd434ac109749c30bb11 Mon Sep 17 00:00:00 2001 From: ThePhD Date: Sun, 8 Oct 2023 17:43:51 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Speedy=20#embed=20implementation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ⚡ [Lex] Better reservations for improved performance/memory usage. 🛠 [Lex, Frontend] Remove comma hardcoding since we are servicing a full file apply suggestions from git-clang-format --- clang/include/clang/AST/Expr.h | 51 +++ clang/include/clang/AST/RecursiveASTVisitor.h | 1 + .../clang/Basic/DiagnosticCommonKinds.td | 6 + clang/include/clang/Basic/FileManager.h | 5 +- clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Basic/TokenKinds.def | 6 +- .../Frontend/PreprocessorOutputOptions.h | 3 +- .../include/clang/Lex/PPDirectiveParameter.h | 32 ++ clang/include/clang/Lex/PPEmbedParameters.h | 78 ++++ clang/include/clang/Lex/Preprocessor.h | 39 +- clang/include/clang/Sema/Sema.h | 37 ++ .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/AST/Expr.cpp | 16 + clang/lib/AST/ExprClassification.cpp | 5 + clang/lib/AST/ExprConstant.cpp | 8 + clang/lib/AST/ItaniumMangle.cpp | 1 + clang/lib/AST/StmtPrinter.cpp | 7 + clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/FileManager.cpp | 1 - clang/lib/Basic/IdentifierTable.cpp | 6 +- clang/lib/Driver/ToolChains/Clang.cpp | 3 +- clang/lib/Format/TokenAnnotator.cpp | 3 +- clang/lib/Frontend/DependencyFile.cpp | 15 +- .../lib/Frontend/PrintPreprocessedOutput.cpp | 14 +- .../Frontend/Rewrite/InclusionRewriter.cpp | 16 +- clang/lib/Interpreter/Interpreter.cpp | 1 + clang/lib/Lex/Lexer.cpp | 8 + clang/lib/Lex/PPDirectives.cpp | 426 ++++++++++++++---- clang/lib/Lex/PPMacroExpansion.cpp | 23 +- clang/lib/Lex/Preprocessor.cpp | 6 +- clang/lib/Parse/ParseExpr.cpp | 101 ++++- clang/lib/Parse/ParseTemplate.cpp | 2 + clang/lib/Sema/SemaDecl.cpp | 48 ++ clang/lib/Sema/SemaDeclCXX.cpp | 3 +- clang/lib/Sema/SemaExceptionSpec.cpp | 1 + 
clang/lib/Sema/SemaExpr.cpp | 239 +++++++++- clang/lib/Sema/SemaTemplate.cpp | 56 +++ clang/lib/Sema/TreeTransform.h | 6 + clang/lib/Serialization/ASTReaderStmt.cpp | 13 + clang/lib/Serialization/ASTWriterStmt.cpp | 10 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 4 + clang/test/Preprocessor/embed_art.c | 106 +++++ clang/test/Preprocessor/embed_single_entity.c | 7 + clang/test/Preprocessor/embed_weird.cpp | 68 +++ llvm/include/llvm/Support/Base64.h | 36 +- 45 files changed, 1347 insertions(+), 176 deletions(-) create mode 100644 clang/include/clang/Lex/PPDirectiveParameter.h create mode 100644 clang/include/clang/Lex/PPEmbedParameters.h create mode 100644 clang/test/Preprocessor/embed_art.c create mode 100644 clang/test/Preprocessor/embed_single_entity.c create mode 100644 clang/test/Preprocessor/embed_weird.cpp diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index b69c616b009036..d3fba205c91c93 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -4805,6 +4805,57 @@ class SourceLocExpr final : public Expr { friend class ASTStmtReader; }; +/// Represents a function call to __builtin_pp_embed(). +class PPEmbedExpr final : public Expr { + SourceLocation BuiltinLoc, RParenLoc; + DeclContext *ParentContext; + StringLiteral *Filename; + StringLiteral *BinaryData; + +public: + enum Action { + NotFound, + FoundOne, + Expanded, + }; + + PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, StringLiteral *Filename, + StringLiteral *BinaryData, SourceLocation BLoc, + SourceLocation RParenLoc, DeclContext *Context); + + /// Build an empty PPEmbedExpr shell (tagged PPEmbedExprClass for classof). + explicit PPEmbedExpr(EmptyShell Empty) : Expr(PPEmbedExprClass, Empty) {} + + /// Returns the declaration context recorded for this expression (the + /// ParentContext member).
+ const DeclContext *getParentContext() const { return ParentContext; } + DeclContext *getParentContext() { return ParentContext; } + + SourceLocation getLocation() const { return BuiltinLoc; } + SourceLocation getBeginLoc() const { return BuiltinLoc; } + SourceLocation getEndLoc() const { return RParenLoc; } + + StringLiteral *getFilenameStringLiteral() const { return Filename; } + StringLiteral *getDataStringLiteral() const { return BinaryData; } + + size_t getDataElementCount(ASTContext &Context) const; + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(child_iterator(), child_iterator()); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == PPEmbedExprClass; + } + +private: + friend class ASTStmtReader; +}; + /// Describes an C or C++ initializer list. /// /// InitListExpr describes an initializer list, which can be used to diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 3dd23eb38eeabf..6b7211bb0a0d3f 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2809,6 +2809,7 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {}) DEF_TRAVERSE_STMT(ConvertVectorExpr, {}) DEF_TRAVERSE_STMT(StmtExpr, {}) DEF_TRAVERSE_STMT(SourceLocExpr, {}) +DEF_TRAVERSE_STMT(PPEmbedExpr, {}) DEF_TRAVERSE_STMT(UnresolvedLookupExpr, { TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc())); diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index f2df283c74829f..4df86e35eebde3 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -59,6 +59,9 @@ def err_expected_string_literal : Error<"expected string literal " "'external_source_symbol' attribute|" "as argument of '%1' attribute}0">; +def 
err_builtin_pp_embed_invalid_argument : Error< + "invalid argument to '__builtin_pp_embed': %0">; + def err_invalid_string_udl : Error< "string literal with user-defined suffix cannot be used here">; def err_invalid_character_udl : Error< @@ -80,6 +83,9 @@ def err_expected : Error<"expected %0">; def err_expected_either : Error<"expected %0 or %1">; def err_expected_after : Error<"expected %1 after %0">; +def err_builtin_pp_embed_invalid_location : Error< + "'__builtin_pp_embed' in invalid location: %0%select{|%2}1">; + def err_param_redefinition : Error<"redefinition of parameter %0">; def warn_method_param_redefinition : Warning<"redefinition of method parameter %0">; def warn_method_param_declaration : Warning<"redeclaration of method parameter %0">, diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index c757f8775b425e..cbfcb292778e5f 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -282,8 +282,9 @@ class FileManager : public RefCountedBase { getBufferForFile(StringRef Filename, bool isVolatile = false, bool RequiresNullTerminator = true, std::optional MaybeLimit = std::nullopt) { - return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile, - RequiresNullTerminator); + return getBufferForFileImpl(Filename, + /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), + isVolatile, RequiresNullTerminator); } private: diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index cec301dfca2817..e3be997dd1c86e 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -203,6 +203,7 @@ def OpaqueValueExpr : StmtNode; def TypoExpr : StmtNode; def RecoveryExpr : StmtNode; def BuiltinBitCastExpr : StmtNode; +def PPEmbedExpr : StmtNode; // Microsoft Extensions. 
def MSPropertyRefExpr : StmtNode; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 19a66fbb073119..167bd614efe7bd 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -154,10 +154,6 @@ TOK(eod) // End of preprocessing directive (end of line inside a // directive). TOK(code_completion) // Code completion marker -// #embed speed support -TOK(builtin_embed) - - // C99 6.4.9: Comments. TOK(comment) // Comment (only in -E -C[C] mode) @@ -758,6 +754,7 @@ ALIAS("__char32_t" , char32_t , KEYCXX) KEYWORD(__builtin_bit_cast , KEYALL) KEYWORD(__builtin_available , KEYALL) KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL) +KEYWORD(__builtin_pp_embed , KEYALL) // Keywords defined by Attr.td. #ifndef KEYWORD_ATTRIBUTE @@ -993,6 +990,7 @@ ANNOTATION(repl_input_end) #undef CXX11_KEYWORD #undef KEYWORD #undef PUNCTUATOR +#undef BUILTINOK #undef TOK #undef C99_KEYWORD #undef C23_KEYWORD diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index 3e36db3f8ce46e..0bc32c65a58d2d 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -22,7 +22,8 @@ class PreprocessorOutputOptions { unsigned ShowMacroComments : 1; ///< Show comments, even in macros. unsigned ShowMacros : 1; ///< Print macro definitions. unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. - unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output. + unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed + ///< output. unsigned RewriteIncludes : 1; ///< Preprocess include directives only. unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input. 
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h new file mode 100644 index 00000000000000..fc413c345adc53 --- /dev/null +++ b/clang/include/clang/Lex/PPDirectiveParameter.h @@ -0,0 +1,32 @@ +//===--- PPDirectiveParameter.h - Directive parameter info ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PPDirectiveParameter class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H +#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H + +#include "clang/Basic/SourceLocation.h" + +namespace clang { + +/// Captures basic information about a preprocessor directive parameter. +class PPDirectiveParameter { +public: + SourceLocation Start; + SourceLocation End; + + PPDirectiveParameter(SourceLocation Start, SourceLocation End) + : Start(Start), End(End) {} +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h new file mode 100644 index 00000000000000..7b76d2d573c23b --- /dev/null +++ b/clang/include/clang/Lex/PPEmbedParameters.h @@ -0,0 +1,78 @@ +//===--- PPEmbedParameters.h - #embed directive parameters ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the #embed directive parameter classes.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H +#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H + +#include "clang/Lex/PPDirectiveParameter.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang { + +/// Preprocessor extension embed parameter "clang::offset" +/// `clang::offset( constant-expression )` +class PPEmbedParameterOffset : public PPDirectiveParameter { +public: + size_t Offset; + + PPEmbedParameterOffset(size_t Offset, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Offset(Offset) {} +}; + +/// Preprocessor standard embed parameter "limit" +/// `limit( constant-expression )` +class PPEmbedParameterLimit : public PPDirectiveParameter { +public: + size_t Limit; + + PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End) + : PPDirectiveParameter(Start, End), Limit(Limit) {} +}; + +/// Preprocessor standard embed parameter "prefix" +/// `prefix( balanced-token-seq )` +class PPEmbedParameterPrefix : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterPrefix(SmallVector Tokens, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {} +}; + +/// Preprocessor standard embed parameter "suffix" +/// `suffix( balanced-token-seq )` +class PPEmbedParameterSuffix : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterSuffix(SmallVector Tokens, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {} +}; + +/// Preprocessor standard embed parameter "if_empty" +/// `if_empty( balanced-token-seq )` +class PPEmbedParameterIfEmpty : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterIfEmpty(SmallVector Tokens, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens))
{} +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 7470bf5882730c..3e897cf0c8c759 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,6 +29,7 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/PPEmbedParameters.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/APSInt.h" @@ -1165,6 +1166,9 @@ class Preprocessor { void updateOutOfDateIdentifier(IdentifierInfo &II) const; + /// Buffers for used #embed directives + std::vector EmbedBuffers; + public: Preprocessor(std::shared_ptr PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, @@ -1735,15 +1739,15 @@ class Preprocessor { bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); struct LexEmbedParametersResult { - bool Successful; - std::optional MaybeLimitParam; - std::optional MaybeOffsetParam; - std::optional> MaybeIfEmptyParam; - std::optional> MaybePrefixParam; - std::optional> MaybeSuffixParam; - int UnrecognizedParams; + std::optional MaybeLimitParam; + std::optional MaybeOffsetParam; + std::optional MaybeIfEmptyParam; + std::optional MaybePrefixParam; + std::optional MaybeSuffixParam; SourceLocation StartLoc; SourceLocation EndLoc; + int UnrecognizedParams; + bool Successful; }; LexEmbedParametersResult LexEmbedParameters(Token &Current, @@ -1812,7 +1816,8 @@ class Preprocessor { /// Parses a simple integer literal to get its numeric value. Floating /// point literals and user defined literals are rejected. Used primarily to /// handle pragmas that accept integer arguments. - bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); + bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value, + bool WithLex = true); /// Disables macro expansion everywhere except for preprocessor directives. 
void SetMacroExpansionOnlyInDirectives() { @@ -2735,12 +2740,18 @@ class Preprocessor { // Binary data inclusion void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, const FileEntry *LookupFromFile = nullptr); - void HandleEmbedDirectiveNaive( - SourceLocation FilenameTok, LexEmbedParametersResult &Params, - StringRef BinaryContents, const size_t TargetCharWidth); - void HandleEmbedDirectiveBuiltin( - SourceLocation FilenameTok, LexEmbedParametersResult &Params, - StringRef BinaryContents, const size_t TargetCharWidth); + void HandleEmbedDirectiveNaive(SourceLocation HashLoc, + SourceLocation FilenameTok, + const LexEmbedParametersResult &Params, + StringRef BinaryContents, + const size_t TargetCharWidth); + void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc, + const Token &FilenameTok, + StringRef ResolvedFilename, + StringRef SearchPath, StringRef RelativePath, + const LexEmbedParametersResult &Params, + StringRef BinaryContents, + const size_t TargetCharWidth); // File inclusion. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 1c88855a73970d..a8017fd44dbfe1 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5983,6 +5983,10 @@ class Sema final { ArrayRef Arg, SourceLocation RParenLoc, Expr *Config = nullptr, bool IsExecConfig = false, ADLCallKind UsesADL = ADLCallKind::NotADL); + /// `Fn` may be a null pointer. 
+ void ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc, + SmallVectorImpl &ArgExprs, + SourceLocation RParenLoc); ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, MultiExprArg ExecConfig, @@ -6100,6 +6104,35 @@ class Sema final { SourceLocation BuiltinLoc, SourceLocation RPLoc); + // __builtin_pp_embed() + ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc, + SourceLocation Base64DataLocation, + SourceLocation RPLoc, StringLiteral *Filename, + QualType DataTy, std::vector BinaryData); + + IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed); + + PPEmbedExpr::Action + CheckExprListForPPEmbedExpr(ArrayRef ExprList, + std::optional MaybeInitType); + PPEmbedExpr::Action + ExpandPPEmbedExprInExprList(ArrayRef ExprList, + SmallVectorImpl &OutputExprList, + bool ClearOutputFirst = true); + PPEmbedExpr::Action + ExpandPPEmbedExprInExprList(SmallVectorImpl &OutputList); + + enum PPEmbedExprContext { + PPEEC__StaticAssert, + PPEEC_StaticAssert, + }; + + StringRef GetLocationName(PPEmbedExprContext Context) const; + + bool DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation, + PPEmbedExprContext Context, + bool SingleAllowed = true); + // Build a potentially resolved SourceLocExpr. ExprResult BuildSourceLocExpr(SourceLocExpr::IdentKind Kind, QualType ResultTy, SourceLocation BuiltinLoc, @@ -8292,6 +8325,10 @@ class Sema final { SourceLocation EqualLoc, ParsedTemplateArgument DefaultArg); + void ModifyTemplateArguments( + const TemplateTy &Template, + SmallVectorImpl &TemplateArgs); + TemplateParameterList * ActOnTemplateParameterList(unsigned Depth, SourceLocation ExportLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 5c32fbc079c9a6..138c52bc8149fc 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1715,6 +1715,9 @@ enum StmtCode { /// A SourceLocExpr record. 
EXPR_SOURCE_LOC, + /// A PPEmbedExpr record. + EXPR_BUILTIN_PP_EMBED, + /// A ShuffleVectorExpr record. EXPR_SHUFFLE_VECTOR, diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 5d3b510df1ef9b..be369610271d9a 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2329,6 +2329,21 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx, llvm_unreachable("unhandled case"); } +PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, + StringLiteral *Filename, StringLiteral *BinaryData, + SourceLocation BLoc, SourceLocation RParenLoc, + DeclContext *ParentContext) + : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary), + BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext), + Filename(Filename), BinaryData(BinaryData) { + setDependence(ExprDependence::None); +} + +size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const { + return getDataStringLiteral()->getByteLength() / + (Context.getTypeSize(getType()) / Context.getTypeSize(Context.CharTy)); +} + InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc, ArrayRef initExprs, SourceLocation rbraceloc) : Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary), @@ -3547,6 +3562,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case CXXUuidofExprClass: case OpaqueValueExprClass: case SourceLocExprClass: + case PPEmbedExprClass: case ConceptSpecializationExprClass: case RequiresExprClass: case SYCLUniqueStableNameExprClass: diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index ffa7c6802ea6e1..fbbbd72b144571 100644 --- a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::RequiresExprClass: return Cl::CL_PRValue; + case Expr::PPEmbedExprClass: + // Nominally, this just goes through as a PRValue until we actually expand + // it and check it. 
+ return Cl::CL_PRValue; + // Make HLSL this reference-like case Expr::CXXThisExprClass: return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5a33e918db8e8c..804c56671aac93 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -8921,6 +8921,11 @@ class PointerExprEvaluator return true; } + bool VisitPPEmbedExpr(const PPEmbedExpr *E) { + llvm_unreachable("Not yet implemented for ExprConstant.cpp"); + return true; + } + bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) { std::string ResultStr = E->ComputeName(Info.Ctx); @@ -16155,6 +16160,9 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { return ICEDiag(IK_NotICE, E->getBeginLoc()); return CheckICE(cast(E)->getSubExpr(), Ctx); } + case Expr::PPEmbedExprClass: { + return ICEDiag(IK_ICE, E->getBeginLoc()); + } } llvm_unreachable("Invalid StmtClass!"); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 23ec35cae4b7b4..f08fb766efd777 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -4721,6 +4721,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, case Expr::PseudoObjectExprClass: case Expr::AtomicExprClass: case Expr::SourceLocExprClass: + case Expr::PPEmbedExprClass: case Expr::BuiltinBitCastExprClass: { NotPrimaryExpr(); diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index a31aa0cfeeed8d..f94386be778847 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -49,6 +49,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -1145,6 +1146,12 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) { OS << Node->getBuiltinStr() << 
"()"; } +void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) { + OS << "__builtin_pp_embed(" << Node->getType() << ", " + << Node->getFilenameStringLiteral()->getBytes() << ", \"" + << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")"; +} + void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) { PrintExpr(Node->getSubExpr()); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 22b6855b0fff23..0be044f54a819e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2284,6 +2284,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) { VisitExpr(E); } +void StmtProfiler::VisitPPEmbedExpr(const PPEmbedExpr *E) { VisitExpr(E); } + void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); } void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) { diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index e0e80b5e0fbedb..d8a5b56438ad33 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -549,7 +549,6 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, if (MaybeLimit) FileSize = *MaybeLimit; - // If there's a high enough chance that the file have changed since we // got its size, force a stat before opening it. if (isVolatile || Entry->isNamedPipe()) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index d2b5426d27bb3b..96ac3663ca6658 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -422,8 +422,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { // collisions (if there were, the switch below would complain about duplicate // case values). Note that this depends on 'if' being null terminated. 
-#define HASH(LEN, FIRST, THIRD) \ - (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63) +#define HASH(LEN, FIRST, THIRD) \ + (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63) #define CASE(LEN, FIRST, THIRD, NAME) \ case HASH(LEN, FIRST, THIRD): \ return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME @@ -438,7 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 4, 'e', 's', else); CASE( 4, 'l', 'n', line); CASE( 4, 's', 'c', sccs); - CASE( 5, 'e', 'b', embed); + CASE(5, 'e', 'b', embed); CASE( 5, 'e', 'd', endif); CASE( 5, 'e', 'r', error); CASE( 5, 'i', 'e', ident); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a8d51179a9ba58..a9dc716b15a049 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1324,7 +1324,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, - options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group}); + options::OPT_F, options::OPT_index_header_map, + options::OPT_EmbedPath_Group}); // Add -Wp, and -Xpreprocessor if using the preprocessor. 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index e405a9085951dc..0a3c16f3a669c7 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1399,8 +1399,7 @@ class AnnotatingParser { if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) { parseHasInclude(); - } - else if (Tok->is(Keywords.kw___has_embed)) { + } else if (Tok->is(Keywords.kw___has_embed)) { parseHasEmbed(); } if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp index 10558b1d34bf62..04ddb92ff7f7b6 100644 --- a/clang/lib/Frontend/DependencyFile.cpp +++ b/clang/lib/Frontend/DependencyFile.cpp @@ -65,11 +65,11 @@ struct DepCollectorPPCallbacks : public PPCallbacks { /*IsMissing=*/false); } - void EmbedDirective(SourceLocation HashLoc, - StringRef FileName, bool IsAngled, - CharSourceRange FilenameRange, CharSourceRange ParametersRange, - OptionalFileEntryRef File, StringRef SearchPath, - StringRef RelativePath) override { + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override { if (!File) DepCollector.maybeAddDependency(FileName, /*FromModule*/ false, @@ -97,14 +97,13 @@ struct DepCollectorPPCallbacks : public PPCallbacks { } void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, - OptionalFileEntryRef File) override { + OptionalFileEntryRef File) override { if (!File) return; StringRef Filename = llvm::sys::path::remove_leading_dotslash(File->getName()); DepCollector.maybeAddDependency(Filename, - /*FromModule=*/false, - false, + /*FromModule=*/false, false, /*IsModuleFile=*/false, &PP.getFileManager(), /*IsMissing=*/false); diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp 
b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index fb9baa92e6836d..1d93ad97305da8 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -107,9 +107,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, - bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives, - bool UseLineDirectives, bool MinimizeWhitespace, - bool DirectivesOnly, bool KeepSystemIncludes) + bool defines, bool DumpIncludeDirectives, + bool DumpEmbedDirectives, bool UseLineDirectives, + bool MinimizeWhitespace, bool DirectivesOnly, + bool KeepSystemIncludes) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), @@ -414,7 +415,7 @@ void PrintPPOutputPPCallbacks::EmbedDirective( if (DumpEmbedDirectives) { MoveToLine(HashLoc, /*RequireStartOfLine=*/true); *OS << "#embed " << (IsAngled ? '<' : '"') << FileName - << (IsAngled ? '>' : '"') << " /* clang -E -dE */"; + << (IsAngled ? '>' : '"') << " /* clang -E -dE */"; setEmittedDirectiveOnThisLine(); } } @@ -1002,8 +1003,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives, - Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes); + Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, + Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly, + Opts.KeepSystemIncludes); // Expand macros in pragmas with -fms-extensions. The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. 
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp index 4a73946951fd9c..e7d0548fef3353 100644 --- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp +++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp @@ -463,11 +463,11 @@ void InclusionRewriter::Process(FileID FileId, // Add line marker to indicate we're returning from an included // file. LineInfoExtra = " 2"; - } - // fix up lineinfo (since commented out directive changed line - // numbers) for inclusions that were skipped due to header guards - WriteLineInfo(FileName, Line, FileType, LineInfoExtra); - break; + } + // fix up lineinfo (since commented out directive changed line + // numbers) for inclusions that were skipped due to header guards + WriteLineInfo(FileName, Line, FileType, LineInfoExtra); + break; } case tok::pp_pragma: { StringRef Identifier = NextIdentifierName(RawLex, RawToken); @@ -475,7 +475,7 @@ void InclusionRewriter::Process(FileID FileId, if (NextIdentifierName(RawLex, RawToken) == "system_header") { // keep the directive in, commented out CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, - NextToWrite, Line); + NextToWrite, Line); // update our own type FileType = SM.getFileCharacteristic(RawToken.getLocation()); WriteLineInfo(FileName, Line, FileType); @@ -483,7 +483,7 @@ void InclusionRewriter::Process(FileID FileId, } else if (Identifier == "once") { // keep the directive in, commented out CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, - NextToWrite, Line); + NextToWrite, Line); WriteLineInfo(FileName, Line, FileType); } break; @@ -535,7 +535,7 @@ void InclusionRewriter::Process(FileID FileId, OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(), - LocalEOL, Line, /*EnsureNewline=*/ true); + LocalEOL, Line, /*EnsureNewline=*/true); WriteLineInfo(FileName, Line, FileType); RawLex.SetKeepWhitespaceMode(false); break; diff --git 
a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 7968c62cbd3e7b..e2e55daa77b854 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -566,6 +566,7 @@ class RuntimeInterfaceBuilder CStyleCastPtrExpr(S, Ctx.VoidPtrTy, (uintptr_t)Ty.getAsOpaquePtr()); // The QualType parameter `OpaqueType`, represented as `void*`. Args.push_back(TypeArg); + S.ModifyCallExprArguments(nullptr, E->getBeginLoc(), Args, E->getEndLoc()); // We push the last parameter based on the type of the Expr. Note we need // special care for rvalue struct. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index feed1b9ecd71a8..b55b4c360d4429 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -417,6 +417,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, } } + // NOTE: token streams containing commas may be printed with + // pseudo-locations after a faux-expansion; return the spelling directly + // to avoid reading a bogus location from a source file that does not exist. + if (Tok.is(tok::comma)) { + Buffer = ","; + return 1; + } + // NOTE: this can be checked even after testing for an IdentifierInfo. if (Tok.isLiteral()) TokStart = Tok.getLiteralData(); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e0d98d7ca03fa1..85b38537d75cbe 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -42,11 +42,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/AlignOf.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include "llvm/Support/SaveAndRestore.h" #include #include +#include #include #include #include @@ -3631,10 +3633,12 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, SmallVector ParameterTokens; tok::TokenKind EndTokenKind = InHasEmbed ?
tok::r_paren : tok::eod; Result.StartLoc = CurTok.getLocation(); + Result.EndLoc = CurTok.getLocation(); for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) { Parameter.clear(); // Lex identifier [:: identifier ...] if (!CurTok.is(tok::identifier)) { + Result.EndLoc = CurTok.getEndLoc(); Diag(CurTok, diag::err_expected) << "identifier"; DiscardUntilEndOfDirective(); return Result; @@ -3647,6 +3651,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, Parameter.append("::"); LexNonComment(CurTok); if (!CurTok.is(tok::identifier)) { + Result.EndLoc = CurTok.getEndLoc(); Diag(CurTok, diag::err_expected) << "identifier"; DiscardUntilEndOfDirective(); return Result; @@ -3670,25 +3675,19 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, return Result; } const llvm::APSInt &LimitResult = *LimitEvalResult.Value; - const bool ValueDoesNotFit = - LimitResult.getBitWidth() > 64 - ? true - : (LimitResult.isUnsigned() || - (LimitResult.isSigned() && LimitResult.isNegative())); - if (ValueDoesNotFit) { + if (LimitResult.getBitWidth() > 64) { Diag(CurTok, diag::warn_pp_expr_overflow); - // just truncate and roll with that, I guess? 
- Result.MaybeLimitParam = - static_cast(LimitResult.getRawData()[0]); - } else { - Result.MaybeLimitParam = - static_cast(LimitResult.getZExtValue()); } + size_t LimitValue = 0; + LimitValue = LimitResult.getLimitedValue(); + Result.MaybeLimitParam = PPEmbedParameterLimit{ + LimitValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()}; LexNonComment(CurTok); } else if (Parameter == "clang::offset") { // we have a limit parameter and its internals are processed using // evaluation rules from #if - handle here if (CurTok.isNot(tok::l_paren)) { + Result.EndLoc = CurTok.getEndLoc(); Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter; DiscardUntilEndOfDirective(); return Result; @@ -3697,18 +3696,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, DirectiveEvalResult OffsetEvalResult = EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true); if (!OffsetEvalResult.Value) { + Result.EndLoc = CurTok.getEndLoc(); return Result; } const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value; + size_t OffsetValue; if (OffsetResult.getBitWidth() > 64) { Diag(CurTok, diag::warn_pp_expr_overflow); - // just truncate and roll with that, I guess? 
- Result.MaybeOffsetParam = - static_cast(OffsetResult.getRawData()[0]); - } else { - Result.MaybeOffsetParam = - static_cast(OffsetResult.getZExtValue()); } + OffsetValue = OffsetResult.getLimitedValue(); + Result.MaybeOffsetParam = PPEmbedParameterOffset{ + OffsetValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()}; LexNonComment(CurTok); } else { if (CurTok.is(tok::l_paren)) { @@ -3764,6 +3762,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, return true; }; if (!ParseArgToken()) { + Result.EndLoc = CurTok.getEndLoc(); return Result; } if (!CurTok.is(tok::r_paren)) { @@ -3775,14 +3774,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, } // "Token-soup" parameters if (Parameter == "if_empty") { - // TODO: integer list optimization - Result.MaybeIfEmptyParam = std::move(ParameterTokens); + Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{ + std::move(ParameterTokens), ParameterStartTok.getLocation(), + CurTok.getLocation()}; } else if (Parameter == "prefix") { - // TODO: integer list optimization - Result.MaybePrefixParam = std::move(ParameterTokens); + Result.MaybePrefixParam = PPEmbedParameterPrefix{ + std::move(ParameterTokens), ParameterStartTok.getLocation(), + CurTok.getLocation()}; } else if (Parameter == "suffix") { - // TODO: integer list optimization - Result.MaybeSuffixParam = std::move(ParameterTokens); + Result.MaybeSuffixParam = PPEmbedParameterSuffix{ + std::move(ParameterTokens), ParameterStartTok.getLocation(), + CurTok.getLocation()}; } else { ++Result.UnrecognizedParams; if (DiagnoseUnknown) { @@ -3793,6 +3795,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, } } Result.Successful = true; + Result.EndLoc = CurTok.getEndLoc(); return Result; } @@ -3823,89 +3826,327 @@ inline constexpr const char *IntegerLiterals[] = { "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252", "253", "254", "255"}; -void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation 
FilenameLoc, - LexEmbedParametersResult &Params, - StringRef BinaryContents, - const size_t TargetCharWidth) { - (void)TargetCharWidth; // for later, when we support various sizes - size_t TokenIndex = 0; - const size_t InitListTokensSize = [&]() { - if (BinaryContents.empty()) { - if (Params.MaybeIfEmptyParam) { - return Params.MaybeIfEmptyParam->size(); +static size_t +ComputeNaiveReserveSize(const Preprocessor::LexEmbedParametersResult &Params, + StringRef TypeName, StringRef BinaryContents, + SmallVectorImpl &TokSpellingBuffer) { + size_t ReserveSize = 0; + if (BinaryContents.empty()) { + if (Params.MaybeIfEmptyParam) { + for (const auto &Tok : Params.MaybeIfEmptyParam->Tokens) { + const size_t TokLen = Tok.getLength(); + if (TokLen > TokSpellingBuffer.size()) { + TokSpellingBuffer.resize(TokLen); + } + ReserveSize += TokLen; + } + } + } else { + if (Params.MaybePrefixParam) { + for (const auto &Tok : Params.MaybePrefixParam->Tokens) { + const size_t TokLen = Tok.getLength(); + if (TokLen > TokSpellingBuffer.size()) { + TokSpellingBuffer.resize(TokLen); + } + ReserveSize += TokLen; + } + } + for (const auto &Byte : BinaryContents) { + ReserveSize += 3 + TypeName.size(); // ((type-name) + if (Byte > 99) { + ReserveSize += 3; // ### + } else if (Byte > 9) { + ReserveSize += 2; // ## } else { - return static_cast(0); + ReserveSize += 1; // # } - } else { - return static_cast( - (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) + - (BinaryContents.size() * 2 - 1) + - (Params.MaybeSuffixParam ? 
Params.MaybeSuffixParam->size() : 0)); + ReserveSize += 2; // ), } - }(); - std::unique_ptr InitListTokens(new Token[InitListTokensSize]()); + if (Params.MaybePrefixParam) { + for (const auto &Tok : Params.MaybePrefixParam->Tokens) { + const size_t TokLen = Tok.getLength(); + if (TokLen > TokSpellingBuffer.size()) { + TokSpellingBuffer.resize(TokLen); + } + ReserveSize += TokLen; + } + } + } + return ReserveSize; +} +void Preprocessor::HandleEmbedDirectiveNaive( + SourceLocation HashLoc, SourceLocation FilenameLoc, + const LexEmbedParametersResult &Params, StringRef BinaryContents, + const size_t TargetCharWidth) { + // Load up a new embed buffer for this file and set of parameters in + // particular. + EmbedBuffers.push_back(""); + size_t EmbedBufferNumber = EmbedBuffers.size(); + std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber); + llvm::Twine EmbedBufferName = [](const std::string &Number) { + llvm::Twine PrefixNumber = (""); + }(EmbedBufferNumberVal); + std::string &TargetEmbedBuffer = EmbedBuffers.back(); + const size_t TotalSize = BinaryContents.size(); + // In the future, this might change/improve. 
+ const StringRef TypeName = "unsigned char"; + + SmallVector TokSpellingBuffer(32, 0); + const size_t ReserveSize = ComputeNaiveReserveSize( + Params, TypeName, BinaryContents, TokSpellingBuffer); + TargetEmbedBuffer.reserve(ReserveSize); + + // Generate the look-alike source file if (BinaryContents.empty()) { if (Params.MaybeIfEmptyParam) { - std::copy(Params.MaybeIfEmptyParam->begin(), - Params.MaybeIfEmptyParam->end(), InitListTokens.get()); - TokenIndex += Params.MaybeIfEmptyParam->size(); - assert(TokenIndex == InitListTokensSize); - EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, - true); + const PPEmbedParameterIfEmpty &EmptyParam = *Params.MaybeIfEmptyParam; + for (const auto &Tok : EmptyParam.Tokens) { + StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer); + TargetEmbedBuffer.append(Spelling.data(), Spelling.size()); + } + } + } else { + if (Params.MaybePrefixParam) { + const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam; + for (const auto &Tok : PrefixParam.Tokens) { + StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer); + TargetEmbedBuffer.append(Spelling.data(), Spelling.size()); + } + } + for (size_t I = 0; I < TotalSize; ++I) { + unsigned char ByteValue = BinaryContents[I]; + StringRef ByteRepresentation = IntegerLiterals[ByteValue]; + TargetEmbedBuffer.append(2, '('); + TargetEmbedBuffer.append(TypeName.data(), TypeName.size()); + TargetEmbedBuffer.append(1, ')'); + TargetEmbedBuffer.append(ByteRepresentation.data(), + ByteRepresentation.size()); + TargetEmbedBuffer.append(1, ')'); + bool AtEndOfContents = I == (TotalSize - 1); + if (!AtEndOfContents) { + TargetEmbedBuffer.append(1, ','); + } + } + if (Params.MaybeSuffixParam) { + const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam; + for (const auto &Tok : SuffixParam.Tokens) { + StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer); + TargetEmbedBuffer.append(Spelling.data(), Spelling.size()); + } } - return; } 
- // FIXME: this does not take the target's byte size into account; - // will fail on many DSPs and embedded machines! + // Create faux-file and its ID, backed by a memory buffer. + std::unique_ptr EmbedMemBuffer = + llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName); + assert(EmbedMemBuffer && "Cannot create predefined source buffer"); + FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer)); + assert(EmbedBufferFID.isValid() && + "Could not create FileID for #embed directive?"); + // Start parsing the look-alike source file for the embed directive and + // pretend everything is normal + // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™. + EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false); +} + +static bool TokenListIsCharacterArray(Preprocessor &PP, + const size_t TargetCharWidth, + bool IsPrefix, + const SmallVectorImpl &Tokens, + llvm::SmallVectorImpl &Output) { + const bool IsSuffix = !IsPrefix; + size_t MaxValue = + static_cast(std::pow((size_t)2, TargetCharWidth)) - 1u; + size_t TokenIndex = 0; + // if it's a suffix, we are expecting a comma first + // if it's a prefix, we are expecting a numeric literal first + bool ExpectingNumericLiteral = IsPrefix; + const size_t TokensSize = Tokens.size(); + if (Tokens.empty()) { + return true; + } + for (; TokenIndex < TokensSize; + (void)++TokenIndex, ExpectingNumericLiteral = !ExpectingNumericLiteral) { + const Token &Tok = Tokens[TokenIndex]; + // TODO: parse an optional, PLAIN `(unsigned char)` cast in front of the + // literals, since the Spec technically decrees each element is of type + // `unsigned char` (unless we have a potential future extension for + // `clang::type(meow)` as an embed parameter + if (ExpectingNumericLiteral) { + if (Tok.isNot(tok::numeric_constant)) { + return false; + } + uint64_t Value = {}; + Token ParsingTok = Tok; + if (!PP.parseSimpleIntegerLiteral(ParsingTok, Value, false)) { + // numeric literal is a floating 
point literal or a UDL; too complex for + // us + return false; + } + if (Value > MaxValue || Value > static_cast(0xFF)) { + // number is too large + return false; + } + Output.push_back((char)Value); + } else { + if (Tok.isNot(tok::comma)) { + return false; + } + } + } + const bool EndedOnNumber = !ExpectingNumericLiteral; + if (IsPrefix && EndedOnNumber) { + // we ended on a number: this is a failure for prefix! + return false; + } + const bool EndedOnComma = ExpectingNumericLiteral; + if (IsSuffix && EndedOnComma) { + // we ended on a comma: this is a failure for suffix! + return false; + } + // if all tokens have been consumed by the above process, then we have + // succeeded. + return TokenIndex == TokensSize; +} + +static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1, + StringRef Bytes2, std::string &OutputBuffer) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + const size_t TotalSize = Bytes0.size() + Bytes1.size() + Bytes2.size(); + const size_t Bytes0Size = Bytes0.size(); + const size_t Bytes01Size = Bytes0.size() + Bytes1.size(); + const size_t IndexOffset = OutputBuffer.size(); + OutputBuffer.resize(OutputBuffer.size() + (((TotalSize + 2) / 3) * 4)); + auto IndexInto = [&](size_t i) -> unsigned char { + if (i >= Bytes0Size) { + if (i >= Bytes01Size) { + return Bytes2[i - Bytes01Size]; + } + return Bytes1[i - Bytes0Size]; + } + return Bytes0[i]; + }; + + size_t i = 0, j = 0; + for (size_t n = TotalSize / 3 * 3; i < n; i += 3, j += 4) { + uint32_t x = ((unsigned char)IndexInto(i) << 16) | + ((unsigned char)IndexInto(i + 1) << 8) | + (unsigned char)IndexInto(i + 2); + OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63]; + OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63]; + OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63]; + OutputBuffer[IndexOffset + j + 3] = Table[x & 63]; + } + if (i + 1 == TotalSize) { + uint32_t x = ((unsigned char)IndexInto(i) << 16); + 
OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63]; + OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63]; + OutputBuffer[IndexOffset + j + 2] = '='; + OutputBuffer[IndexOffset + j + 3] = '='; + } else if (i + 2 == TotalSize) { + uint32_t x = ((unsigned char)IndexInto(i) << 16) | + ((unsigned char)IndexInto(i + 1) << 8); + OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63]; + OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63]; + OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63]; + OutputBuffer[IndexOffset + j + 3] = '='; + } +} + +void Preprocessor::HandleEmbedDirectiveBuiltin( + SourceLocation HashLoc, const Token &FilenameTok, + StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath, + const LexEmbedParametersResult &Params, StringRef BinaryContents, + const size_t TargetCharWidth) { + // if it's empty, just process it like a normal expanded token stream + if (BinaryContents.empty()) { + HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params, + BinaryContents, TargetCharWidth); + return; + } + SmallVector BinaryPrefix{}; + SmallVector BinarySuffix{}; if (Params.MaybePrefixParam) { - std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(), - InitListTokens.get() + TokenIndex); - TokenIndex += Params.MaybePrefixParam->size(); - } - for (size_t I = 0; I < BinaryContents.size(); ++I) { - unsigned char ByteValue = BinaryContents[I]; - StringRef ByteRepresentation = IntegerLiterals[ByteValue]; - const size_t InitListIndex = TokenIndex; - Token &IntToken = InitListTokens[InitListIndex]; - IntToken.setKind(tok::numeric_constant); - IntToken.setLiteralData(ByteRepresentation.data()); - IntToken.setLength(ByteRepresentation.size()); - IntToken.setLocation(FilenameLoc); - ++TokenIndex; - bool AtEndOfContents = I == (BinaryContents.size() - 1); - if (!AtEndOfContents) { - const size_t CommaInitListIndex = InitListIndex + 1; - Token &CommaToken = InitListTokens[CommaInitListIndex]; - 
CommaToken.setKind(tok::comma); - CommaToken.setLocation(FilenameLoc); - ++TokenIndex; + // If we have a prefix, validate that it's a good fit for direct data + // embedding (and prepare to prepend it) + const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam; + if (!TokenListIsCharacterArray(*this, TargetCharWidth, true, + PrefixParam.Tokens, BinaryPrefix)) { + HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params, + BinaryContents, TargetCharWidth); + return; } } if (Params.MaybeSuffixParam) { - std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(), - InitListTokens.get() + TokenIndex); - TokenIndex += Params.MaybeSuffixParam->size(); + // If we have a suffix, validate that it's a good fit for direct data + // embedding (and prepare to append it) + const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam; + if (!TokenListIsCharacterArray(*this, TargetCharWidth, false, + SuffixParam.Tokens, BinarySuffix)) { + HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params, + BinaryContents, TargetCharWidth); + return; + } } - assert(TokenIndex == InitListTokensSize); - EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false); -} -void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc, - LexEmbedParametersResult &Params, - StringRef BinaryContents, - const size_t TargetCharWidth) { - // TODO: implement direct built-in support - HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents, - TargetCharWidth); + // Load up a new embed buffer for this file and set of parameters in + // particular.
+ EmbedBuffers.push_back(""); + size_t EmbedBufferNumber = EmbedBuffers.size(); + std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber); + llvm::Twine EmbedBufferName = [](const std::string &Number) { + llvm::Twine PrefixNumber = (""); + }(EmbedBufferNumberVal); + std::string &TargetEmbedBuffer = EmbedBuffers.back(); + StringRef TypeName = "unsigned char"; + const size_t TotalSize = + BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size(); + const size_t ReserveSize = // add up for necessary size: + 19 // __builtin_pp_embed( + + TypeName.size() // type-name + + 2 // ," + + ResolvedFilename.size() // file-name + + 3 // "," + + (((TotalSize + 2) / 3) * 4) // base64-string + + 2 // "); + ; + // Reserve appropriate size + TargetEmbedBuffer.reserve(ReserveSize); + + // Generate the look-alike source file + TargetEmbedBuffer.append("__builtin_pp_embed("); + TargetEmbedBuffer.append(TypeName.data(), TypeName.size()); + TargetEmbedBuffer.append(",\""); + TargetEmbedBuffer.append(ResolvedFilename.data(), ResolvedFilename.size()); + TargetEmbedBuffer.append("\",\""); + // include the prefix(...) and suffix(...) binary data in the total contents + TripleEncodeBase64( + StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents, + StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer); + TargetEmbedBuffer.append("\")"); + // Create faux-file and its ID, backed by a memory buffer. + std::unique_ptr EmbedMemBuffer = + llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName); + assert(EmbedMemBuffer && "Cannot create predefined source buffer"); + FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer)); + assert(EmbedBufferFID.isValid() && + "Could not create FileID for #embed directive?"); + // Start parsing the look-alike source file for the embed directive and + // pretend everything is normal + // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™. 
+ EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false); } void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, const FileEntry *LookupFromFile) { if (!LangOpts.C23 || !LangOpts.CPlusPlus26) { - auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_embed - : diag::warn_cxx26_pp_embed); + auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed + : diag::warn_c23_pp_embed); Diag(EmbedTok, EitherDiag); } @@ -3958,9 +4199,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, } std::optional MaybeSignedLimit{}; if (Params.MaybeLimitParam) { - if (static_cast(INT64_MAX) >= *Params.MaybeLimitParam) { - MaybeSignedLimit = static_cast(*Params.MaybeLimitParam); - } + MaybeSignedLimit = static_cast(Params.MaybeLimitParam->Limit); } llvm::ErrorOr> MaybeFile = getFileManager().getBufferForFile( *MaybeFileRef, false, false, MaybeSignedLimit); @@ -3973,7 +4212,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, StringRef BinaryContents = MaybeFile.get()->getBuffer(); if (Params.MaybeOffsetParam) { // offsets all the way to the end of the file make for an empty file. - const size_t OffsetParam = *Params.MaybeOffsetParam; + const size_t &OffsetParam = Params.MaybeOffsetParam->Offset; BinaryContents = BinaryContents.substr(OffsetParam); } const size_t TargetCharWidth = getTargetInfo().getCharWidth(); @@ -4009,11 +4248,12 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, RelativePath); } if (PPOpts->NoBuiltinPPEmbed) { - HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents, + HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents, TargetCharWidth); } else { // emit a token directly, handle it internally. 
- HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents, + HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath, + RelativePath, Params, BinaryContents, TargetCharWidth); } } diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 6e0163ccc89b7f..7f6c964b0d68a3 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1270,8 +1270,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II, int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { // pedwarn for not being on C23 if (!LangOpts.C23 || !LangOpts.CPlusPlus26) { - auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_has_embed - : diag::warn_cxx26_pp_has_embed); + auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed + : diag::warn_c23_pp_has_embed); Diag(Tok, EitherDiag); } @@ -1321,7 +1321,8 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { SourceLocation FilenameLoc = Tok.getLocation(); Token FilenameTok = Tok; - Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false); + Preprocessor::LexEmbedParametersResult Params = + this->LexEmbedParameters(Tok, true, false); if (!Params.Successful) { if (Tok.isNot(tok::eod)) this->DiscardUntilEndOfDirective(); @@ -1339,7 +1340,6 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { return VALUE__STDC_EMBED_NOT_FOUND__; } - SmallString<128> FilenameBuffer; SmallString<256> RelativePath; StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer); @@ -1351,11 +1351,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { assert(!Filename.empty()); const FileEntry *LookupFromFile = this->getCurrentFileLexer() ? 
this->getCurrentFileLexer()->getFileEntry() - : nullptr; + : nullptr; OptionalFileEntryRef MaybeFileEntry = this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false, - LookupFromFile, nullptr, - &RelativePath); + LookupFromFile, nullptr, &RelativePath); if (Callbacks) { Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry); } @@ -1363,11 +1362,15 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { return VALUE__STDC_EMBED_NOT_FOUND__; } size_t FileSize = MaybeFileEntry->getSize(); - if (FileSize == 0 || - (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) { + if (Params.MaybeLimitParam) { + if (FileSize > Params.MaybeLimitParam->Limit) { + FileSize = Params.MaybeLimitParam->Limit; + } + } + if (FileSize == 0) { return VALUE__STDC_EMBED_EMPTY__; } - if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) { + if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) { return VALUE__STDC_EMBED_EMPTY__; } return VALUE__STDC_EMBED_FOUND__; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index ede4c51487ffbe..10eb6d268b37b1 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1411,7 +1411,8 @@ bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, return true; } -bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { +bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value, + bool WithLex) { assert(Tok.is(tok::numeric_constant)); SmallString<8> IntegerBuffer; bool NumberInvalid = false; @@ -1426,7 +1427,8 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { llvm::APInt APVal(64, 0); if (Literal.GetIntegerValue(APVal)) return false; - Lex(Tok); + if (WithLex) + Lex(Tok); Value = APVal.getLimitedValue(); return true; } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 9dbfc1c8c5e9ff..f3ad1053b0949f 100644 --- 
a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -32,6 +32,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Base64.h" #include using namespace clang; @@ -805,6 +806,7 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { /// [MS] '__builtin_FUNCSIG' '(' ')' /// [GNU] '__builtin_LINE' '(' ')' /// [CLANG] '__builtin_COLUMN' '(' ')' +/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')' /// [GNU] '__builtin_source_location' '(' ')' /// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' /// [GNU] '__null' @@ -1057,9 +1059,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, isVectorLiteral, NotPrimaryExpression); case tok::identifier: - ParseIdentifier: { // primary-expression: identifier - // unqualified-id: identifier - // constant: enumeration-constant + ParseIdentifier: { // primary-expression: identifier + // unqualified-id: identifier + // constant: enumeration-constant // Turn a potentially qualified name into a annot_typename or // annot_cxxscope if it would be valid. This handles things like x::y, etc. if (getLangOpts().CPlusPlus) { @@ -1345,6 +1347,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, case tok::kw___builtin_FUNCSIG: case tok::kw___builtin_LINE: case tok::kw___builtin_source_location: + case tok::kw___builtin_pp_embed: if (NotPrimaryExpression) *NotPrimaryExpression = true; // This parses the complete suffix; we can return early. 
@@ -2145,6 +2148,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { } else { Expr *Fn = LHS.get(); SourceLocation RParLoc = Tok.getLocation(); + Actions.ModifyCallExprArguments(Fn, Loc, ArgExprs, RParLoc); LHS = Actions.ActOnCallExpr(getCurScope(), Fn, Loc, ArgExprs, RParLoc, ExecConfig); if (LHS.isInvalid()) { @@ -2575,6 +2579,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { /// [MS] '__builtin_FUNCSIG' '(' ')' /// [GNU] '__builtin_LINE' '(' ')' /// [CLANG] '__builtin_COLUMN' '(' ')' +/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')' /// [GNU] '__builtin_source_location' '(' ')' /// [OCL] '__builtin_astype' '(' assignment-expression ',' type-name ')' /// @@ -2841,6 +2846,96 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() { Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen()); break; } + case tok::kw___builtin_pp_embed: { + SourceRange DataTyExprSourceRange{}; + TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange)); + + if (ExpectAndConsume(tok::comma)) { + SkipUntil(tok::r_paren, StopAtSemi); + Res = ExprError(); + } + + ExprResult FilenameArgExpr(ParseStringLiteralExpression()); + + if (ExpectAndConsume(tok::comma)) { + SkipUntil(tok::r_paren, StopAtSemi); + Res = ExprError(); + } + + ExprResult Base64ArgExpr(ParseStringLiteralExpression()); + + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected) << tok::r_paren; + Res = ExprError(); + } + + const ASTContext &Context = Actions.getASTContext(); + QualType DataTy = Context.UnsignedCharTy; + size_t TargetWidth = Context.getTypeSize(DataTy); + if (DataTyExpr.isInvalid()) { + Res = ExprError(); + } else { + DataTy = DataTyExpr.get().get().getCanonicalType(); + TargetWidth = Context.getTypeSize(DataTy); + if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy && + DataTy.getUnqualifiedType() != Context.CharTy) { + // TODO: check if is exactly the same as unsigned char + Diag(DataTyExprSourceRange.getBegin(),
diag::err_builtin_pp_embed_invalid_argument) + << "only 'char' and 'unsigned char' are supported"; + Res = ExprError(); + } + if ((TargetWidth % CHAR_BIT) != 0) { + Diag(DataTyExprSourceRange.getBegin(), + diag::err_builtin_pp_embed_invalid_argument) + << "width of element type is not a multiple of host platform's " + "CHAR_BIT!"; + Res = ExprError(); + } + } + + StringLiteral *FilenameLiteral = nullptr; + if (FilenameArgExpr.isInvalid()) { + Res = ExprError(); + } else { + FilenameLiteral = FilenameArgExpr.getAs(); + } + + std::vector BinaryData{}; + if (Base64ArgExpr.isInvalid()) { + Res = ExprError(); + } else { + StringLiteral *Base64Str = Base64ArgExpr.getAs(); + StringRef Base64StrData = Base64Str->getBytes(); + if (Base64Str->getKind() != StringLiteral::Ordinary) { + Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal) + << 0 + << "'__builtin_pp_embed' with valid base64 encoding that is an " + "ordinary \"...\" string"; + } + const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) { + Diag(Base64Str->getExprLoc(), + diag::err_builtin_pp_embed_invalid_argument) + << "expected a valid base64 encoded string"; + }; + llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData); + llvm::handleAllErrors(std::move(Err), OnDecodeError); + if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) { + Diag(DataTyExprSourceRange.getBegin(), + diag::err_builtin_pp_embed_invalid_argument) + << "size of data does not split evently into the number of bytes " + "requested"; + Res = ExprError(); + } + } + + if (!Res.isInvalid()) { + Res = Actions.ActOnPPEmbedExpr( + StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(), + FilenameLiteral, DataTy, std::move(BinaryData)); + } + break; + } } if (Res.isInvalid()) diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index f556d0e6d4f8b6..8364519861fe4f 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -1671,6 +1671,8 @@ bool 
Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs, // arguments. } while (TryConsumeToken(tok::comma)); + Actions.ModifyTemplateArguments(Template, TemplateArgs); + return false; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 23b743d67a16b0..54b2b4287b5da0 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13336,6 +13336,54 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { return; } + // Adjust the init expression for PPEmbedExpr as early as possible + // here. + bool AlreadyAdjustedPPEmbedExpr = false; + if (InitListExpr *ILExpr = dyn_cast_if_present(Init); ILExpr) { + QualType VDeclTy = VDecl->getType(); + ArrayRef Inits = ILExpr->inits(); + if (CheckExprListForPPEmbedExpr(Inits, VDeclTy) == PPEmbedExpr::FoundOne) { + PPEmbedExpr *PPEmbed = dyn_cast_if_present(Inits[0]); + ILExpr->setInit(0, PPEmbed->getDataStringLiteral()); + AlreadyAdjustedPPEmbedExpr = true; + } + } + + if (!AlreadyAdjustedPPEmbedExpr) { + // If there is a PPEmbedExpr as a single initializer without braces, + // make sure it only produces a single element (and then expand said + // element). + if (PPEmbedExpr *PPEmbed = dyn_cast_if_present(Init); + PPEmbed) { + if (PPEmbed->getDataElementCount(Context) == 1) { + // Expand the list in-place immediately, let the natural work take hold + Init = ExpandSinglePPEmbedExpr(PPEmbed); + } else { + // `__builtin_pp_embed( ... )` only produces 2 or more values. + Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type) + << "'__builtin_pp_embed'"; + RealDecl->setInvalidDecl(); + return; + } + } + + // Legitimately, in all other cases, COMPLETELY nuke the PPEmbedExpr + // and turn it into a list of integers where applicable. 
+ if (InitListExpr *ILExpr = dyn_cast_if_present(Init); + ILExpr) { + ArrayRef Inits = ILExpr->inits(); + SmallVector OutputExprList{}; + if (ExpandPPEmbedExprInExprList(Inits, OutputExprList, false) == + PPEmbedExpr::Expanded) { + ILExpr->resizeInits(Context, OutputExprList.size()); + for (size_t I = 0; I < OutputExprList.size(); ++I) { + auto &InitExpr = OutputExprList[I]; + ILExpr->setInit(I, InitExpr); + } + } + } + } + // WebAssembly tables can't be used to initialise a variable. if (Init && !Init->getType().isNull() && Init->getType()->isWebAssemblyTableType()) { diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index f9c010b1a00248..37321d2417a7d2 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17022,7 +17022,8 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc, SourceLocation RParenLoc) { if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression)) return nullptr; - + if (DiagnosePPEmbedExpr(AssertExpr, StaticAssertLoc, PPEEC_StaticAssert)) + return nullptr; return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr, AssertMessageExpr, RParenLoc, false); } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 75730ea888afb4..ebeed7f4d2b485 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1412,6 +1412,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Expr::SizeOfPackExprClass: case Expr::StringLiteralClass: case Expr::SourceLocExprClass: + case Expr::PPEmbedExprClass: case Expr::ConceptSpecializationExprClass: case Expr::RequiresExprClass: // These expressions can never throw. 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 9c5f96eebd0416..f1984c027f0601 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7110,6 +7110,13 @@ static void DiagnosedUnqualifiedCallsToStdFunctions(Sema &S,
       << FixItHint::CreateInsertion(DRE->getLocation(), "std::");
 }
 
+/// Expand, in place, any PPEmbedExpr found among the call arguments.
+void Sema::ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc,
+                                   SmallVectorImpl<Expr *> &ArgExprs,
+                                   SourceLocation RParenLoc) {
+  (void)ExpandPPEmbedExprInExprList(ArgExprs);
+}
+
 ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                                MultiExprArg ArgExprs, SourceLocation RParenLoc,
                                Expr *ExecConfig) {
@@ -7947,8 +7954,17 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
     }
   }
 
-  InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList,
-                                               RBraceLoc);
+  // A multi-element list carrying a PPEmbedExpr is expanded to literals first.
+  SmallVector<Expr *> FlattenedInits;
+  ArrayRef<Expr *> FinalInits(InitArgList);
+  if (InitArgList.size() > 1 &&
+      PPEmbedExpr::NotFound !=
+          CheckExprListForPPEmbedExpr(InitArgList, std::nullopt)) {
+    ExpandPPEmbedExprInExprList(InitArgList, FlattenedInits);
+    FinalInits = ArrayRef<Expr *>(FlattenedInits);
+  }
+  InitListExpr *E =
+      new (Context) InitListExpr(Context, LBraceLoc, FinalInits, RBraceLoc);
   E->setType(Context.VoidTy); // FIXME: just a place holder for now.
return E; } @@ -17571,6 +17587,225 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocExpr::IdentKind Kind, SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext); } +ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc, + SourceLocation Base64DataLocation, + SourceLocation RPLoc, StringLiteral *Filename, + QualType ElementTy, + std::vector BinaryData) { + uint64_t ArraySizeRawVal[] = {BinaryData.size()}; + llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()), + 1, ArraySizeRawVal)); + QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr, + ArrayType::Normal, 0); + StringLiteral *BinaryDataLiteral = StringLiteral::Create( + Context, StringRef(BinaryData.data(), BinaryData.size()), + StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation); + return new (Context) + PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc, + RPLoc, CurContext); +} + +IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed) { + assert(PPEmbed->getDataElementCount(Context) == 1 && + "Data should only contain a single element"); + StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + SmallVector ByteVals{}; + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + return IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); +} + +PPEmbedExpr::Action +Sema::CheckExprListForPPEmbedExpr(ArrayRef ExprList, + std::optional MaybeInitType) { + if (ExprList.empty()) { + return 
PPEmbedExpr::NotFound; + } + PPEmbedExpr *First = ExprList.size() == 1 + ? dyn_cast_if_present(ExprList[0]) + : nullptr; + if (First) { + // only one and it's an embed + if (MaybeInitType) { + // With the type information, we have a duty to check if it matches; + // if not, explode it out into a list of integer literals. + QualType &InitType = *MaybeInitType; + if (InitType->isArrayType()) { + const ArrayType *InitArrayType = InitType->getAsArrayTypeUnsafe(); + QualType InitElementTy = InitArrayType->getElementType(); + QualType PPEmbedExprElementTy = First->getType(); + const bool TypesMatch = + Context.typesAreCompatible(InitElementTy, PPEmbedExprElementTy) || + (InitElementTy->isCharType() && PPEmbedExprElementTy->isCharType()); + if (TypesMatch) { + // Keep the PPEmbedExpr, report that everything has been found. + return PPEmbedExpr::FoundOne; + } + } + } else { + // leave it, possibly adjusted later! + return PPEmbedExpr::FoundOne; + } + } + if (std::find_if(ExprList.begin(), ExprList.end(), + [](const Expr *const SomeExpr) { + return isa(SomeExpr); + }) == ExprList.end()) { + // We didn't find one. + return PPEmbedExpr::NotFound; + } + // Otherwise, we found one but it is not the sole entry in the initialization + // list. + return PPEmbedExpr::Expanded; +} + +PPEmbedExpr::Action +Sema::ExpandPPEmbedExprInExprList(SmallVectorImpl &ExprList) { + PPEmbedExpr::Action Action = PPEmbedExpr::NotFound; + SmallVector ByteVals{}; + for (size_t I = 0; I < ExprList.size();) { + Expr *&OriginalExpr = ExprList[I]; + PPEmbedExpr *PPEmbed = dyn_cast_if_present(OriginalExpr); + if (!PPEmbed) { + ++I; + continue; + } + auto ExprListIt = ExprList.erase(&OriginalExpr); + const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context); + if (ExpectedDataElements == 0) { + // No ++I, we are already pointing to newest element. 
+ continue; + } + Action = PPEmbedExpr::Expanded; + StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + size_t Insertions = 0; + for (size_t ByteIndex = 0; ByteIndex < Data.size(); + ByteIndex += BytesPerElement) { + ByteVals.clear(); + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ByteIndex + ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + IntegerLiteral *IntLit = + IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); + ExprListIt = ExprList.insert(ExprListIt, IntLit); + ++Insertions; + // make sure we are inserting **after** the item we just inserted, not + // before + ++ExprListIt; + } + assert(Insertions == ExpectedDataElements); + I += Insertions; + } + return PPEmbedExpr::Expanded; +} + +PPEmbedExpr::Action +Sema::ExpandPPEmbedExprInExprList(ArrayRef ExprList, + SmallVectorImpl &OutputExprList, + bool ClearOutputFirst) { + if (ClearOutputFirst) { + OutputExprList.clear(); + } + size_t ExpectedResize = OutputExprList.size() + ExprList.size(); + const auto FindPPEmbedExpr = [](const Expr *const SomeExpr) { + return isa(SomeExpr); + }; + if (std::find_if(ExprList.begin(), ExprList.end(), FindPPEmbedExpr) == + ExprList.end()) { + return PPEmbedExpr::NotFound; + } + SmallVector ByteVals{}; + OutputExprList.reserve(ExpectedResize); + for (size_t I = 0; I < ExprList.size(); ++I) { + Expr *OriginalExpr = ExprList[I]; + PPEmbedExpr *PPEmbed = dyn_cast_if_present(OriginalExpr); + if (!PPEmbed) { + OutputExprList.push_back(OriginalExpr); + continue; + } + StringLiteral *DataLiteral = 
PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + for (size_t ByteIndex = 0; ByteIndex < Data.size(); + ByteIndex += BytesPerElement) { + ByteVals.clear(); + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ByteIndex + ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + IntegerLiteral *IntLit = + IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); + OutputExprList.push_back(IntLit); + } + } + return PPEmbedExpr::Expanded; +} + +StringRef Sema::GetLocationName(PPEmbedExprContext Context) const { + switch (Context) { + default: + llvm_unreachable("unhandled PPEmbedExprContext value"); + case PPEEC__StaticAssert: + return "_Static_assert"; + case PPEEC_StaticAssert: + return "static_assert"; + } +} + +bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation, + PPEmbedExprContext PPEmbedContext, + bool SingleAllowed) { + PPEmbedExpr *PPEmbed = dyn_cast_if_present(E); + if (!PPEmbed) + return true; + + if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) { + E = ExpandSinglePPEmbedExpr(PPEmbed); + return true; + } + + StringRef LocationName = GetLocationName(PPEmbedContext); + StringRef DiagnosticMessage = + (SingleAllowed ? 
"cannot use a preprocessor embed that expands to " + "nothing or expands to " + "more than one item in " + : "cannot use a preprocessor embed in "); + Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location) + << DiagnosticMessage << 1 << LocationName; + return false; +} + bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp, bool Diagnose) { if (!getLangOpts().ObjC) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index ff370dd1e080b2..234e678c71b140 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1623,6 +1623,62 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D, return Param; } +void Sema::ModifyTemplateArguments( + const TemplateTy &Template, + SmallVectorImpl &TemplateArgs) { + SmallVector ByteVals{}; + for (size_t I = 0; I < TemplateArgs.size();) { + ParsedTemplateArgument &OriginalArg = TemplateArgs[I]; + if (OriginalArg.getKind() != ParsedTemplateArgument::NonType) { + ++I; + continue; + } + PPEmbedExpr *PPEmbed = dyn_cast(OriginalArg.getAsExpr()); + if (!PPEmbed) { + ++I; + continue; + } + auto TemplateArgListIt = TemplateArgs.erase(&OriginalArg); + const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context); + if (ExpectedDataElements == 0) { + // No ++I; already pointing at the right element! 
+ continue; + } + StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + size_t Insertions = 0; + for (size_t ByteIndex = 0; ByteIndex < Data.size(); + ByteIndex += BytesPerElement) { + ByteVals.clear(); + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ByteIndex + ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + IntegerLiteral *IntLit = + IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); + TemplateArgListIt = TemplateArgs.insert( + TemplateArgListIt, + ParsedTemplateArgument(ParsedTemplateArgument::NonType, IntLit, + OriginalArg.getLocation())); + ++Insertions; + // make sure we are inserting **after** the item we just inserted, not + // before + ++TemplateArgListIt; + } + assert(Insertions == ExpectedDataElements); + I += Insertions; + } +} + /// ActOnTemplateTemplateParameter - Called when a C++ template template /// parameter (e.g. T in template