diff --git a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp index 1d85607e86b7ff..7c2a231101070d 100644 --- a/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp +++ b/clang-tools-extra/test/pp-trace/pp-trace-macro.cpp @@ -31,6 +31,15 @@ X // CHECK: MacroNameTok: __STDC_UTF_32__ // CHECK-NEXT: MacroDirective: MD_Define // CHECK: - Callback: MacroDefined +// CHECK-NEXT: MacroNameTok: __STDC_EMBED_NOT_FOUND__ +// CHECK-NEXT: MacroDirective: MD_Define +// CHECK: - Callback: MacroDefined +// CHECK-NEXT: MacroNameTok: __STDC_EMBED_FOUND__ +// CHECK-NEXT: MacroDirective: MD_Define +// CHECK: - Callback: MacroDefined +// CHECK-NEXT: MacroNameTok: __STDC_EMBED_EMPTY__ +// CHECK-NEXT: MacroDirective: MD_Define +// CHECK: - Callback: MacroDefined // CHECK: - Callback: MacroDefined // CHECK-NEXT: MacroNameTok: MACRO // CHECK-NEXT: MacroDirective: MD_Define diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 92e6025c95a8c2..9830b35faae12b 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1502,6 +1502,7 @@ Attributes on Structured Bindings __cpp_structured_bindings C+ Designated initializers (N494) C99 C89 Array & element qualification (N2607) C23 C89 Attributes (N2335) C23 C89 +``#embed`` (N3017) C23 C89, C++ ============================================ ================================ ============= ============= Type Trait Primitives @@ -5664,3 +5665,26 @@ Compiling different TUs depending on these flags (including use of ``std::hardware_destructive_interference``) with different compilers, macro definitions, or architecture flags will lead to ODR violations and should be avoided. + +``#embed`` Parameters +===================== + +``clang::offset`` +----------------- +The ``clang::offset`` embed parameter may appear zero or one time in the +embed parameter sequence. 
Its preprocessor argument clause shall be present and +have the form: + +.. code-block:: text + + ( constant-expression ) + +and shall be an integer constant expression. The integer constant expression +shall not evaluate to a value less than 0. The token ``defined`` shall not +appear within the constant expression. + +The offset will be used when reading the contents of the embedded resource to +specify the starting offset to begin embedding from. The resource is treated +as being empty if the specified offset is larger than the number of bytes in +the resource. The offset will be applied *before* any ``limit`` parameters are +applied. diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index f2bf667636dc9b..3bc8cae4d8c86c 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -4799,6 +4799,164 @@ class SourceLocExpr final : public Expr { friend class ASTStmtReader; }; +/// Stores data related to a single #embed directive. +struct EmbedDataStorage { + StringLiteral *BinaryData; + size_t getDataElementCount() const { return BinaryData->getByteLength(); } +}; + +/// Represents a reference to #embed data. By default, this references the whole +/// range. Otherwise it represents a subrange of data imported by #embed +/// directive. Needed to handle nested initializer lists with #embed directives. 
+/// Example: +/// struct S { +/// int x, y; +/// }; +/// +/// struct T { +/// int x[2]; +/// struct S s; +/// }; +/// +/// struct T t[] = { +/// #embed "data" // data contains 10 elements; +/// }; +/// +/// The resulting semantic form of initializer list will contain (EE stands +/// for EmbedExpr): +/// { {EE(first two data elements)}, {EE(3rd element), EE(4th element)} }, +/// { {EE(5th and 6th element)}, {EE(7th element), EE(8th element)} }, +/// { {EE(9th and 10th element)}, { zeroinitializer } } +/// +/// EmbedExpr inside of a semantic initializer list and referencing more than +/// one element can only appear for arrays of scalars. +class EmbedExpr final : public Expr { + SourceLocation EmbedKeywordLoc; + IntegerLiteral *FakeChildNode = nullptr; + const ASTContext *Ctx = nullptr; + EmbedDataStorage *Data = nullptr; + unsigned Begin = 0; + unsigned NumOfElements = 0; + +public: + EmbedExpr(const ASTContext &Ctx, SourceLocation Loc, EmbedDataStorage *Data, + unsigned Begin, unsigned NumOfElements); + explicit EmbedExpr(EmptyShell Empty) : Expr(EmbedExprClass, Empty) {} + + SourceLocation getLocation() const { return EmbedKeywordLoc; } + SourceLocation getBeginLoc() const { return EmbedKeywordLoc; } + SourceLocation getEndLoc() const { return EmbedKeywordLoc; } + + StringLiteral *getDataStringLiteral() const { return Data->BinaryData; } + EmbedDataStorage *getData() const { return Data; } + + unsigned getStartingElementPos() const { return Begin; } + size_t getDataElementCount() const { return NumOfElements; } + + // Allows accessing every byte of EmbedExpr data and iterating over it. + // An Iterator knows the EmbedExpr that it refers to, and an offset value + // within the data. + // Dereferencing an Iterator results in construction of IntegerLiteral AST + // node filled with byte of data of the corresponding EmbedExpr within offset + // that the Iterator currently has. 
+ template + class ChildElementIter + : public llvm::iterator_facade_base< + ChildElementIter, std::random_access_iterator_tag, + std::conditional_t> { + friend class EmbedExpr; + + EmbedExpr *EExpr = nullptr; + unsigned long long CurOffset = ULLONG_MAX; + using BaseTy = typename ChildElementIter::iterator_facade_base; + + ChildElementIter(EmbedExpr *E) : EExpr(E) { + if (E) + CurOffset = E->getStartingElementPos(); + } + + public: + ChildElementIter() : CurOffset(ULLONG_MAX) {} + typename BaseTy::reference operator*() const { + assert(EExpr && CurOffset != ULLONG_MAX && + "trying to dereference an invalid iterator"); + IntegerLiteral *N = EExpr->FakeChildNode; + StringRef DataRef = EExpr->Data->BinaryData->getBytes(); + N->setValue(*EExpr->Ctx, + llvm::APInt(N->getValue().getBitWidth(), DataRef[CurOffset], + N->getType()->isSignedIntegerType())); + // We want to return a reference to the fake child node in the + // EmbedExpr, not the local variable N. + return const_cast(EExpr->FakeChildNode); + } + typename BaseTy::pointer operator->() const { return **this; } + using BaseTy::operator++; + ChildElementIter &operator++() { + assert(EExpr && "trying to increment an invalid iterator"); + assert(CurOffset != ULLONG_MAX && + "Already at the end of what we can iterate over"); + if (++CurOffset >= + EExpr->getDataElementCount() + EExpr->getStartingElementPos()) { + CurOffset = ULLONG_MAX; + EExpr = nullptr; + } + return *this; + } + bool operator==(ChildElementIter Other) const { + return (EExpr == Other.EExpr && CurOffset == Other.CurOffset); + } + }; // class ChildElementIter + +public: + using fake_child_range = llvm::iterator_range>; + using const_fake_child_range = llvm::iterator_range>; + + fake_child_range underlying_data_elements() { + return fake_child_range(ChildElementIter(this), + ChildElementIter()); + } + + const_fake_child_range underlying_data_elements() const { + return const_fake_child_range( + ChildElementIter(const_cast(this)), + ChildElementIter()); 
+ } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == EmbedExprClass; + } + + ChildElementIter begin() { return ChildElementIter(this); } + + ChildElementIter begin() const { + return ChildElementIter(const_cast(this)); + } + + template + bool doForEachDataElement(Call &&C, unsigned &StartingIndexInArray, + Targs &&...Fargs) const { + for (auto It : underlying_data_elements()) { + if (!std::invoke(std::forward(C), const_cast(It), + StartingIndexInArray, std::forward(Fargs)...)) + return false; + StartingIndexInArray++; + } + return true; + } + +private: + friend class ASTStmtReader; +}; + /// Describes an C or C++ initializer list. /// /// InitListExpr describes an initializer list, which can be used to diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index aa55e2e7e87188..2785afd59bf216 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2864,6 +2864,11 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {}) DEF_TRAVERSE_STMT(ConvertVectorExpr, {}) DEF_TRAVERSE_STMT(StmtExpr, {}) DEF_TRAVERSE_STMT(SourceLocExpr, {}) +DEF_TRAVERSE_STMT(EmbedExpr, { + for (IntegerLiteral *IL : S->underlying_data_elements()) { + TRY_TO_TRAVERSE_OR_ENQUEUE_STMT(IL); + } +}) DEF_TRAVERSE_STMT(UnresolvedLookupExpr, { TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc())); diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index abfafcaef271b6..39dd1f515c9eb3 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -409,6 +409,7 @@ class TextNodeDumper void VisitHLSLBufferDecl(const HLSLBufferDecl *D); void VisitOpenACCConstructStmt(const 
OpenACCConstructStmt *S); void VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S); + void VisitEmbedExpr(const EmbedExpr *S); }; } // namespace clang diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index 1e44bc4ad09b6b..de758cbe679dcf 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -275,6 +275,9 @@ def err_too_large_for_fixed_point : Error< def err_unimplemented_conversion_with_fixed_point_type : Error< "conversion between fixed point and %0 is not yet supported">; +def err_requires_positive_value : Error< + "%select{invalid value '%0'; must be positive|value '%0' is too large}1">; + // SEH def err_seh_expected_handler : Error< "expected '__except' or '__finally' block">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 25fbfe83fa2bcf..12d7b8c0205ee9 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -436,6 +436,14 @@ def warn_cxx23_compat_warning_directive : Warning< def warn_c23_compat_warning_directive : Warning< "#warning is incompatible with C standards before C23">, InGroup, DefaultIgnore; +def ext_pp_embed_directive : ExtWarn< + "#embed is a %select{C23|Clang}0 extension">, + InGroup; +def warn_compat_pp_embed_directive : Warning< + "#embed is incompatible with C standards before C23">, + InGroup, DefaultIgnore; +def err_pp_embed_dup_params : Error< + "cannot specify parameter '%0' twice in the same '#embed' directive">; def ext_pp_extra_tokens_at_eol : ExtWarn< "extra tokens at end of #%0 directive">, InGroup; @@ -505,6 +513,8 @@ def err_pp_invalid_directive : Error< "invalid preprocessing directive%select{|, did you mean '#%1'?}0">; def warn_pp_invalid_directive : Warning< err_pp_invalid_directive.Summary>, InGroup>; +def err_pp_unknown_parameter : Error< + "unknown%select{ | 
embed}0 preprocessor parameter '%1'">; def err_pp_directive_required : Error< "%0 must be used within a preprocessing directive">; def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal; @@ -719,6 +729,8 @@ def err_pp_module_build_missing_end : Error< "no matching '#pragma clang module endbuild' for this '#pragma clang module build'">; def err_defined_macro_name : Error<"'defined' cannot be used as a macro name">; +def err_defined_in_pp_embed : Error< + "'defined' cannot appear within this context">; def err_paste_at_start : Error< "'##' cannot appear at start of macro expansion">; def err_paste_at_end : Error<"'##' cannot appear at end of macro expansion">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9b8f5b7e80e7ea..833e8b51c02572 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1097,8 +1097,6 @@ def note_surrounding_namespace_starts_here : Note< "surrounding namespace with visibility attribute starts here">; def err_pragma_loop_invalid_argument_type : Error< "invalid argument of type %0; expected an integer type">; -def err_pragma_loop_invalid_argument_value : Error< - "%select{invalid value '%0'; must be positive|value '%0' is too large}1">; def err_pragma_loop_compatibility : Error< "%select{incompatible|duplicate}0 directives '%1' and '%2'">; def err_pragma_loop_precedes_nonloop : Error< diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index e1f33d57a89809..527bbef24793ee 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -286,12 +286,15 @@ class FileManager : public RefCountedBase { /// MemoryBuffer if successful, otherwise returning null. 
llvm::ErrorOr> getBufferForFile(FileEntryRef Entry, bool isVolatile = false, - bool RequiresNullTerminator = true); + bool RequiresNullTerminator = true, + std::optional MaybeLimit = std::nullopt); llvm::ErrorOr> getBufferForFile(StringRef Filename, bool isVolatile = false, - bool RequiresNullTerminator = true) const { - return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile, - RequiresNullTerminator); + bool RequiresNullTerminator = true, + std::optional MaybeLimit = std::nullopt) const { + return getBufferForFileImpl(Filename, + /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), + isVolatile, RequiresNullTerminator); } private: diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 6ca08abdb14f07..c59a17be7808f1 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -204,6 +204,7 @@ def OpaqueValueExpr : StmtNode; def TypoExpr : StmtNode; def RecoveryExpr : StmtNode; def BuiltinBitCastExpr : StmtNode; +def EmbedExpr : StmtNode; // Microsoft Extensions. def MSPropertyRefExpr : StmtNode; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 9c4b17465e18a1..37d570ca5e75b5 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -126,6 +126,9 @@ PPKEYWORD(error) // C99 6.10.6 - Pragma Directive. PPKEYWORD(pragma) +// C23 & C++26 #embed +PPKEYWORD(embed) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) @@ -999,6 +1002,9 @@ ANNOTATION(header_unit) // Annotation for end of input in clang-repl. 
ANNOTATION(repl_input_end) +// Annotation for #embed +ANNOTATION(embed) + #undef PRAGMA_ANNOTATION #undef ANNOTATION #undef TESTING_KEYWORD diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 15f62c5c1a6ab4..0c04d272c1ac76 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -880,6 +880,9 @@ will be ignored}]>; def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group, Visibility<[ClangOption, FlangOption]>, MetaVarName<"">, HelpText<"Add directory to library search path">; +def embed_dir_EQ : Joined<["--"], "embed-dir=">, Group, + Visibility<[ClangOption, CC1Option]>, MetaVarName<"">, + HelpText<"Add directory to embed search path">; def MD : Flag<["-"], "MD">, Group, HelpText<"Write a depfile containing user and system headers">; def MMD : Flag<["-"], "MMD">, Group, @@ -1473,6 +1476,9 @@ def dD : Flag<["-"], "dD">, Group, Visibility<[ClangOption, CC1Option]> def dI : Flag<["-"], "dI">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Print include directives in -E mode in addition to normal output">, MarshallingInfoFlag>; +def dE : Flag<["-"], "dE">, Group, Visibility<[CC1Option]>, + HelpText<"Print embed directives in -E mode in addition to normal output">, + MarshallingInfoFlag>; def dM : Flag<["-"], "dM">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Print macro definitions in -E mode instead of normal output">; def dead__strip : Flag<["-"], "dead_strip">; diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index 6e19cae33cf283..654cf22f010f73 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -32,6 +32,8 @@ class PreprocessorOutputOptions { LLVM_PREFERRED_TYPE(bool) unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. 
LLVM_PREFERRED_TYPE(bool) + unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output. + LLVM_PREFERRED_TYPE(bool) unsigned RewriteIncludes : 1; ///< Preprocess include directives only. LLVM_PREFERRED_TYPE(bool) unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. @@ -51,6 +53,7 @@ class PreprocessorOutputOptions { ShowMacroComments = 0; ShowMacros = 0; ShowIncludeDirectives = 0; + ShowEmbedDirectives = 0; RewriteIncludes = 0; RewriteImports = 0; MinimizeWhitespace = 0; diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h index dfc74b52686f1e..46cc564086f1c5 100644 --- a/clang/include/clang/Lex/PPCallbacks.h +++ b/clang/include/clang/Lex/PPCallbacks.h @@ -27,6 +27,7 @@ class IdentifierInfo; class MacroDefinition; class MacroDirective; class MacroArgs; +struct LexEmbedParametersResult; /// This interface provides a way to observe the actions of the /// preprocessor as it does its thing. @@ -83,6 +84,34 @@ class PPCallbacks { const Token &FilenameTok, SrcMgr::CharacteristicKind FileType) {} + /// Callback invoked whenever the preprocessor cannot find a file for an + /// embed directive. + /// + /// \param FileName The name of the file being embedded, as written in the + /// source code. + /// + /// \returns true to indicate that the preprocessor should skip this file + /// and not issue any diagnostic. + virtual bool EmbedFileNotFound(StringRef FileName) { return false; } + + /// Callback invoked whenever an embed directive has been processed, + /// regardless of whether the embed will actually find a file. + /// + /// \param HashLoc The location of the '#' that starts the embed directive. + /// + /// \param FileName The name of the file being embedded, as written in the + /// source code. + /// + /// \param IsAngled Whether the file name was enclosed in angle brackets; + /// otherwise, it was enclosed in quotes. 
+ /// + /// \param File The actual file that may be included by this embed directive. + /// + /// \param Params The parameters used by the directive. + virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName, + bool IsAngled, OptionalFileEntryRef File, + const LexEmbedParametersResult &Params) {} + /// Callback invoked whenever the preprocessor cannot find a file for an /// inclusion directive. /// @@ -333,6 +362,10 @@ class PPCallbacks { SourceRange Range) { } + /// Hook called when a '__has_embed' directive is read. + virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File) {} + /// Hook called when a '__has_include' or '__has_include_next' directive is /// read. virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, @@ -464,6 +497,21 @@ class PPChainedCallbacks : public PPCallbacks { Second->FileSkipped(SkippedFile, FilenameTok, FileType); } + bool EmbedFileNotFound(StringRef FileName) override { + bool Skip = First->FileNotFound(FileName); + // Make sure to invoke the second callback, no matter if the first already + // returned true to skip the file. 
+ Skip |= Second->FileNotFound(FileName); + return Skip; + } + + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, + const LexEmbedParametersResult &Params) override { + First->EmbedDirective(HashLoc, FileName, IsAngled, File, Params); + Second->EmbedDirective(HashLoc, FileName, IsAngled, File, Params); + } + bool FileNotFound(StringRef FileName) override { bool Skip = First->FileNotFound(FileName); // Make sure to invoke the second callback, no matter if the first already @@ -565,6 +613,12 @@ class PPChainedCallbacks : public PPCallbacks { Second->PragmaDiagnostic(Loc, Namespace, mapping, Str); } + void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File) override { + First->HasEmbed(Loc, FileName, IsAngled, File); + Second->HasEmbed(Loc, FileName, IsAngled, File); + } + void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, OptionalFileEntryRef File, SrcMgr::CharacteristicKind FileType) override; diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h new file mode 100644 index 00000000000000..83f0566d739b10 --- /dev/null +++ b/clang/include/clang/Lex/PPDirectiveParameter.h @@ -0,0 +1,33 @@ +//===--- PPDirectiveParameter.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the base class for preprocessor directive parameters, such +// as limit(1) or suffix(x) for #embed. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H +#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H + +#include "clang/Basic/SourceLocation.h" + +namespace clang { + +/// Captures basic information about a preprocessor directive parameter. +class PPDirectiveParameter { + SourceRange R; + +public: + PPDirectiveParameter(SourceRange R) : R(R) {} + + SourceRange getParameterRange() const { return R; } +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h new file mode 100644 index 00000000000000..51bf908524e7ab --- /dev/null +++ b/clang/include/clang/Lex/PPEmbedParameters.h @@ -0,0 +1,94 @@ +//===--- PPEmbedParameters.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines all of the preprocessor directive parameters for #embed +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H +#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H + +#include "clang/Lex/PPDirectiveParameter.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang { + +/// Preprocessor extension embed parameter "clang::offset" +/// `clang::offset( constant-expression )` +class PPEmbedParameterOffset : public PPDirectiveParameter { +public: + size_t Offset; + + PPEmbedParameterOffset(size_t Offset, SourceRange R) + : PPDirectiveParameter(R), Offset(Offset) {} +}; + +/// Preprocessor standard embed parameter "limit" +/// `limit( constant-expression )` +class PPEmbedParameterLimit : public PPDirectiveParameter { +public: + size_t 
Limit; + + PPEmbedParameterLimit(size_t Limit, SourceRange R) + : PPDirectiveParameter(R), Limit(Limit) {} +}; + +/// Preprocessor standard embed parameter "prefix" +/// `prefix( balanced-token-seq )` +class PPEmbedParameterPrefix : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterPrefix(SmallVectorImpl &&Tokens, SourceRange R) + : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {} +}; + +/// Preprocessor standard embed parameter "suffix" +/// `suffix( balanced-token-seq )` +class PPEmbedParameterSuffix : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterSuffix(SmallVectorImpl &&Tokens, SourceRange R) + : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {} +}; + +/// Preprocessor standard embed parameter "if_empty" +/// `if_empty( balanced-token-seq )` +class PPEmbedParameterIfEmpty : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterIfEmpty(SmallVectorImpl &&Tokens, SourceRange R) + : PPDirectiveParameter(R), Tokens(std::move(Tokens)) {} +}; + +struct LexEmbedParametersResult { + std::optional MaybeLimitParam; + std::optional MaybeOffsetParam; + std::optional MaybeIfEmptyParam; + std::optional MaybePrefixParam; + std::optional MaybeSuffixParam; + SourceRange ParamRange; + int UnrecognizedParams; + + size_t PrefixTokenCount() const { + if (MaybePrefixParam) + return MaybePrefixParam->Tokens.size(); + return 0; + } + size_t SuffixTokenCount() const { + if (MaybeSuffixParam) + return MaybeSuffixParam->Tokens.size(); + return 0; + } +}; +} // end namespace clang + +#endif diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 9d8a1aae23df3e..be3334b9807463 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,8 +29,10 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/PPEmbedParameters.h" #include 
"clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -119,6 +121,13 @@ enum MacroUse { MU_Undef = 2 }; +enum class EmbedResult { + Invalid = -1, // Parsing error occurred. + NotFound = 0, // Corresponds to __STDC_EMBED_NOT_FOUND__ + Found = 1, // Corresponds to __STDC_EMBED_FOUND__ + Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__ +}; + /// Engages in a tight little dance with the lexer to efficiently /// preprocess tokens. /// @@ -165,6 +174,7 @@ class Preprocessor { IdentifierInfo *Ident__has_builtin; // __has_builtin IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin IdentifierInfo *Ident__has_attribute; // __has_attribute + IdentifierInfo *Ident__has_embed; // __has_embed IdentifierInfo *Ident__has_include; // __has_include IdentifierInfo *Ident__has_include_next; // __has_include_next IdentifierInfo *Ident__has_warning; // __has_warning @@ -1734,6 +1744,10 @@ class Preprocessor { /// Lex a token, forming a header-name token if possible. bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); + /// Lex the parameters for an #embed directive, returns nullopt on error. + std::optional LexEmbedParameters(Token &Current, + bool ForHasEmbed); + bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl &Toks); @@ -2314,7 +2328,13 @@ class Preprocessor { /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. Returns the range of the skipped tokens. - SourceRange DiscardUntilEndOfDirective(); + SourceRange DiscardUntilEndOfDirective() { + Token Tmp; + return DiscardUntilEndOfDirective(Tmp); + } + + /// Same as above except retains the token that was found. + SourceRange DiscardUntilEndOfDirective(Token &Tok); /// Returns true if the preprocessor has seen a use of /// __DATE__ or __TIME__ in the file so far. 
@@ -2419,6 +2439,18 @@ class Preprocessor { bool *IsFrameworkFound, bool SkipCache = false, bool OpenFile = true, bool CacheFailures = true); + /// Given a "Filename" or \ reference, look up the indicated embed + /// resource. \p isAngled indicates whether the file reference is for + /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile + /// is true, the file looked up is opened for reading, otherwise it only + /// validates that the file exists. Quoted filenames are looked up relative + /// to \p LookupFromFile if it is nonnull. + /// + /// Returns std::nullopt on failure. + OptionalFileEntryRef + LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, + const FileEntry *LookupFromFile = nullptr); + /// Return true if we're in the top-level file, not in a \#include. bool isInPrimaryFile() const; @@ -2524,6 +2556,9 @@ class Preprocessor { /// Information about the result for evaluating an expression for a /// preprocessor directive. struct DirectiveEvalResult { + /// The integral value of the expression. + std::optional Value; + /// Whether the expression was evaluated as true or not. bool Conditional; @@ -2538,7 +2573,25 @@ class Preprocessor { /// \#if or \#elif directive and return a \p DirectiveEvalResult object. /// /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. - DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); + DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + bool CheckForEoD = true); + + /// Evaluate an integer constant expression that may occur after a + /// \#if or \#elif directive and return a \p DirectiveEvalResult object. + /// + /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. + /// \p EvaluatedDefined will contain the result of whether "defined" appeared + /// in the evaluated expression or not. 
+ DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + Token &Tok, + bool &EvaluatedDefined, + bool CheckForEoD = true); + + /// Process a '__has_embed("path" [, ...])' expression. + /// + /// Returns predefined `__STDC_EMBED_*` macro values if + /// successful. + EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II); /// Process a '__has_include("path")' expression. /// @@ -2686,6 +2739,12 @@ class Preprocessor { const FileEntry *LookupFromFile, StringRef &LookupFilename, SmallVectorImpl &RelativePath, SmallVectorImpl &SearchPath, ModuleMap::KnownHeader &SuggestedModule, bool isAngled); + // Binary data inclusion + void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, + const FileEntry *LookupFromFile = nullptr); + void HandleEmbedDirectiveImpl(SourceLocation HashLoc, + const LexEmbedParametersResult &Params, + StringRef BinaryContents); // File inclusion. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, @@ -3002,6 +3061,12 @@ class EmptylineHandler { virtual void HandleEmptyline(SourceRange Range) = 0; }; +/// Helper class to shuttle information about #embed directives from the +/// preprocessor to the parser through an annotation token. +struct EmbedAnnotationData { + StringRef BinaryData; +}; + /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry; diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 635971d0ce5ee8..c2e3d68333024a 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -170,6 +170,9 @@ class PreprocessorOptions { /// of the specified memory buffer (the second part of each pair). std::vector> RemappedFileBuffers; + /// User specified embed entries. + std::vector EmbedEntries; + /// Whether the compiler instance should retain (i.e., not free) /// the buffers associated with remapped files. 
/// diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index d054b8cf0d2405..95c0655f9a2145 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2122,6 +2122,8 @@ class Parser : public CodeCompletionHandler { QualType PreferredBaseType; }; ExprResult ParseInitializerWithPotentialDesignator(DesignatorCompletionInfo); + ExprResult createEmbedExpr(); + void ExpandEmbedDirective(SmallVectorImpl &Exprs); //===--------------------------------------------------------------------===// // clang Expressions @@ -3813,6 +3815,7 @@ class Parser : public CodeCompletionHandler { AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS, ImplicitTypenameContext AllowImplicitTypename, bool IsClassName = false); + void ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs); bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs, TemplateTy Template, SourceLocation OpenLoc); ParsedTemplateArgument ParseTemplateTemplateArgument(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 4d4579fcfd456b..21783e24ba6b5a 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5729,6 +5729,10 @@ class Sema final : public SemaBase { SourceLocation BuiltinLoc, SourceLocation RPLoc); + // #embed + ExprResult ActOnEmbedExpr(SourceLocation EmbedKeywordLoc, + StringLiteral *BinaryData); + // Build a potentially resolved SourceLocExpr. ExprResult BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy, SourceLocation BuiltinLoc, SourceLocation RPLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index a4728b1c06b3fe..8222c92d6506fe 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1655,6 +1655,9 @@ enum StmtCode { /// A SourceLocExpr record. EXPR_SOURCE_LOC, + /// A EmbedExpr record. 
+ EXPR_BUILTIN_PP_EMBED, + /// A ShuffleVectorExpr record. EXPR_SHUFFLE_VECTOR, diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 7e555689b64c48..04b331a9119138 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2373,6 +2373,17 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx, llvm_unreachable("unhandled case"); } +EmbedExpr::EmbedExpr(const ASTContext &Ctx, SourceLocation Loc, + EmbedDataStorage *Data, unsigned Begin, + unsigned NumOfElements) + : Expr(EmbedExprClass, Ctx.UnsignedCharTy, VK_PRValue, OK_Ordinary), + EmbedKeywordLoc(Loc), Ctx(&Ctx), Data(Data), Begin(Begin), + NumOfElements(NumOfElements) { + setDependence(ExprDependence::None); + FakeChildNode = IntegerLiteral::Create( + Ctx, llvm::APInt::getZero(Ctx.getTypeSize(getType())), getType(), Loc); +} + InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc, ArrayRef initExprs, SourceLocation rbraceloc) : Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary), @@ -3615,6 +3626,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case CXXUuidofExprClass: case OpaqueValueExprClass: case SourceLocExprClass: + case EmbedExprClass: case ConceptSpecializationExprClass: case RequiresExprClass: case SYCLUniqueStableNameExprClass: diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index 390000e3ed3835..6482cb6d39acc6 100644 --- a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::RequiresExprClass: return Cl::CL_PRValue; + case Expr::EmbedExprClass: + // Nominally, this just goes through as a PRValue until we actually expand + // it and check it. + return Cl::CL_PRValue; + // Make HLSL this reference-like case Expr::CXXThisExprClass: return Lang.HLSL ? 
Cl::CL_LValue : Cl::CL_PRValue; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7178f081d9cf35..a06a9a0236c6bc 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -7727,6 +7727,11 @@ class ExprEvaluatorBase return Error(E); } + bool VisitEmbedExpr(const EmbedExpr *E) { + const auto It = E->begin(); + return StmtVisitorTy::Visit(*It); + } + bool VisitPredefinedExpr(const PredefinedExpr *E) { return StmtVisitorTy::Visit(E->getFunctionName()); } @@ -9145,6 +9150,11 @@ class PointerExprEvaluator return true; } + bool VisitEmbedExpr(const EmbedExpr *E) { + llvm::report_fatal_error("Not yet implemented for ExprConstant.cpp"); + return true; + } + bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) { std::string ResultStr = E->ComputeName(Info.Ctx); @@ -11249,8 +11259,17 @@ bool ArrayExprEvaluator::VisitCXXParenListOrInitListExpr( // If the initializer might depend on the array index, run it for each // array element. - if (NumEltsToInit != NumElts && MaybeElementDependentArrayFiller(ArrayFiller)) + if (NumEltsToInit != NumElts && + MaybeElementDependentArrayFiller(ArrayFiller)) { NumEltsToInit = NumElts; + } else { + for (auto *Init : Args) { + if (auto *EmbedS = dyn_cast(Init->IgnoreParenImpCasts())) + NumEltsToInit += EmbedS->getDataElementCount() - 1; + } + if (NumEltsToInit > NumElts) + NumEltsToInit = NumElts; + } LLVM_DEBUG(llvm::dbgs() << "The number of elements to initialize: " << NumEltsToInit << ".\n"); @@ -11268,16 +11287,49 @@ bool ArrayExprEvaluator::VisitCXXParenListOrInitListExpr( LValue Subobject = This; Subobject.addArray(Info, ExprToVisit, CAT); - for (unsigned Index = 0; Index != NumEltsToInit; ++Index) { - const Expr *Init = Index < Args.size() ? 
Args[Index] : ArrayFiller; - if (!EvaluateInPlace(Result.getArrayInitializedElt(Index), - Info, Subobject, Init) || + auto Eval = [&](const Expr *Init, unsigned ArrayIndex) { + if (!EvaluateInPlace(Result.getArrayInitializedElt(ArrayIndex), Info, + Subobject, Init) || !HandleLValueArrayAdjustment(Info, Init, Subobject, CAT->getElementType(), 1)) { if (!Info.noteFailure()) return false; Success = false; } + return true; + }; + unsigned ArrayIndex = 0; + QualType DestTy = CAT->getElementType(); + APSInt Value(Info.Ctx.getTypeSize(DestTy), DestTy->isUnsignedIntegerType()); + for (unsigned Index = 0; Index != NumEltsToInit; ++Index) { + const Expr *Init = Index < Args.size() ? Args[Index] : ArrayFiller; + if (ArrayIndex >= NumEltsToInit) + break; + if (auto *EmbedS = dyn_cast(Init->IgnoreParenImpCasts())) { + StringLiteral *SL = EmbedS->getDataStringLiteral(); + for (unsigned I = EmbedS->getStartingElementPos(), + N = EmbedS->getDataElementCount(); + I != EmbedS->getStartingElementPos() + N; ++I) { + Value = SL->getCodeUnit(I); + if (DestTy->isIntegerType()) { + Result.getArrayInitializedElt(ArrayIndex) = APValue(Value); + } else { + assert(DestTy->isFloatingType() && "unexpected type"); + const FPOptions FPO = + Init->getFPFeaturesInEffect(Info.Ctx.getLangOpts()); + APFloat FValue(0.0); + if (!HandleIntToFloatCast(Info, Init, FPO, EmbedS->getType(), Value, + DestTy, FValue)) + return false; + Result.getArrayInitializedElt(ArrayIndex) = APValue(FValue); + } + ArrayIndex++; + } + } else { + if (!Eval(Init, ArrayIndex)) + return false; + ++ArrayIndex; + } } if (!Result.hasArrayFiller()) @@ -16363,6 +16415,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { case Expr::SizeOfPackExprClass: case Expr::GNUNullExprClass: case Expr::SourceLocExprClass: + case Expr::EmbedExprClass: return NoDiag(); case Expr::PackIndexingExprClass: diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 
e61c0a70a0d8aa..6362f47787cad2 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -1231,11 +1231,20 @@ bool ByteCodeExprGen::visitInitList(ArrayRef Inits, } } + auto Eval = [&](Expr *Init, unsigned ElemIndex) { + return visitArrayElemInit(ElemIndex, Init); + }; + unsigned ElementIndex = 0; for (const Expr *Init : Inits) { - if (!this->visitArrayElemInit(ElementIndex, Init)) - return false; - ++ElementIndex; + if (auto *EmbedS = dyn_cast(Init->IgnoreParenImpCasts())) { + if (!EmbedS->doForEachDataElement(Eval, ElementIndex)) + return false; + } else { + if (!this->visitArrayElemInit(ElementIndex, Init)) + return false; + ++ElementIndex; + } } // Expand the filler expression. @@ -1381,6 +1390,12 @@ bool ByteCodeExprGen::VisitConstantExpr(const ConstantExpr *E) { return this->delegate(E->getSubExpr()); } +template +bool ByteCodeExprGen::VisitEmbedExpr(const EmbedExpr *E) { + auto It = E->begin(); + return this->visit(*It); +} + static CharUnits AlignOfType(QualType T, const ASTContext &ASTCtx, UnaryExprOrTypeTrait Kind) { bool AlignOfReturnsPreferred = diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index b0faac8020fb21..155bede340fb42 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -115,6 +115,7 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, bool VisitSizeOfPackExpr(const SizeOfPackExpr *E); bool VisitGenericSelectionExpr(const GenericSelectionExpr *E); bool VisitChooseExpr(const ChooseExpr *E); + bool VisitEmbedExpr(const EmbedExpr *E); bool VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E); bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); bool VisitExpressionTraitExpr(const ExpressionTraitExpr *E); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index ed9e6eeb36c75d..eac18014452555 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ 
b/clang/lib/AST/ItaniumMangle.cpp @@ -4760,6 +4760,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, case Expr::PseudoObjectExprClass: case Expr::AtomicExprClass: case Expr::SourceLocExprClass: + case Expr::EmbedExprClass: case Expr::BuiltinBitCastExprClass: { NotPrimaryExpr(); diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 8f51d16b5db037..5241a5cdbf0095 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1177,6 +1177,10 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) { OS << Node->getBuiltinStr() << "()"; } +void StmtPrinter::VisitEmbedExpr(EmbedExpr *Node) { + llvm::report_fatal_error("Not implemented"); +} + void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) { PrintExpr(Node->getSubExpr()); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index d1655905a66566..1add5caaf9f2ec 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2313,6 +2313,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) { VisitExpr(E); } +void StmtProfiler::VisitEmbedExpr(const EmbedExpr *E) { VisitExpr(E); } + void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); } void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) { diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index bd1e630cd90470..a26f50f0719c11 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -2884,3 +2884,8 @@ void TextNodeDumper::VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S) { else OS << " parent: " << S->getParentComputeConstruct(); } + +void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) { + AddChild("begin", [=] { OS << S->getStartingElementPos(); }); + AddChild("number of elements", [=] { OS << S->getDataElementCount(); }); +} diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index 
1dc51deb829870..4509cee1ca0fed 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -530,13 +530,18 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) { llvm::ErrorOr> FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, - bool RequiresNullTerminator) { + bool RequiresNullTerminator, + std::optional MaybeLimit) { const FileEntry *Entry = &FE.getFileEntry(); // If the content is living on the file entry, return a reference to it. if (Entry->Content) return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef()); uint64_t FileSize = Entry->getSize(); + + if (MaybeLimit) + FileSize = *MaybeLimit; + // If there's a high enough chance that the file have changed since we // got its size, force a stat before opening it. if (isVolatile || Entry->isNamedPipe()) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index feea84544d62fb..4f7ccaf4021d63 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -425,8 +425,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { // collisions (if there were, the switch below would complain about duplicate // case values). Note that this depends on 'if' being null terminated. -#define HASH(LEN, FIRST, THIRD) \ - (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) +#define HASH(LEN, FIRST, THIRD) \ + (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63) #define CASE(LEN, FIRST, THIRD, NAME) \ case HASH(LEN, FIRST, THIRD): \ return memcmp(Name, #NAME, LEN) ? 
tok::pp_not_keyword : tok::pp_ ## NAME @@ -441,6 +441,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 4, 'e', 's', else); CASE( 4, 'l', 'n', line); CASE( 4, 's', 'c', sccs); + CASE( 5, 'e', 'b', embed); CASE( 5, 'e', 'd', endif); CASE( 5, 'e', 'r', error); CASE( 5, 'i', 'e', ident); diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index c369163ffaa9c2..0c87558521924e 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -506,6 +506,16 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, uint64_t NumInitElements = Args.size(); uint64_t NumArrayElements = AType->getNumElements(); + for (const auto *Init : Args) { + if (const auto *Embed = dyn_cast(Init->IgnoreParenImpCasts())) { + NumInitElements += Embed->getDataElementCount() - 1; + if (NumInitElements > NumArrayElements) { + NumInitElements = NumArrayElements; + break; + } + } + } + assert(NumInitElements <= NumArrayElements); QualType elementType = @@ -574,23 +584,37 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, llvm::Value *one = llvm::ConstantInt::get(CGF.SizeTy, 1); - // Emit the explicit initializers. - for (uint64_t i = 0; i != NumInitElements; ++i) { + auto Emit = [&](Expr *Init, uint64_t ArrayIndex) { llvm::Value *element = begin; - if (i > 0) { - element = Builder.CreateInBoundsGEP(llvmElementType, begin, - llvm::ConstantInt::get(CGF.SizeTy, i), - "arrayinit.element"); + if (ArrayIndex > 0) { + element = Builder.CreateInBoundsGEP( + llvmElementType, begin, + llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex), "arrayinit.element"); // Tell the cleanup that it needs to destroy up to this // element. TODO: some of these stores can be trivially // observed to be unnecessary. 
- if (endOfInit.isValid()) Builder.CreateStore(element, endOfInit); + if (endOfInit.isValid()) + Builder.CreateStore(element, endOfInit); } LValue elementLV = CGF.MakeAddrLValue( Address(element, llvmElementType, elementAlign), elementType); - EmitInitializationToLValue(Args[i], elementLV); + EmitInitializationToLValue(Init, elementLV); + return true; + }; + + unsigned ArrayIndex = 0; + // Emit the explicit initializers. + for (uint64_t i = 0; i != NumInitElements; ++i) { + if (ArrayIndex >= NumInitElements) + break; + if (auto *EmbedS = dyn_cast(Args[i]->IgnoreParenImpCasts())) { + EmbedS->doForEachDataElement(Emit, ArrayIndex); + } else { + Emit(Args[i], ArrayIndex); + ArrayIndex++; + } } // Check whether there's a non-trivial array-fill expression. diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 0712f40fd8215a..0fd3792c329cd5 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1061,6 +1061,24 @@ class ConstExprEmitter return Visit(E->getInitializer(), T); } + llvm::Constant *ProduceIntToIntCast(const Expr *E, QualType DestType) { + QualType FromType = E->getType(); + // See also HandleIntToIntCast in ExprConstant.cpp + if (FromType->isIntegerType()) + if (llvm::Constant *C = Visit(E, FromType)) + if (auto *CI = dyn_cast(C)) { + unsigned SrcWidth = CGM.getContext().getIntWidth(FromType); + unsigned DstWidth = CGM.getContext().getIntWidth(DestType); + if (DstWidth == SrcWidth) + return CI; + llvm::APInt A = FromType->isSignedIntegerType() + ? 
CI->getValue().sextOrTrunc(DstWidth) + : CI->getValue().zextOrTrunc(DstWidth); + return llvm::ConstantInt::get(CGM.getLLVMContext(), A); + } + return nullptr; + } + llvm::Constant *VisitCastExpr(const CastExpr *E, QualType destType) { if (const auto *ECE = dyn_cast(E)) CGM.EmitExplicitCastExprType(ECE, Emitter.CGF); @@ -1142,23 +1160,8 @@ class ConstExprEmitter case CK_IntToOCLSampler: llvm_unreachable("global sampler variables are not generated"); - case CK_IntegralCast: { - QualType FromType = subExpr->getType(); - // See also HandleIntToIntCast in ExprConstant.cpp - if (FromType->isIntegerType()) - if (llvm::Constant *C = Visit(subExpr, FromType)) - if (auto *CI = dyn_cast(C)) { - unsigned SrcWidth = CGM.getContext().getIntWidth(FromType); - unsigned DstWidth = CGM.getContext().getIntWidth(destType); - if (DstWidth == SrcWidth) - return CI; - llvm::APInt A = FromType->isSignedIntegerType() - ? CI->getValue().sextOrTrunc(DstWidth) - : CI->getValue().zextOrTrunc(DstWidth); - return llvm::ConstantInt::get(CGM.getLLVMContext(), A); - } - return nullptr; - } + case CK_IntegralCast: + return ProduceIntToIntCast(subExpr, destType); case CK_Dependent: llvm_unreachable("saw dependent cast!"); @@ -1249,15 +1252,42 @@ class ConstExprEmitter return llvm::ConstantInt::get(CGM.getLLVMContext(), I->getValue()); } + static APValue withDestType(ASTContext &Ctx, const Expr *E, QualType SrcType, + QualType DestType, const llvm::APSInt &Value) { + if (!Ctx.hasSameType(SrcType, DestType)) { + if (DestType->isFloatingType()) { + llvm::APFloat Result = + llvm::APFloat(Ctx.getFloatTypeSemantics(DestType), 1); + llvm::RoundingMode RM = + E->getFPFeaturesInEffect(Ctx.getLangOpts()).getRoundingMode(); + if (RM == llvm::RoundingMode::Dynamic) + RM = llvm::RoundingMode::NearestTiesToEven; + Result.convertFromAPInt(Value, Value.isSigned(), RM); + return APValue(Result); + } + } + return APValue(Value); + } + llvm::Constant *EmitArrayInitialization(const InitListExpr *ILE, QualType T) { auto 
*CAT = CGM.getContext().getAsConstantArrayType(ILE->getType()); assert(CAT && "can't emit array init for non-constant-bound array"); + uint64_t NumInitElements = ILE->getNumInits(); const uint64_t NumElements = CAT->getZExtSize(); + for (const auto *Init : ILE->inits()) { + if (const auto *Embed = + dyn_cast(Init->IgnoreParenImpCasts())) { + NumInitElements += Embed->getDataElementCount() - 1; + if (NumInitElements > NumElements) { + NumInitElements = NumElements; + break; + } + } + } // Initialising an array requires us to automatically // initialise any elements that have not been initialised explicitly - uint64_t NumInitableElts = - std::min(ILE->getNumInits(), NumElements); + uint64_t NumInitableElts = std::min(NumInitElements, NumElements); QualType EltType = CAT->getElementType(); @@ -1270,23 +1300,61 @@ class ConstExprEmitter } // Copy initializer elements. - SmallVector Elts; + SmallVector Elts; if (fillC && fillC->isNullValue()) Elts.reserve(NumInitableElts + 1); else Elts.reserve(NumElements); llvm::Type *CommonElementType = nullptr; - for (unsigned i = 0; i < NumInitableElts; ++i) { - const Expr *Init = ILE->getInit(i); - llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType); + auto Emit = [&](const Expr *Init, unsigned ArrayIndex) { + llvm::Constant *C = nullptr; + C = Emitter.tryEmitPrivateForMemory(Init, EltType); if (!C) - return nullptr; - if (i == 0) + return false; + if (ArrayIndex == 0) CommonElementType = C->getType(); else if (C->getType() != CommonElementType) CommonElementType = nullptr; Elts.push_back(C); + return true; + }; + + unsigned ArrayIndex = 0; + QualType DestTy = CAT->getElementType(); + for (unsigned i = 0; i < ILE->getNumInits(); ++i) { + const Expr *Init = ILE->getInit(i); + if (auto *EmbedS = dyn_cast(Init->IgnoreParenImpCasts())) { + StringLiteral *SL = EmbedS->getDataStringLiteral(); + llvm::APSInt Value(CGM.getContext().getTypeSize(DestTy), + DestTy->isUnsignedIntegerType()); + llvm::Constant *C; + for (unsigned 
I = EmbedS->getStartingElementPos(), + N = EmbedS->getDataElementCount(); + I != EmbedS->getStartingElementPos() + N; ++I) { + Value = SL->getCodeUnit(I); + if (DestTy->isIntegerType()) { + C = llvm::ConstantInt::get(CGM.getLLVMContext(), Value); + } else { + C = Emitter.tryEmitPrivateForMemory( + withDestType(CGM.getContext(), Init, EmbedS->getType(), DestTy, + Value), + EltType); + } + if (!C) + return nullptr; + Elts.push_back(C); + ArrayIndex++; + } + if ((ArrayIndex - EmbedS->getDataElementCount()) == 0) + CommonElementType = C->getType(); + else if (C->getType() != CommonElementType) + CommonElementType = nullptr; + } else { + if (!Emit(Init, ArrayIndex)) + return nullptr; + ArrayIndex++; + } } llvm::ArrayType *Desired = diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 7e76e57bc3f02b..f40f3c273206bb 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -506,6 +506,7 @@ class ScalarExprEmitter } Value *VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E); + Value *VisitEmbedExpr(EmbedExpr *E); Value *VisitOpaqueValueExpr(OpaqueValueExpr *E) { if (E->isGLValue()) @@ -1796,6 +1797,12 @@ ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) { "usn_addr_cast"); } +Value *ScalarExprEmitter::VisitEmbedExpr(EmbedExpr *E) { + assert(E->getDataElementCount() == 1); + auto It = E->begin(); + return Builder.getInt((*It)->getValue()); +} + Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { // Vector Mask Case if (E->getNumSubExprs() == 2) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 331cf6e713d890..2ce9e2f4bcfcd4 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1220,7 +1220,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, Args.addAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, - 
options::OPT_F, options::OPT_index_header_map}); + options::OPT_F, options::OPT_index_header_map, + options::OPT_embed_dir_EQ}); // Add -Wp, and -Xpreprocessor if using the preprocessor. @@ -8505,6 +8506,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // Pass along any -I options so we get proper .include search paths. Args.AddAllArgs(CmdArgs, options::OPT_I_Group); + // Pass along any --embed-dir or similar options so we get proper embed paths. + Args.AddAllArgs(CmdArgs, options::OPT_embed_dir_EQ); + // Determine the original source input. auto FindSource = [](const Action *S) -> const Action * { while (S->getKind() != Action::InputClass) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 58694e5399d58c..cde4a84673b6ef 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4492,6 +4492,9 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts, if (Opts.DefineTargetOSMacros) GenerateArg(Consumer, OPT_fdefine_target_os_macros); + for (const auto &EmbedEntry : Opts.EmbedEntries) + GenerateArg(Consumer, OPT_embed_dir_EQ, EmbedEntry); + // Don't handle LexEditorPlaceholders. It is implied by the action that is // generated elsewhere. } @@ -4584,6 +4587,11 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, } } + for (const auto *A : Args.filtered(OPT_embed_dir_EQ)) { + StringRef Val = A->getValue(); + Opts.EmbedEntries.push_back(std::string(Val)); + } + // Always avoid lexing editor placeholders when we're just running the // preprocessor as we never want to emit the // "editor placeholder in source file" error in PP only mode. 
diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp index 369816e89e1d6c..528eae2c5283ea 100644 --- a/clang/lib/Frontend/DependencyFile.cpp +++ b/clang/lib/Frontend/DependencyFile.cpp @@ -62,6 +62,19 @@ struct DepCollectorPPCallbacks : public PPCallbacks { /*IsMissing=*/false); } + void EmbedDirective(SourceLocation, StringRef, bool, + OptionalFileEntryRef File, + const LexEmbedParametersResult &) override { + assert(File && "expected to only be called when the file is found"); + StringRef FileName = + llvm::sys::path::remove_leading_dotslash(File->getName()); + DepCollector.maybeAddDependency(FileName, + /*FromModule*/ false, + /*IsSystem*/ false, + /*IsModuleFile*/ false, + /*IsMissing*/ false); + } + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -77,6 +90,18 @@ struct DepCollectorPPCallbacks : public PPCallbacks { // Files that actually exist are handled by FileChanged. 
} + void HasEmbed(SourceLocation, StringRef, bool, + OptionalFileEntryRef File) override { + if (!File) + return; + StringRef Filename = + llvm::sys::path::remove_leading_dotslash(File->getName()); + DepCollector.maybeAddDependency(Filename, + /*FromModule=*/false, false, + /*IsModuleFile=*/false, + /*IsMissing=*/false); + } + void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, OptionalFileEntryRef File, SrcMgr::CharacteristicKind FileType) override { diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp index 20e5f233e224e2..c23ce66a40dd0b 100644 --- a/clang/lib/Frontend/DependencyGraph.cpp +++ b/clang/lib/Frontend/DependencyGraph.cpp @@ -43,7 +43,7 @@ class DependencyGraphCallback : public PPCallbacks { public: DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile, StringRef SysRoot) - : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { } + : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {} void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, @@ -53,6 +53,10 @@ class DependencyGraphCallback : public PPCallbacks { bool ModuleImported, SrcMgr::CharacteristicKind FileType) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, + const LexEmbedParametersResult &Params) override; + void EndOfMainFile() override { OutputGraphFile(); } @@ -86,6 +90,24 @@ void DependencyGraphCallback::InclusionDirective( AllFiles.insert(*FromFile); } +void DependencyGraphCallback::EmbedDirective(SourceLocation HashLoc, StringRef, + bool, OptionalFileEntryRef File, + const LexEmbedParametersResult &) { + if (!File) + return; + + SourceManager &SM = PP->getSourceManager(); + OptionalFileEntryRef FromFile = + SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc))); + if (!FromFile) + return; + + Dependencies[*FromFile].push_back(*File); + + 
AllFiles.insert(*File); + AllFiles.insert(*FromFile); +} + raw_ostream & DependencyGraphCallback::writeNodeReference(raw_ostream &OS, const FileEntry *Node) { diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index e8c8a5175f8f41..2d5c94c7602522 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -508,6 +508,14 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__STDC_UTF_16__", "1"); Builder.defineMacro("__STDC_UTF_32__", "1"); + // __has_embed definitions + Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", + llvm::itostr(static_cast(EmbedResult::NotFound))); + Builder.defineMacro("__STDC_EMBED_FOUND__", + llvm::itostr(static_cast(EmbedResult::Found))); + Builder.defineMacro("__STDC_EMBED_EMPTY__", + llvm::itostr(static_cast(EmbedResult::Empty))); + if (LangOpts.ObjC) Builder.defineMacro("__OBJC__"); diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index a26d2c3ab8582b..0592423c12eca4 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/Frontend/Utils.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/PreprocessorOutputOptions.h" +#include "clang/Frontend/Utils.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Pragma.h" @@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { bool DisableLineMarkers; bool DumpDefines; bool DumpIncludeDirectives; + bool DumpEmbedDirectives; bool UseLineDirectives; bool IsFirstFileEntered; bool MinimizeWhitespace; @@ -100,6 +101,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { bool KeepSystemIncludes; raw_ostream 
*OrigOS; std::unique_ptr NullOS; + unsigned NumToksToSkip; Token PrevTok; Token PrevPrevTok; @@ -107,14 +109,16 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, bool defines, bool DumpIncludeDirectives, - bool UseLineDirectives, bool MinimizeWhitespace, - bool DirectivesOnly, bool KeepSystemIncludes) + bool DumpEmbedDirectives, bool UseLineDirectives, + bool MinimizeWhitespace, bool DirectivesOnly, + bool KeepSystemIncludes) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), + DumpEmbedDirectives(DumpEmbedDirectives), UseLineDirectives(UseLineDirectives), MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly), - KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) { + KeepSystemIncludes(KeepSystemIncludes), OrigOS(os), NumToksToSkip(0) { CurLine = 0; CurFilename += ""; EmittedTokensOnThisLine = false; @@ -129,6 +133,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { PrevPrevTok.startToken(); } + /// Returns true if #embed directives should be expanded into a comma- + /// delimited list of integer constants or not. 
+ bool expandEmbedContents() const { return !DumpEmbedDirectives; } + bool isMinimizeWhitespace() const { return MinimizeWhitespace; } void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } @@ -149,6 +157,9 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, + const LexEmbedParametersResult &Params) override; void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -232,6 +243,9 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { void BeginModule(const Module *M); void EndModule(const Module *M); + + unsigned GetNumToksToSkip() const { return NumToksToSkip; } + void ResetSkipToks() { NumToksToSkip = 0; } }; } // end anonymous namespace @@ -399,6 +413,74 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, } } +void PrintPPOutputPPCallbacks::EmbedDirective( + SourceLocation HashLoc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File, const LexEmbedParametersResult &Params) { + if (!DumpEmbedDirectives) + return; + + // The EmbedDirective() callback is called before we produce the annotation + // token stream for the directive. We skip printing the annotation tokens + // within PrintPreprocessedTokens(), but we also need to skip the prefix, + // suffix, and if_empty tokens as those are inserted directly into the token + // stream and would otherwise be printed immediately after printing the + // #embed directive. + // + // FIXME: counting tokens to skip is a kludge but we have no way to know + // which tokens were inserted as part of the embed and which ones were + // explicitly written by the user. + MoveToLine(HashLoc, /*RequireStartOfLine=*/true); + *OS << "#embed " << (IsAngled ? 
'<' : '"') << FileName + << (IsAngled ? '>' : '"'); + + auto PrintToks = [&](llvm::ArrayRef Toks) { + SmallString<128> SpellingBuffer; + for (const Token &T : Toks) { + if (T.hasLeadingSpace()) + *OS << " "; + *OS << PP.getSpelling(T, SpellingBuffer); + } + }; + bool SkipAnnotToks = true; + if (Params.MaybeIfEmptyParam) { + *OS << " if_empty("; + PrintToks(Params.MaybeIfEmptyParam->Tokens); + *OS << ")"; + // If the file is empty, we can skip those tokens. If the file is not + // empty, we skip the annotation tokens. + if (File && !File->getSize()) { + NumToksToSkip += Params.MaybeIfEmptyParam->Tokens.size(); + SkipAnnotToks = false; + } + } + + if (Params.MaybeLimitParam) { + *OS << " limit(" << Params.MaybeLimitParam->Limit << ")"; + } + if (Params.MaybeOffsetParam) { + *OS << " clang::offset(" << Params.MaybeOffsetParam->Offset << ")"; + } + if (Params.MaybePrefixParam) { + *OS << " prefix("; + PrintToks(Params.MaybePrefixParam->Tokens); + *OS << ")"; + NumToksToSkip += Params.MaybePrefixParam->Tokens.size(); + } + if (Params.MaybeSuffixParam) { + *OS << " suffix("; + PrintToks(Params.MaybeSuffixParam->Tokens); + *OS << ")"; + NumToksToSkip += Params.MaybeSuffixParam->Tokens.size(); + } + + // We may need to skip the annotation token. + if (SkipAnnotToks) + NumToksToSkip++; + + *OS << " /* clang -E -dE */"; + setEmittedDirectiveOnThisLine(); +} + void PrintPPOutputPPCallbacks::InclusionDirective( SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, @@ -678,7 +760,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, if (Tok.is(tok::eof) || (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && - !Tok.is(tok::annot_repl_input_end))) + !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. 
@@ -878,6 +960,27 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, std::string Name = M->getFullModuleName(); Callbacks->OS->write(Name.data(), Name.size()); Callbacks->HandleNewlinesInToken(Name.data(), Name.size()); + } else if (Tok.is(tok::annot_embed)) { + // Manually explode the binary data out to a stream of comma-delimited + // integer values. If the user passed -dE, that is handled by the + // EmbedDirective() callback. We should only get here if the user did not + // pass -dE. + assert(Callbacks->expandEmbedContents() && + "did not expect an embed annotation"); + auto *Data = + reinterpret_cast(Tok.getAnnotationValue()); + + // Loop over the contents and print them as a comma-delimited list of + // values. + bool PrintComma = false; + for (auto Iter = Data->BinaryData.begin(), End = Data->BinaryData.end(); + Iter != End; ++Iter) { + if (PrintComma) + *Callbacks->OS << ", "; + *Callbacks->OS << static_cast(*Iter); + PrintComma = true; + } + IsStartOfLine = true; } else if (Tok.isAnnotation()) { // Ignore annotation tokens created by pragmas - the pragmas themselves // will be reproduced in the preprocessed output. @@ -926,6 +1029,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, if (Tok.is(tok::eof)) break; PP.Lex(Tok); + // If lexing that token causes us to need to skip future tokens, do so now. 
+ for (unsigned I = 0, Skip = Callbacks->GetNumToksToSkip(); I < Skip; ++I) + PP.Lex(Tok); + Callbacks->ResetSkipToks(); } } @@ -982,8 +1089,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.UseLineDirectives, - Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes); + Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, + Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly, + Opts.KeepSystemIncludes); // Expand macros in pragmas with -fms-extensions. The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 8e7386449dcedb..94410bc7e078ab 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/HeaderSearch.h" @@ -39,6 +40,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/AlignOf.h" @@ -82,8 +84,7 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. 
-SourceRange Preprocessor::DiscardUntilEndOfDirective() { - Token Tmp; +SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) { SourceRange Res; LexUnexpandedToken(Tmp); @@ -1073,6 +1074,74 @@ OptionalFileEntryRef Preprocessor::LookupFile( return std::nullopt; } +OptionalFileEntryRef +Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, + const FileEntry *LookupFromFile) { + FileManager &FM = this->getFileManager(); + if (llvm::sys::path::is_absolute(Filename)) { + // lookup path or immediately fail + llvm::Expected ShouldBeEntry = + FM.getFileRef(Filename, OpenFile); + return llvm::expectedToOptional(std::move(ShouldBeEntry)); + } + + auto SeparateComponents = [](SmallVectorImpl &LookupPath, + StringRef StartingFrom, StringRef FileName, + bool RemoveInitialFileComponentFromLookupPath) { + llvm::sys::path::native(StartingFrom, LookupPath); + if (RemoveInitialFileComponentFromLookupPath) + llvm::sys::path::remove_filename(LookupPath); + if (!LookupPath.empty() && + !llvm::sys::path::is_separator(LookupPath.back())) { + LookupPath.push_back(llvm::sys::path::get_separator().front()); + } + LookupPath.append(FileName.begin(), FileName.end()); + }; + + // Otherwise, it's search time! + SmallString<512> LookupPath; + // Non-angled lookup + if (!isAngled) { + if (LookupFromFile) { + // Use file-based lookup. + StringRef FullFileDir = LookupFromFile->tryGetRealPathName(); + if (!FullFileDir.empty()) { + SeparateComponents(LookupPath, FullFileDir, Filename, true); + llvm::Expected ShouldBeEntry = + FM.getFileRef(LookupPath, OpenFile); + if (ShouldBeEntry) + return llvm::expectedToOptional(std::move(ShouldBeEntry)); + llvm::consumeError(ShouldBeEntry.takeError()); + } + } + + // Otherwise, do working directory lookup. 
+    LookupPath.clear();
+    auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
+    if (MaybeWorkingDirEntry) {
+      DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
+      StringRef WorkingDir = WorkingDirEntry.getName();
+      if (!WorkingDir.empty()) {
+        SeparateComponents(LookupPath, WorkingDir, Filename, false);
+        llvm::Expected ShouldBeEntry =
+            FM.getFileRef(LookupPath, OpenFile);
+        if (ShouldBeEntry)
+          return llvm::expectedToOptional(std::move(ShouldBeEntry));
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+  }
+
+  // Finally, try every PPOpts->EmbedEntries path, in order, as the directory
+  // the file name is resolved against.
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    SeparateComponents(LookupPath, Entry, Filename, false);
+    llvm::Expected ShouldBeEntry =
+        FM.getFileRef(LookupPath, OpenFile);
+    // Only a hit ends the search. Returning unconditionally here would stop at
+    // the first entry and never consult the remaining ones.
+    if (ShouldBeEntry)
+      return llvm::expectedToOptional(std::move(ShouldBeEntry));
+    // Drop the failure the same way the lookups above do, then move on to the
+    // next entry.
+    llvm::consumeError(ShouldBeEntry.takeError());
+  }
+  return std::nullopt;
+}
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Directive Handling.
 //===----------------------------------------------------------------------===//
@@ -1168,6 +1237,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       case tok::pp_include_next:
       case tok::pp___include_macros:
       case tok::pp_pragma:
+      case tok::pp_embed:
         Diag(Result, diag::err_embedded_directive) << II->getName();
         Diag(*ArgMacro, diag::note_macro_expansion_here)
             << ArgMacro->getIdentifierInfo();
@@ -1282,6 +1352,11 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleIdentSCCSDirective(Result);
     case tok::pp_sccs:
       return HandleIdentSCCSDirective(Result);
+    case tok::pp_embed:
+      return HandleEmbedDirective(SavedHash.getLocation(), Result,
+                                  getCurrentFileLexer()
+                                      ?
*getCurrentFileLexer()->getFileEntry() + : static_cast(nullptr)); case tok::pp_assert: //isExtension = true; // FIXME: implement #assert break; @@ -3543,3 +3618,399 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken, HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true, /*FoundElse*/ CI.FoundElse, ElifToken.getLocation()); } + +std::optional +Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) { + LexEmbedParametersResult Result{}; + SmallVector ParameterTokens; + tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod; + Result.ParamRange = {CurTok.getLocation(), CurTok.getLocation()}; + + auto DiagMismatchedBracesAndSkipToEOD = + [&](tok::TokenKind Expected, + std::pair Matches) { + Result.ParamRange.setEnd(CurTok.getEndLoc()); + Diag(CurTok, diag::err_expected) << Expected; + Diag(Matches.second, diag::note_matching) << Matches.first; + if (CurTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(CurTok); + }; + + auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) { + if (CurTok.isNot(Kind)) { + Result.ParamRange.setEnd(CurTok.getEndLoc()); + Diag(CurTok, diag::err_expected) << Kind; + if (CurTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(CurTok); + return false; + } + return true; + }; + + // C23 6.10: + // pp-parameter-name: + // pp-standard-parameter + // pp-prefixed-parameter + // + // pp-standard-parameter: + // identifier + // + // pp-prefixed-parameter: + // identifier :: identifier + auto LexPPParameterName = [&]() -> std::optional { + // We expect the current token to be an identifier; if it's not, things + // have gone wrong. + if (!ExpectOrDiagAndSkipToEOD(tok::identifier)) + return std::nullopt; + + const IdentifierInfo *Prefix = CurTok.getIdentifierInfo(); + + // Lex another token; it is either a :: or we're done with the parameter + // name. + LexNonComment(CurTok); + if (CurTok.is(tok::coloncolon)) { + // We found a ::, so lex another identifier token. 
+ LexNonComment(CurTok); + if (!ExpectOrDiagAndSkipToEOD(tok::identifier)) + return std::nullopt; + + const IdentifierInfo *Suffix = CurTok.getIdentifierInfo(); + + // Lex another token so we're past the name. + LexNonComment(CurTok); + return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str(); + } + return Prefix->getName().str(); + }; + + // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by + // this document as an identifier pp_param and an identifier of the form + // __pp_param__ shall behave the same when used as a preprocessor parameter, + // except for the spelling. + auto NormalizeParameterName = [](StringRef Name) { + if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__")) + return Name.substr(2, Name.size() - 4); + return Name; + }; + + auto LexParenthesizedIntegerExpr = [&]() -> std::optional { + // we have a limit parameter and its internals are processed using + // evaluation rules from #if. + if (!ExpectOrDiagAndSkipToEOD(tok::l_paren)) + return std::nullopt; + + // We do not consume the ( because EvaluateDirectiveExpression will lex + // the next token for us. + IdentifierInfo *ParameterIfNDef = nullptr; + bool EvaluatedDefined; + DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression( + ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false); + + if (!LimitEvalResult.Value) { + // If there was an error evaluating the directive expression, we expect + // to be at the end of directive token. + assert(CurTok.is(tok::eod) && "expect to be at the end of directive"); + return std::nullopt; + } + + if (!ExpectOrDiagAndSkipToEOD(tok::r_paren)) + return std::nullopt; + + // Eat the ). + LexNonComment(CurTok); + + // C23 6.10.3.2p2: The token defined shall not appear within the constant + // expression. 
+    if (EvaluatedDefined) {
+      Diag(CurTok, diag::err_defined_in_pp_embed);
+      // Callers of LexEmbedParameters assert that a failed parse stops on
+      // tok::eod, so drop whatever is left of the directive before bailing.
+      if (CurTok.isNot(tok::eod))
+        DiscardUntilEndOfDirective(CurTok);
+      return std::nullopt;
+    }
+
+    if (LimitEvalResult.Value) {
+      const llvm::APSInt &Result = *LimitEvalResult.Value;
+      if (Result.isNegative()) {
+        Diag(CurTok, diag::err_requires_positive_value)
+            << toString(Result, 10) << /*positive*/ 0;
+        // Same contract as above: leave CurTok on tok::eod when failing.
+        if (CurTok.isNot(tok::eod))
+          DiscardUntilEndOfDirective(CurTok);
+        return std::nullopt;
+      }
+      return Result.getLimitedValue();
+    }
+    return std::nullopt;
+  };
+
+  // Maps an opening bracket kind to the closing kind that balances it.
+  auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
+    switch (Kind) {
+    case tok::l_paren:
+      return tok::r_paren;
+    case tok::l_brace:
+      return tok::r_brace;
+    case tok::l_square:
+      return tok::r_square;
+    default:
+      llvm_unreachable("should not get here");
+    }
+  };
+
+  // Lexes '(' balanced-token-sequence ')' starting at CurTok and appends the
+  // tokens between the outer parentheses to Tokens. Returns false after
+  // diagnosing a missing or mismatched bracket.
+  auto LexParenthesizedBalancedTokenSoup =
+      [&](llvm::SmallVectorImpl &Tokens) {
+        std::vector> BracketStack;
+
+        // We expect the current token to be a left paren.
+        if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
+          return false;
+        LexNonComment(CurTok); // Eat the (
+
+        bool WaitingForInnerCloseParen = false;
+        while (CurTok.isNot(tok::eod) &&
+               (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
+          switch (CurTok.getKind()) {
+          default: // Shutting up diagnostics about not fully-covered switch.
+            break;
+          case tok::l_paren:
+            WaitingForInnerCloseParen = true;
+            [[fallthrough]];
+          case tok::l_brace:
+          case tok::l_square:
+            BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
+            break;
+          case tok::r_paren:
+            // NOTE(review): any ')' clears the flag, even if an outer '(' is
+            // still open on BracketStack -- looks like "((x))" would end the
+            // soup one ')' early; confirm.
+            WaitingForInnerCloseParen = false;
+            [[fallthrough]];
+          case tok::r_brace:
+          case tok::r_square: {
+            // A closer with nothing open has no opener to compare against or
+            // to point a note at, so report it before touching
+            // BracketStack.back(), which is undefined on an empty container.
+            // Only '}' or ']' can get here with an empty stack: ')' enters the
+            // switch only while an inner '(' is still pending.
+            if (BracketStack.empty()) {
+              ExpectOrDiagAndSkipToEOD(tok::r_paren);
+              return false;
+            }
+            tok::TokenKind Matching =
+                GetMatchingCloseBracket(BracketStack.back().first);
+            if (CurTok.getKind() != Matching) {
+              DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
+              return false;
+            }
+            BracketStack.pop_back();
+          } break;
+          }
+          Tokens.push_back(CurTok);
+          LexNonComment(CurTok);
+        }
+
+        // When we're done, we want to eat the closing paren.
+ if (!ExpectOrDiagAndSkipToEOD(tok::r_paren)) + return false; + + LexNonComment(CurTok); // Eat the ) + return true; + }; + + LexNonComment(CurTok); // Prime the pump. + while (!CurTok.isOneOf(EndTokenKind, tok::eod)) { + SourceLocation ParamStartLoc = CurTok.getLocation(); + std::optional ParamName = LexPPParameterName(); + if (!ParamName) + return std::nullopt; + StringRef Parameter = NormalizeParameterName(*ParamName); + + // Lex the parameters (dependent on the parameter type we want!). + // + // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or + // one time in the embed parameter sequence. + if (Parameter == "limit") { + if (Result.MaybeLimitParam) + Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; + + std::optional Limit = LexParenthesizedIntegerExpr(); + if (!Limit) + return std::nullopt; + Result.MaybeLimitParam = + PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}}; + } else if (Parameter == "clang::offset") { + if (Result.MaybeOffsetParam) + Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; + + std::optional Offset = LexParenthesizedIntegerExpr(); + if (!Offset) + return std::nullopt; + Result.MaybeOffsetParam = PPEmbedParameterOffset{ + *Offset, {ParamStartLoc, CurTok.getLocation()}}; + } else if (Parameter == "prefix") { + if (Result.MaybePrefixParam) + Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; + + SmallVector Soup; + if (!LexParenthesizedBalancedTokenSoup(Soup)) + return std::nullopt; + Result.MaybePrefixParam = PPEmbedParameterPrefix{ + std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; + } else if (Parameter == "suffix") { + if (Result.MaybeSuffixParam) + Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; + + SmallVector Soup; + if (!LexParenthesizedBalancedTokenSoup(Soup)) + return std::nullopt; + Result.MaybeSuffixParam = PPEmbedParameterSuffix{ + std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; + } else if (Parameter == "if_empty") { + if 
(Result.MaybeIfEmptyParam)
+        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
+
+      SmallVector Soup;
+      if (!LexParenthesizedBalancedTokenSoup(Soup))
+        return std::nullopt;
+      Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
+          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
+    } else {
+      ++Result.UnrecognizedParams;
+
+      // If there's a left paren, we need to parse a balanced token sequence
+      // and just eat those tokens.
+      if (CurTok.is(tok::l_paren)) {
+        SmallVector Soup;
+        if (!LexParenthesizedBalancedTokenSoup(Soup))
+          return std::nullopt;
+      }
+      if (!ForHasEmbed) {
+        Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
+        // More parameters may follow the unknown one. Callers assert that a
+        // failed parse stops on tok::eod, so skip the rest of the directive
+        // instead of leaving those tokens behind.
+        if (CurTok.isNot(tok::eod))
+          DiscardUntilEndOfDirective(CurTok);
+        return std::nullopt;
+      }
+    }
+  }
+  Result.ParamRange.setEnd(CurTok.getLocation());
+  return Result;
+}
+
+void Preprocessor::HandleEmbedDirectiveImpl(
+    SourceLocation HashLoc, const LexEmbedParametersResult &Params,
+    StringRef BinaryContents) {
+  if (BinaryContents.empty()) {
+    // If we have no binary contents, the only thing we need to emit are the
+    // if_empty tokens, if any.
+    // FIXME: this loses AST fidelity; nothing in the compiler will see that
+    // these tokens came from #embed. We have to hack around this when printing
+    // preprocessed output. The same is true for prefix and suffix tokens.
+    if (Params.MaybeIfEmptyParam) {
+      ArrayRef Toks = Params.MaybeIfEmptyParam->Tokens;
+      size_t TokCount = Toks.size();
+      auto NewToks = std::make_unique(TokCount);
+      llvm::copy(Toks, NewToks.get());
+      EnterTokenStream(std::move(NewToks), TokCount, true, true);
+    }
+    return;
+  }
+
+  size_t NumPrefixToks = Params.PrefixTokenCount(),
+         NumSuffixToks = Params.SuffixTokenCount();
+  size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
+  size_t CurIdx = 0;
+  auto Toks = std::make_unique(TotalNumToks);
+
+  // Add the prefix tokens, if any.
+ if (Params.MaybePrefixParam) { + llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]); + CurIdx += NumPrefixToks; + } + + EmbedAnnotationData *Data = new (BP) EmbedAnnotationData; + Data->BinaryData = BinaryContents; + + Toks[CurIdx].startToken(); + Toks[CurIdx].setKind(tok::annot_embed); + Toks[CurIdx].setAnnotationRange(HashLoc); + Toks[CurIdx++].setAnnotationValue(Data); + + // Now add the suffix tokens, if any. + if (Params.MaybeSuffixParam) { + llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]); + CurIdx += NumSuffixToks; + } + + assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens"); + EnterTokenStream(std::move(Toks), TotalNumToks, true, true); +} + +void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, + const FileEntry *LookupFromFile) { + // Give the usual extension/compatibility warnings. + if (LangOpts.C23) + Diag(EmbedTok, diag::warn_compat_pp_embed_directive); + else + Diag(EmbedTok, diag::ext_pp_embed_directive) + << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0); + + // Parse the filename header + Token FilenameTok; + if (LexHeaderName(FilenameTok)) + return; + + if (FilenameTok.isNot(tok::header_name)) { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + if (FilenameTok.isNot(tok::eod)) + DiscardUntilEndOfDirective(); + return; + } + + // Parse the optional sequence of + // directive-parameters: + // identifier parameter-name-list[opt] directive-argument-list[opt] + // directive-argument-list: + // '(' balanced-token-sequence ')' + // parameter-name-list: + // '::' identifier parameter-name-list[opt] + Token CurTok; + std::optional Params = + LexEmbedParameters(CurTok, /*ForHasEmbed=*/false); + + assert((Params || CurTok.is(tok::eod)) && + "expected success or to be at the end of the directive"); + if (!Params) + return; + + // Now, splat the data out! 
+ SmallString<128> FilenameBuffer; + StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); + StringRef OriginalFilename = Filename; + bool isAngled = + GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + assert(!Filename.empty()); + OptionalFileEntryRef MaybeFileRef = + this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile); + if (!MaybeFileRef) { + // could not find file + if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) { + return; + } + Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; + return; + } + std::optional MaybeFile = + getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef); + if (!MaybeFile) { + // could not find file + Diag(FilenameTok, diag::err_cannot_open_file) + << Filename << "a buffer to the contents could not be created"; + return; + } + StringRef BinaryContents = MaybeFile->getBuffer(); + + // The order is important between 'offset' and 'limit'; we want to offset + // first and then limit second; otherwise we may reduce the notional resource + // size to something too small to offset into. + if (Params->MaybeOffsetParam) { + // FIXME: just like with the limit() and if_empty() parameters, this loses + // source fidelity in the AST; it has no idea that there was an offset + // involved. + // offsets all the way to the end of the file make for an empty file. + BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset); + } + + if (Params->MaybeLimitParam) { + // FIXME: just like with the clang::offset() and if_empty() parameters, + // this loses source fidelity in the AST; it has no idea there was a limit + // involved. 
+ BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit); + } + + if (Callbacks) + Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef, + *Params); + HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents); +} diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index f267efabd617fd..8bb82bd22eb98f 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -870,7 +870,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, /// may occur after a #if or #elif directive. If the expression is equivalent /// to "!defined(X)" return X in IfNDefMacro. Preprocessor::DirectiveEvalResult -Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { +Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + Token &Tok, bool &EvaluatedDefined, + bool CheckForEoD) { SaveAndRestore PPDir(ParsingIfOrElifDirective, true); // Save the current state of 'DisableMacroExpansion' and reset it to false. If // 'DisableMacroExpansion' is true, then we must be in a macro argument list @@ -882,7 +884,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { DisableMacroExpansion = false; // Peek ahead one token. - Token Tok; LexNonComment(Tok); // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. @@ -895,7 +896,7 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Parse error, skip the rest of the macro line. SourceRange ConditionRange = ExprStartLoc; if (Tok.isNot(tok::eod)) - ConditionRange = DiscardUntilEndOfDirective(); + ConditionRange = DiscardUntilEndOfDirective(Tok); // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; @@ -903,11 +904,14 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // We cannot trust the source range from the value because there was a // parse error. 
Track the range manually -- the end of the directive is the // end of the condition range. - return {false, + return {std::nullopt, + false, DT.IncludedUndefinedIds, {ExprStartLoc, ConditionRange.getEnd()}}; } + EvaluatedDefined = DT.State != DefinedTracker::Unknown; + // If we are at the end of the expression after just parsing a value, there // must be no (unparenthesized) binary operators involved, so we can exit // directly. @@ -919,7 +923,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; + bool IsNonZero = ResVal.Val != 0; + SourceRange ValRange = ResVal.getRange(); + return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, + ValRange}; } // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the @@ -928,21 +935,37 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { Tok, true, DT.IncludedUndefinedIds, *this)) { // Parse error, skip the rest of the macro line. if (Tok.isNot(tok::eod)) - DiscardUntilEndOfDirective(); + DiscardUntilEndOfDirective(Tok); // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {false, DT.IncludedUndefinedIds, ResVal.getRange()}; + SourceRange ValRange = ResVal.getRange(); + return {std::nullopt, false, DT.IncludedUndefinedIds, ValRange}; } - // If we aren't at the tok::eod token, something bad happened, like an extra - // ')' token. - if (Tok.isNot(tok::eod)) { - Diag(Tok, diag::err_pp_expected_eol); - DiscardUntilEndOfDirective(); + if (CheckForEoD) { + // If we aren't at the tok::eod token, something bad happened, like an extra + // ')' token. 
+ if (Tok.isNot(tok::eod)) { + Diag(Tok, diag::err_pp_expected_eol); + DiscardUntilEndOfDirective(Tok); + } } + EvaluatedDefined = EvaluatedDefined || DT.State != DefinedTracker::Unknown; + // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; + bool IsNonZero = ResVal.Val != 0; + SourceRange ValRange = ResVal.getRange(); + return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange}; +} + +Preprocessor::DirectiveEvalResult +Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + bool CheckForEoD) { + Token Tok; + bool EvaluatedDefined; + return EvaluateDirectiveExpression(IfNDefMacro, Tok, EvaluatedDefined, + CheckForEoD); } diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index f085b943716442..3913ff08c2eb55 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__has_c_attribute = nullptr; Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute"); + Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed"); Ident__has_include = RegisterBuiltinMacro(*this, "__has_include"); Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next"); Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning"); @@ -1279,6 +1280,105 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II, return File.has_value(); } +/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression. +/// Returns a filled optional with the value if successful; otherwise, empty. +EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { + // These expressions are only allowed within a preprocessor directive. 
+ if (!this->isParsingIfOrElifDirective()) { + Diag(Tok, diag::err_pp_directive_required) << II; + // Return a valid identifier token. + assert(Tok.is(tok::identifier)); + Tok.setIdentifierInfo(II); + return EmbedResult::Invalid; + } + + // Ensure we have a '('. + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_pp_expected_after) << II << tok::l_paren; + // If the next token looks like a filename or the start of one, + // assume it is and process it as such. + return EmbedResult::Invalid; + } + + // Save '(' location for possible missing ')' message and then lex the header + // name token for the embed resource. + SourceLocation LParenLoc = Tok.getLocation(); + if (this->LexHeaderName(Tok)) + return EmbedResult::Invalid; + + if (Tok.isNot(tok::header_name)) { + Diag(Tok.getLocation(), diag::err_pp_expects_filename); + return EmbedResult::Invalid; + } + + SourceLocation FilenameLoc = Tok.getLocation(); + Token FilenameTok = Tok; + + std::optional Params = + this->LexEmbedParameters(Tok, /*ForHasEmbed=*/true); + assert((Params || Tok.is(tok::eod)) && + "expected success or to be at the end of the directive"); + + if (!Params) + return EmbedResult::Invalid; + + if (Params->UnrecognizedParams > 0) + return EmbedResult::NotFound; + + if (!Tok.is(tok::r_paren)) { + Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after) + << II << tok::r_paren; + Diag(LParenLoc, diag::note_matching) << tok::l_paren; + if (Tok.isNot(tok::eod)) + DiscardUntilEndOfDirective(); + return EmbedResult::Invalid; + } + + SmallString<128> FilenameBuffer; + StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer); + bool isAngled = + this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + assert(!Filename.empty()); + const FileEntry *LookupFromFile = + this->getCurrentFileLexer() ? 
*this->getCurrentFileLexer()->getFileEntry()
+                                  : static_cast(nullptr);
+  OptionalFileEntryRef MaybeFileEntry =
+      this->LookupEmbedFile(Filename, isAngled, false, LookupFromFile);
+  if (Callbacks) {
+    Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry);
+  }
+  if (!MaybeFileEntry)
+    return EmbedResult::NotFound;
+
+  size_t FileSize = MaybeFileEntry->getSize();
+  // First, "offset" into the file (this reduces the amount of data we can read
+  // from the file).
+  if (Params->MaybeOffsetParam) {
+    if (Params->MaybeOffsetParam->Offset > FileSize)
+      FileSize = 0;
+    else
+      FileSize -= Params->MaybeOffsetParam->Offset;
+  }
+
+  // Second, limit the data from the file (this also reduces the amount of data
+  // we can read from the file).
+  if (Params->MaybeLimitParam) {
+    // A limit can only shrink what is left. One larger than the remaining
+    // size keeps all of it, matching the substr(0, Limit) used by #embed.
+    if (Params->MaybeLimitParam->Limit < FileSize)
+      FileSize = Params->MaybeLimitParam->Limit;
+  }
+
+  // If we have no data left to read, the file is empty, otherwise we have the
+  // expected resource.
+  if (FileSize == 0)
+    return EmbedResult::Empty;
+  return EmbedResult::Found;
+}
+
 bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) {
   return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr);
 }
@@ -1820,6 +1920,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
       return;
     OS << (int)Value;
     Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__has_embed) {
+    // The argument to this builtin should be a parenthesized file name
+    // string literal using angle brackets (<>) or double-quotes (""),
+    // optionally followed by a series of arguments similar in form to
+    // attributes.
+    EmbedResult Value = EvaluateHasEmbed(Tok, II);
+    if (Value == EmbedResult::Invalid)
+      return;
+
+    Tok.setKind(tok::numeric_constant);
+    OS << static_cast(Value);
   } else if (II == Ident__has_warning) {
     // The argument should be a parenthesized string literal.
EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index 1b3201bd805bf9..865879d1805336 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -193,9 +193,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, if (Tok.isAnnotation()) { // Modules annotation can show up when generated automatically for includes. assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin, - tok::annot_module_end) && + tok::annot_module_end, tok::annot_embed) && "unexpected annotation in AvoidConcat"); + ConcatInfo = 0; + if (Tok.is(tok::annot_embed)) + return true; } if (ConcatInfo == 0) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index eb7447fa038e47..9fc3cd73f73a0f 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1066,6 +1066,21 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, break; } + case tok::annot_embed: { + // We've met #embed in a context where a single value is expected. Take last + // element from #embed data as if it were a comma expression. + EmbedAnnotationData *Data = + reinterpret_cast(Tok.getAnnotationValue()); + SourceLocation StartLoc = ConsumeAnnotationToken(); + ASTContext &Context = Actions.getASTContext(); + Res = IntegerLiteral::Create(Context, + llvm::APInt(CHAR_BIT, Data->BinaryData.back()), + Context.UnsignedCharTy, StartLoc); + if (Data->BinaryData.size() > 1) + Diag(StartLoc, diag::warn_unused_comma_left_operand); + break; + } + case tok::kw___super: case tok::kw_decltype: // Annotate the token and tail recurse. 
@@ -3563,6 +3578,17 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS, T.getCloseLocation()); } +void Parser::ExpandEmbedDirective(SmallVectorImpl &Exprs) { + EmbedAnnotationData *Data = + reinterpret_cast(Tok.getAnnotationValue()); + SourceLocation StartLoc = ConsumeAnnotationToken(); + ASTContext &Context = Actions.getASTContext(); + for (auto Byte : Data->BinaryData) { + Exprs.push_back(IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte), + Context.UnsignedCharTy, StartLoc)); + } +} + /// ParseExpressionList - Used for C/C++ (argument-)expression-list. /// /// \verbatim @@ -3598,8 +3624,17 @@ bool Parser::ParseExpressionList(SmallVectorImpl &Exprs, if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); Expr = ParseBraceInitializer(); - } else + } else if (Tok.is(tok::annot_embed)) { + ExpandEmbedDirective(Exprs); + if (Tok.isNot(tok::comma)) + break; + Token Comma = Tok; + ConsumeToken(); + checkPotentialAngleBracketDelimiter(Comma); + continue; + } else { Expr = ParseAssignmentExpression(); + } if (EarlyTypoCorrection) Expr = Actions.CorrectDelayedTyposInExpr(Expr); diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp index 432ddc74b1087b..0a9a359cdaf979 100644 --- a/clang/lib/Parse/ParseInit.cpp +++ b/clang/lib/Parse/ParseInit.cpp @@ -428,6 +428,34 @@ ExprResult Parser::ParseInitializerWithPotentialDesignator( return ExprError(); } +ExprResult Parser::createEmbedExpr() { + assert(Tok.getKind() == tok::annot_embed); + EmbedAnnotationData *Data = + reinterpret_cast(Tok.getAnnotationValue()); + ExprResult Res; + ASTContext &Context = Actions.getASTContext(); + SourceLocation StartLoc = ConsumeAnnotationToken(); + if (Data->BinaryData.size() == 1) { + Res = IntegerLiteral::Create(Context, + llvm::APInt(CHAR_BIT, Data->BinaryData.back()), + Context.UnsignedCharTy, StartLoc); + } else { + auto CreateStringLiteralFromStringRef = [&](StringRef Str, QualType Ty) { + 
llvm::APSInt ArraySize = + Context.MakeIntValue(Str.size(), Context.getSizeType()); + QualType ArrayTy = Context.getConstantArrayType( + Ty, ArraySize, nullptr, ArraySizeModifier::Normal, 0); + return StringLiteral::Create(Context, Str, StringLiteralKind::Ordinary, + false, ArrayTy, StartLoc); + }; + + StringLiteral *BinaryDataArg = CreateStringLiteralFromStringRef( + Data->BinaryData, Context.UnsignedCharTy); + Res = Actions.ActOnEmbedExpr(StartLoc, BinaryDataArg); + } + return Res; +} + /// ParseBraceInitializer - Called when parsing an initializer that has a /// leading open brace. /// @@ -501,6 +529,8 @@ ExprResult Parser::ParseBraceInitializer() { ExprResult SubElt; if (MayBeDesignationStart()) SubElt = ParseInitializerWithPotentialDesignator(DesignatorCompletion); + else if (Tok.getKind() == tok::annot_embed) + SubElt = createEmbedExpr(); else SubElt = ParseInitializer(); diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index a5130f56600e54..7e30afa2c64a4f 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -1523,6 +1523,19 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() { ExprArg.get(), Loc); } +void Parser::ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs) { + EmbedAnnotationData *Data = + reinterpret_cast(Tok.getAnnotationValue()); + SourceLocation StartLoc = ConsumeAnnotationToken(); + ASTContext &Context = Actions.getASTContext(); + for (auto Byte : Data->BinaryData) { + Expr *E = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte), + Context.UnsignedCharTy, StartLoc); + TemplateArgs.push_back( + ParsedTemplateArgument(ParsedTemplateArgument::NonType, E, StartLoc)); + } +} + /// ParseTemplateArgumentList - Parse a C++ template-argument-list /// (C++ [temp.names]). Returns true if there was an error. 
/// @@ -1547,19 +1560,23 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs, do { PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp); - ParsedTemplateArgument Arg = ParseTemplateArgument(); - SourceLocation EllipsisLoc; - if (TryConsumeToken(tok::ellipsis, EllipsisLoc)) - Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc); - - if (Arg.isInvalid()) { - if (PP.isCodeCompletionReached() && !CalledSignatureHelp) - RunSignatureHelp(); - return true; - } + if (Tok.is(tok::annot_embed)) { + ExpandEmbedIntoTemplateArgList(TemplateArgs); + } else { + ParsedTemplateArgument Arg = ParseTemplateArgument(); + SourceLocation EllipsisLoc; + if (TryConsumeToken(tok::ellipsis, EllipsisLoc)) + Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc); + + if (Arg.isInvalid()) { + if (PP.isCodeCompletionReached() && !CalledSignatureHelp) + RunSignatureHelp(); + return true; + } - // Save this template argument. - TemplateArgs.push_back(Arg); + // Save this template argument. + TemplateArgs.push_back(Arg); + } // If the next token is a comma, consume it and keep reading // arguments. diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 17acfca6b01126..0febfa85b93dd0 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1414,6 +1414,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Expr::PackIndexingExprClass: case Expr::StringLiteralClass: case Expr::SourceLocExprClass: + case Expr::EmbedExprClass: case Expr::ConceptSpecializationExprClass: case Expr::RequiresExprClass: // These expressions can never throw. diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 4db8b4130c3c78..b0402d145f1f1c 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3711,7 +3711,7 @@ bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) { bool ValueIsPositive = AllowZero ? 
ValueAPS.isNonNegative() : ValueAPS.isStrictlyPositive(); if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) { - Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_value) + Diag(E->getExprLoc(), diag::err_requires_positive_value) << toString(ValueAPS, 10) << ValueIsPositive; return true; } @@ -7290,8 +7290,8 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList, } } - InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, - RBraceLoc); + InitListExpr *E = + new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc); E->setType(Context.VoidTy); // FIXME: just a place holder for now. return E; } @@ -16699,6 +16699,15 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy, SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext); } +ExprResult Sema::ActOnEmbedExpr(SourceLocation EmbedKeywordLoc, + StringLiteral *BinaryData) { + EmbedDataStorage *Data = new (Context) EmbedDataStorage; + Data->BinaryData = BinaryData; + return new (Context) + EmbedExpr(Context, EmbedKeywordLoc, Data, /*NumOfElements=*/0, + Data->getDataElementCount()); +} + static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType, const Expr *SrcExpr) { if (!DstType->isFunctionPointerType() || diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index e805834c0fd38e..d966dba51e2cc1 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -313,6 +313,8 @@ class InitListChecker { InitListExpr *FullyStructuredList = nullptr; NoInitExpr *DummyExpr = nullptr; SmallVectorImpl *AggrDeductionCandidateParamTypes = nullptr; + EmbedExpr *CurEmbed = nullptr; // Save current embed we're processing. 
+ unsigned CurEmbedIndex = 0; NoInitExpr *getDummyInit() { if (!DummyExpr) @@ -501,6 +503,42 @@ class InitListChecker { void CheckEmptyInitializable(const InitializedEntity &Entity, SourceLocation Loc); + Expr *HandleEmbed(EmbedExpr *Embed, const InitializedEntity &Entity) { + Expr *Result = nullptr; + // Understand which part of embed we'd like to reference. + if (!CurEmbed) { + CurEmbed = Embed; + CurEmbedIndex = 0; + } + // Reference just one if we're initializing a single scalar. + uint64_t ElsCount = 1; + // Otherwise try to fill whole array with embed data. + if (Entity.getKind() == InitializedEntity::EK_ArrayElement) { + ValueDecl *ArrDecl = Entity.getParent()->getDecl(); + auto *AType = SemaRef.Context.getAsArrayType(ArrDecl->getType()); + assert(AType && "expected array type when initializing array"); + ElsCount = Embed->getDataElementCount(); + if (const auto *CAType = dyn_cast(AType)) + ElsCount = std::min(CAType->getSize().getZExtValue(), + ElsCount - CurEmbedIndex); + if (ElsCount == Embed->getDataElementCount()) { + CurEmbed = nullptr; + CurEmbedIndex = 0; + return Embed; + } + } + + Result = new (SemaRef.Context) + EmbedExpr(SemaRef.Context, Embed->getLocation(), Embed->getData(), + CurEmbedIndex, ElsCount); + CurEmbedIndex += ElsCount; + if (CurEmbedIndex >= Embed->getDataElementCount()) { + CurEmbed = nullptr; + CurEmbedIndex = 0; + } + return Result; + } + public: InitListChecker( Sema &S, const InitializedEntity &Entity, InitListExpr *IL, QualType &T, @@ -1473,6 +1511,9 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, // Brace elision is never performed if the element is not an // assignment-expression. 
if (Seq || isa(expr)) { + if (auto *Embed = dyn_cast(expr)) { + expr = HandleEmbed(Embed, Entity); + } if (!VerifyOnly) { ExprResult Result = Seq.Perform(SemaRef, TmpEntity, Kind, expr); if (Result.isInvalid()) @@ -1486,7 +1527,8 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, UpdateStructuredListElement(StructuredList, StructuredIndex, getDummyInit()); } - ++Index; + if (!CurEmbed) + ++Index; if (AggrDeductionCandidateParamTypes) AggrDeductionCandidateParamTypes->push_back(ElemType); return; @@ -1679,6 +1721,8 @@ void InitListChecker::CheckScalarType(const InitializedEntity &Entity, ++Index; ++StructuredIndex; return; + } else if (auto *Embed = dyn_cast(expr)) { + expr = HandleEmbed(Embed, Entity); } ExprResult Result; @@ -1700,14 +1744,16 @@ void InitListChecker::CheckScalarType(const InitializedEntity &Entity, else { ResultExpr = Result.getAs(); - if (ResultExpr != expr && !VerifyOnly) { + if (ResultExpr != expr && !VerifyOnly && !CurEmbed) { // The type was promoted, update initializer list. // FIXME: Why are we updating the syntactic init list? IList->setInit(Index, ResultExpr); } } + UpdateStructuredListElement(StructuredList, StructuredIndex, ResultExpr); - ++Index; + if (!CurEmbed) + ++Index; if (AggrDeductionCandidateParamTypes) AggrDeductionCandidateParamTypes->push_back(DeclType); } @@ -1946,6 +1992,30 @@ static bool checkDestructorReference(QualType ElementType, SourceLocation Loc, return SemaRef.DiagnoseUseOfDecl(Destructor, Loc); } +static bool canInitializeArrayWithEmbedDataString(ArrayRef ExprList, + QualType InitType, + ASTContext &Context) { + // Only one initializer, it's an embed and the types match; + EmbedExpr *EE = + ExprList.size() == 1 + ? 
dyn_cast_if_present(ExprList[0]->IgnoreParens()) + : nullptr; + if (!EE) + return false; + + if (InitType->isArrayType()) { + const ArrayType *InitArrayType = InitType->getAsArrayTypeUnsafe(); + QualType InitElementTy = InitArrayType->getElementType(); + QualType EmbedExprElementTy = EE->getType(); + const bool TypesMatch = + Context.typesAreCompatible(InitElementTy, EmbedExprElementTy) || + (InitElementTy->isCharType() && EmbedExprElementTy->isCharType()); + if (TypesMatch) + return true; + } + return false; +} + void InitListChecker::CheckArrayType(const InitializedEntity &Entity, InitListExpr *IList, QualType &DeclType, llvm::APSInt elementIndex, @@ -1963,6 +2033,12 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity, } } + if (canInitializeArrayWithEmbedDataString(IList->inits(), DeclType, + SemaRef.Context)) { + EmbedExpr *Embed = cast(IList->inits()[0]); + IList->setInit(0, Embed->getDataStringLiteral()); + } + // Check for the special-case of initializing an array with a string. if (Index < IList->getNumInits()) { if (IsStringInit(IList->getInit(Index), arrayType, SemaRef.Context) == @@ -2065,13 +2141,24 @@ void InitListChecker::CheckArrayType(const InitializedEntity &Entity, if (maxElementsKnown && elementIndex == maxElements) break; - InitializedEntity ElementEntity = - InitializedEntity::InitializeElement(SemaRef.Context, StructuredIndex, - Entity); + InitializedEntity ElementEntity = InitializedEntity::InitializeElement( + SemaRef.Context, StructuredIndex, Entity); + + unsigned EmbedElementIndexBeforeInit = CurEmbedIndex; // Check this element. 
CheckSubElementType(ElementEntity, IList, elementType, Index, StructuredList, StructuredIndex); ++elementIndex; + if ((CurEmbed || isa(Init)) && elementType->isScalarType()) { + if (CurEmbed) { + elementIndex = + elementIndex + CurEmbedIndex - EmbedElementIndexBeforeInit - 1; + } else { + auto Embed = cast(Init); + elementIndex = elementIndex + Embed->getDataElementCount() - + EmbedElementIndexBeforeInit - 1; + } + } // If the array is of incomplete type, keep track of the number of // elements in the initializer. @@ -9077,19 +9164,18 @@ ExprResult InitializationSequence::Perform(Sema &S, } } } - + Expr *Init = CurInit.get(); CheckedConversionKind CCK = Kind.isCStyleCast() ? CheckedConversionKind::CStyleCast : Kind.isFunctionalCast() ? CheckedConversionKind::FunctionalCast : Kind.isExplicitCast() ? CheckedConversionKind::OtherCast : CheckedConversionKind::Implicit; - ExprResult CurInitExprRes = - S.PerformImplicitConversion(CurInit.get(), Step->Type, *Step->ICS, - getAssignmentAction(Entity), CCK); + ExprResult CurInitExprRes = S.PerformImplicitConversion( + Init, Step->Type, *Step->ICS, getAssignmentAction(Entity), CCK); if (CurInitExprRes.isInvalid()) return ExprError(); - S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), CurInit.get()); + S.DiscardMisalignedMemberAddress(Step->Type.getTypePtr(), Init); CurInit = CurInitExprRes; @@ -9244,10 +9330,11 @@ ExprResult InitializationSequence::Perform(Sema &S, case SK_CAssignment: { QualType SourceType = CurInit.get()->getType(); + Expr *Init = CurInit.get(); // Save off the initial CurInit in case we need to emit a diagnostic - ExprResult InitialCurInit = CurInit; - ExprResult Result = CurInit; + ExprResult InitialCurInit = Init; + ExprResult Result = Init; Sema::AssignConvertType ConvTy = S.CheckSingleAssignmentConstraints(Step->Type, Result, true, Entity.getKind() == InitializedEntity::EK_Parameter_CF_Audited); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 
3bfda09d5f80fc..f117fe98d142b0 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -12939,6 +12939,11 @@ ExprResult TreeTransform::TransformSourceLocExpr(SourceLocExpr *E) { getSema().CurContext); } +template +ExprResult TreeTransform::TransformEmbedExpr(EmbedExpr *E) { + return E; +} + template ExprResult TreeTransform::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 67ef170251914e..e23ceffb10bfe6 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1323,6 +1323,16 @@ void ASTStmtReader::VisitSourceLocExpr(SourceLocExpr *E) { E->SourceLocExprBits.Kind = Record.readInt(); } +void ASTStmtReader::VisitEmbedExpr(EmbedExpr *E) { + VisitExpr(E); + E->EmbedKeywordLoc = readSourceLocation(); + EmbedDataStorage *Data = new (Record.getContext()) EmbedDataStorage; + Data->BinaryData = cast(Record.readSubStmt()); + E->Data = Data; + E->Begin = Record.readInt(); + E->NumOfElements = Record.readInt(); +} + void ASTStmtReader::VisitAddrLabelExpr(AddrLabelExpr *E) { VisitExpr(E); E->setAmpAmpLoc(readSourceLocation()); @@ -3233,6 +3243,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = new (Context) SourceLocExpr(Empty); break; + case EXPR_BUILTIN_PP_EMBED: + S = new (Context) EmbedExpr(Empty); + break; + case EXPR_ADDR_LABEL: S = new (Context) AddrLabelExpr(Empty); break; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 1ba6d5501fd102..12610c4ffe1c52 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1262,6 +1262,16 @@ void ASTStmtWriter::VisitSourceLocExpr(SourceLocExpr *E) { Code = serialization::EXPR_SOURCE_LOC; } +void ASTStmtWriter::VisitEmbedExpr(EmbedExpr *E) { + VisitExpr(E); + Record.AddSourceLocation(E->getBeginLoc()); + 
Record.AddSourceLocation(E->getEndLoc()); + Record.AddStmt(E->getDataStringLiteral()); + Record.writeUInt32(E->getStartingElementPos()); + Record.writeUInt32(E->getDataElementCount()); + Code = serialization::EXPR_BUILTIN_PP_EMBED; +} + void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) { VisitExpr(E); Record.AddSourceLocation(E->getAmpAmpLoc()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 197d6731072851..c1a8aad83a90ba 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2422,6 +2422,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, Bldr.addNodes(Dst); break; } + + case Stmt::EmbedExprClass: + llvm::report_fatal_error("Support for EmbedExpr is not implemented."); + break; } } diff --git a/clang/test/C/C2x/Inputs/bits.bin b/clang/test/C/C2x/Inputs/bits.bin new file mode 100644 index 00000000000000..ad471007bd7f59 --- /dev/null +++ b/clang/test/C/C2x/Inputs/bits.bin @@ -0,0 +1 @@ +0123456789 \ No newline at end of file diff --git a/clang/test/C/C2x/Inputs/boop.h b/clang/test/C/C2x/Inputs/boop.h new file mode 100644 index 00000000000000..d3e39674f19629 --- /dev/null +++ b/clang/test/C/C2x/Inputs/boop.h @@ -0,0 +1 @@ +*boop* \ No newline at end of file diff --git a/clang/test/C/C2x/Inputs/i.dat b/clang/test/C/C2x/Inputs/i.dat new file mode 100644 index 00000000000000..c227083464fb9a --- /dev/null +++ b/clang/test/C/C2x/Inputs/i.dat @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/clang/test/C/C2x/Inputs/jump.wav b/clang/test/C/C2x/Inputs/jump.wav new file mode 100644 index 00000000000000..a71100636e8675 --- /dev/null +++ b/clang/test/C/C2x/Inputs/jump.wav @@ -0,0 +1 @@ +RIFF \ No newline at end of file diff --git a/clang/test/C/C2x/Inputs/s.dat b/clang/test/C/C2x/Inputs/s.dat new file mode 100644 index 00000000000000..3a332e6bba38d3 --- /dev/null +++ b/clang/test/C/C2x/Inputs/s.dat @@ -0,0 +1 @@ +012345678 \ No 
newline at end of file diff --git a/clang/test/C/C2x/n3017.c b/clang/test/C/C2x/n3017.c new file mode 100644 index 00000000000000..0d22d31baa4b7d --- /dev/null +++ b/clang/test/C/C2x/n3017.c @@ -0,0 +1,216 @@ +// RUN: %clang_cc1 -verify -fsyntax-only --embed-dir=%S/Inputs -std=c2x %s -Wno-constant-logical-operand + +/* WG14 N3017: full + * #embed - a scannable, tooling-friendly binary resource inclusion mechanism + */ + +// C23 6.10p6 +char b1[] = { +#embed "boop.h" limit(5) +, +#embed "boop.h" __limit__(5) +}; + +// C23 6.10.1p19 +#if __has_embed(__FILE__ ext::token(0xB055)) +#error "Supports an extension parameter Clang never claimed to support?" +#endif + +#if !__has_embed(__FILE__ clang::offset(0)) +#error "Doesn't support an extension Clang claims to support?" +#endif + +// C23 6.10.1p20 +void parse_into_s(short* ptr, unsigned char* ptr_bytes, unsigned long long size); +int f() { +#if __has_embed ("bits.bin" ds9000::element_type(short)) + /* Implementation extension: create short integers from the */ + /* translation environment resource into */ + /* a sequence of integer constants */ + short meow[] = { +#embed "bits.bin" ds9000::element_type(short) + }; +#elif __has_embed ("bits.bin") + /* no support for implementation-specific */ + /* ds9000::element_type(short) parameter */ + unsigned char meow_bytes[] = { + #embed "bits.bin" + }; + short meow[sizeof(meow_bytes) / sizeof(short)] = {}; + /* parse meow_bytes into short values by-hand! */ + parse_into_s(meow, meow_bytes, sizeof(meow_bytes)); +#else +#error "cannot find bits.bin resource" +#endif + return (int)(meow[0] + meow[(sizeof(meow) / sizeof(*meow)) - 1]); +} + +// NOTE: we don't have a good way to test infinite resources from within lit. +int g() { +#if __has_embed( limit(0)) == 2 + // if exists, this + // token sequence is always taken. 
+ return 0; +#else + // the ’infinite-resource’ resource does not exist + #error "The resource does not exist" +#endif + // expected-error@-2 {{"The resource does not exist"}} +} + +#include +void have_you_any_wool(const unsigned char*, size_t); +int h() { + static const unsigned char baa_baa[] = { +#embed __FILE__ + }; + have_you_any_wool(baa_baa, sizeof(baa_baa)); + return 0; +} + +// C23 6.10.3.1p17: not tested here because we do not currently support any +// platforms where CHAR_BIT != 8. + +// C23 6.10.3.1p18 +int i() { +/* Braces may be kept or elided as per normal initialization rules */ + int i = { +#embed "i.dat" + }; /* valid if i.dat produces 1 value, + i value is [0, 2(embed element width)) */ + int i2 = +#embed "i.dat" + ; /* valid if i.dat produces 1 value, + i2 value is [0, 2(embed element width)) */ + struct s { + double a, b, c; + struct { double e, f, g; }; + double h, i, j; + }; + struct s x = { + /* initializes each element in order according to initialization + rules with comma-separated list of integer constant expressions + inside of braces */ + #embed "s.dat" + }; + return 0; +} + +// C23 6.10.3.1p19: not tested here because it's a runtime test rather than one +// which can be handled at compile time (it validates file contents via fread). 
+ +// C23 6.10.3.2p5 +int j() { + static const char sound_signature[] = { +#embed limit(2+2) + }; + static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4, + "There should only be 4 elements in this array."); + // verify PCM WAV resource + static_assert(sound_signature[0] == 'R'); + static_assert(sound_signature[1] == 'I'); + static_assert(sound_signature[2] == 'F'); + static_assert(sound_signature[3] == 'F'); + static_assert(sizeof(sound_signature) == 4); + return 0; +} + +// C23 6.10.3p6 +int k() { +#define TWO_PLUS_TWO 2+2 + static const char sound_signature[] = { +#embed limit(TWO_PLUS_TWO) + }; + static_assert((sizeof(sound_signature) / sizeof(*sound_signature)) == 4, + "There should only be 4 elements in this array."); + // verify PCM WAV resource + static_assert(sound_signature[0] == 'R'); + static_assert(sound_signature[1] == 'I'); + static_assert(sound_signature[2] == 'F'); + static_assert(sound_signature[3] == 'F'); + static_assert(sizeof(sound_signature) == 4); + return 0; +} + +// C23 6.10.3.2p7: not tested here because we do not currently support any +// platforms where CHAR_BIT != 8. + +// C23 6.10.3.2p8: not tested here because it requires access to an infinite +// resource like /dev/urandom. 
+ +// C23 6.10.3.3p4 +char *strcpy(char *, const char *); +#ifndef SHADER_TARGET + #define SHADER_TARGET "bits.bin" +#endif +extern char* null_term_shader_data; +void fill_in_data () { + const char internal_data[] = { +#embed SHADER_TARGET \ + suffix(,) + 0 + }; + strcpy(null_term_shader_data, internal_data); +} + +// C23 6.10.3.4p4 +#ifndef SHADER_TARGET +#define SHADER_TARGET "bits.bin" +#endif +extern char* merp; +void init_data () { + const char whl[] = { +#embed SHADER_TARGET \ + prefix(0xEF, 0xBB, 0xBF, ) /* UTF-8 BOM */ \ + suffix(,) + 0 + }; + // always null terminated, + // contains BOM if not-empty + const int is_good = (sizeof(whl) == 1 && whl[0] == '\0') + || (whl[0] == '\xEF' && whl[1] == '\xBB' + && whl[2] == '\xBF' && whl[sizeof(whl) - 1] == '\0'); + static_assert(is_good); + strcpy(merp, whl); +} + +// C23 6.10.3.5p3 +int l() { + return +#embed limit(0) prefix(1) if_empty(0) + ; + // becomes: + // return 0; + + // Validating the assumption from the example in the standard. + static_assert( +#embed limit(0) prefix(1) if_empty(0) + == 0); +} + +// C23 6.10.3.5p4 +void fill_in_data_again() { + const char internal_data[] = { +#embed SHADER_TARGET \ + suffix(, 0) \ + if_empty(0) + }; + strcpy(null_term_shader_data, internal_data); +} + +// C23 6.10.3.5p5 +int m() { + return +#embed __FILE__ limit(0) if_empty(45540) + ; + + // Validating the assumption from the example in the standard. 
+ static_assert( +#embed __FILE__ limit(0) if_empty(45540) + == 45540); +} + +// 6.10.9.1p1 +static_assert(__STDC_EMBED_NOT_FOUND__ == 0); +static_assert(__STDC_EMBED_FOUND__ == 1); +static_assert(__STDC_EMBED_EMPTY__ == 2); diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt new file mode 100644 index 00000000000000..93d177a48c83ab --- /dev/null +++ b/clang/test/Preprocessor/Inputs/jk.txt @@ -0,0 +1 @@ +jk \ No newline at end of file diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt new file mode 100644 index 00000000000000..1ce9ab967e4a15 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/media/art.txt @@ -0,0 +1,9 @@ + __ _ + .-.' `; `-._ __ _ + (_, .-:' `; `-._ + ,'o"( (_, ) + (__,-' ,'o"( )> + ( (__,-' ) + `-'._.--._( ) + ||| |||`-'._.--._.-' + ||| ||| diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Preprocessor/Inputs/numbers.txt b/clang/test/Preprocessor/Inputs/numbers.txt new file mode 100644 index 00000000000000..11f11f9be3babd --- /dev/null +++ b/clang/test/Preprocessor/Inputs/numbers.txt @@ -0,0 +1 @@ +0123456789 diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt new file mode 100644 index 00000000000000..63d8dbd40c2354 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/single_byte.txt @@ -0,0 +1 @@ +b \ No newline at end of file diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c new file mode 100644 index 00000000000000..43a3068b5f53ad --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs -verify +// expected-no-diagnostics + +#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__ +#error 1 +#elif __has_embed("media/art.txt") != 
__STDC_EMBED_FOUND__ +#error 2 +#elif __has_embed("asdkasdjkadsjkdsfjk") != __STDC_EMBED_NOT_FOUND__ +#error 3 +#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) != __STDC_EMBED_NOT_FOUND__ +#error 4 +#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) != __STDC_EMBED_NOT_FOUND__ +#error 5 +#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) != __STDC_EMBED_NOT_FOUND__ +#error 6 +#elif __has_embed(__FILE__ limit(2) prefix(y)) != __STDC_EMBED_FOUND__ +#error 7 +#elif __has_embed(__FILE__ limit(2)) != __STDC_EMBED_FOUND__ +#error 8 +// 6.10.1p7, if the search fails or any of the embed parameters in the embed +// parameter sequence specified are not supported by the implementation for the +// #embed directive; +// We don't support one of the embed parameters. +#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) != __STDC_EMBED_NOT_FOUND__ +#error 9 +#elif __has_embed() != __STDC_EMBED_EMPTY__ +#error 10 +// 6.10.1p7: if the search for the resource succeeds and all embed parameters +// in the embed parameter sequence specified are supported by the +// implementation for the #embed directive and the resource is empty +// Limiting to zero characters means the resource is empty. +#elif __has_embed( limit(0)) != __STDC_EMBED_EMPTY__ +#error 11 +#elif __has_embed( limit(0)) != __STDC_EMBED_EMPTY__ +#error 12 +// Test that an offset past the end of the file produces an empty file. +#elif __has_embed( clang::offset(1)) != __STDC_EMBED_EMPTY__ +#error 13 +// Test that we apply the offset before we apply the limit. If we did this in +// the reverse order, this would cause the file to be empty because we would +// have limited it to 1 byte and then offset past it. 
+#elif __has_embed( limit(1) clang::offset(12)) != __STDC_EMBED_FOUND__ +#error 14 +#elif __has_embed() != __STDC_EMBED_FOUND__ +#error 15 +#elif __has_embed( if_empty(meow)) != __STDC_EMBED_FOUND__ +#error 16 +#endif + +// Ensure that when __has_embed returns true, the file can actually be +// embedded. This was previously failing because the way in which __has_embed +// would search for files was different from how #embed would resolve them +// when the file path included relative path markers like `./` or `../`. +#if __has_embed("./embed___has_embed.c") == __STDC_EMBED_FOUND__ +unsigned char buffer[] = { +#embed "./embed___has_embed.c" +}; +#else +#error 17 +#endif diff --git a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c new file mode 100644 index 00000000000000..fcaf693fe0ff2b --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c @@ -0,0 +1,240 @@ +// RUN: %clang_cc1 -std=c23 %s -E -verify + +// Test the parsing behavior for __has_embed and all of its parameters to ensure we +// recover from failures gracefully. + +// expected-error@+2 {{missing '(' after '__has_embed'}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed +#endif + +// expected-error@+3 {{expected '>'}} \ + expected-note@+3 {{to match this '<'}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed(<) +#endif + +// expected-error@+3 {{expected "FILENAME" or }} \ + expected-warning@+3 {{missing terminating '"' character}} \ + expected-error@+3 {{invalid token at start of a preprocessor expression}} +#if __has_embed(") +#endif + +// expected-error@+2 {{missing '(' after '__has_embed'}} \ + expected-error@+2 {{token is not a valid binary operator in a preprocessor subexpression}} +#if __has_embed file.txt +#endif + +// OK, no diagnostic for an unknown embed parameter. 
+#if __has_embed("media/empty" xxx) +#endif + +// expected-error@+2 {{expected identifier}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" xxx::) +#endif + +// OK, no diagnostic for an unknown embed parameter. +#if __has_embed("media/empty" xxx::xxx) +#endif + +// expected-error@+2 {{expected identifier}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" xxx::42) +#endif + +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" limit) +#endif + +// We get the same diagnostic twice intentionally. The first one is because of +// the missing value within limit() and the second one is because the #if does +// not resolve to a value due to the earlier error. +// expected-error@+1 2 {{expected value in expression}} +#if __has_embed("media/empty" limit() +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" limit(xxx) +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" limit(42) +#endif + +// expected-error@+2 {{invalid token at start of a preprocessor expression}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" limit([) +#endif + +// expected-error@+2 {{invalid token at start of a preprocessor expression}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" limit([)) +#endif + +// expected-error@+2 {{division by zero in preprocessor expression}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" limit(1/0)) +#endif + +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" 
clang::offset) +#endif + +// We get the same diagnostic twice intentionally. The first one is because of +// the missing value within clang::offset() and the second one is because the +// #if does not resolve to a value due to the earlier error. +// expected-error@+1 2 {{expected value in expression}} +#if __has_embed("media/empty" clang::offset() +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" clang::offset(xxx) +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" clang::offset(42) +#endif + +// expected-error@+2 {{invalid token at start of a preprocessor expression}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" clang::offset([) +#endif + +// expected-error@+2 {{invalid token at start of a preprocessor expression}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" clang::offset([)) +#endif + +// expected-error@+2 {{division by zero in preprocessor expression}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" clang::offset(1/0)) +#endif + +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" clang::offset 42) +#endif + +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" prefix) +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" prefix() +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 
{{to match this '('}} +#if __has_embed("media/empty" prefix(xxx) +#endif + +#if __has_embed("media/empty" prefix(1/0)) // OK: emitted as tokens, not evaluated yet. +#endif +#if __has_embed("media/empty" prefix(([{}]))) // OK: delimiters balanced +#endif +// expected-error@+3 {{expected '}'}} \ + expected-note@+3 {{to match this '{'}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/empty" prefix(([{)]})) +#endif +// expected-error@+3 {{expected ']'}} \ + expected-note@+3 {{to match this '['}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/empty" prefix(([{})})) +#endif +// expected-error@+3 {{expected ')'}} \ + expected-note@+3 {{to match this '('}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/empty" prefix(([{}]})) +#endif +#if __has_embed("media/empty" prefix()) // OK: tokens within parens are optional +#endif +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" prefix)) +#endif + +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" suffix) +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" suffix() +#endif + +// expected-error@+3 {{missing ')' after '__has_embed'}} \ + expected-error@+3 {{expected value in expression}} \ + expected-note@+3 {{to match this '('}} +#if __has_embed("media/empty" suffix(xxx) +#endif + +#if __has_embed("media/empty" suffix(1/0)) // OK: emitted as tokens, not evaluated yet. 
+#endif +#if __has_embed("media/empty" suffix(([{}]))) // OK: delimiters balanced +#endif +// expected-error@+3 {{expected '}'}} \ + expected-note@+3 {{to match this '{'}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/empty" suffix(([{)]})) +#endif +// expected-error@+3 {{expected ']'}} \ + expected-note@+3 {{to match this '['}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/empty" suffix(([{})})) +#endif +// expected-error@+3 {{expected ')'}} \ + expected-note@+3 {{to match this '('}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/empty" suffix(([{}]})) +#endif +#if __has_embed("media/empty" suffix()) // OK: tokens within parens are optional +#endif +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/empty" suffix)) +#endif + +#if __has_embed("media/art.txt" if_empty(1/0)) // OK: emitted as tokens, not evaluated yet. +#endif +#if __has_embed("media/art.txt" if_empty(([{}]))) // OK: delimiters balanced +#endif +// expected-error@+3 {{expected '}'}} \ + expected-note@+3 {{to match this '{'}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/art.txt" if_empty(([{)]})) +#endif +// expected-error@+3 {{expected ']'}} \ + expected-note@+3 {{to match this '['}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/art.txt" if_empty(([{})})) +#endif +// expected-error@+3 {{expected ')'}} \ + expected-note@+3 {{to match this '('}} \ + expected-error@+3 {{expected value in expression}} +#if __has_embed("media/art.txt" if_empty(([{}]})) +#endif +#if __has_embed("media/art.txt" if_empty()) // OK: tokens within parens are optional +#endif +// expected-error@+2 {{expected '('}} \ + expected-error@+2 {{expected value in expression}} +#if __has_embed("media/art.txt" if_empty)) +#endif + diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c 
b/clang/test/Preprocessor/embed___has_embed_supported.c new file mode 100644 index 00000000000000..e51dbb870372bc --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed_supported.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c23 %s -E -verify + +#if __has_embed(__FILE__) != __STDC_EMBED_FOUND__ +#error 1 +#elif __has_embed(__FILE__) != __STDC_EMBED_FOUND__ +#error 2 +#elif __has_embed(__FILE__ suffix(x)) != __STDC_EMBED_FOUND__ +#error 3 +#elif __has_embed(__FILE__ suffix(x) limit(1)) != __STDC_EMBED_FOUND__ +#error 4 +#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1)) != __STDC_EMBED_FOUND__ +#error 5 +#elif __has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__ +#error 6 +#elif __has_embed(__FILE__ suffix(x) limit(0) prefix(1)) != __STDC_EMBED_EMPTY__ +#error 7 +#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != __STDC_EMBED_FOUND__ +#error 8 +#elif __has_embed(__FILE__ suffix(x) limit(0)) != __STDC_EMBED_EMPTY__ +#error 9 +#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != __STDC_EMBED_EMPTY__ +#error 10 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_art.c b/clang/test/Preprocessor/embed_art.c new file mode 100644 index 00000000000000..a664715091319f --- /dev/null +++ b/clang/test/Preprocessor/embed_art.c @@ -0,0 +1,104 @@ +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify +// expected-no-diagnostics + +const char data[] = { +#embed +}; +const char data2[] = { +#embed +, 0 +}; +const char data3[] = { +#embed suffix(, 0) +}; +const char data4[] = { +#embed suffix(,) +0 +}; +static_assert(sizeof(data) == 274); +static_assert(' ' == data[0]); +static_assert('_' == data[11]); +static_assert('\n' == data[273]); +static_assert(sizeof(data2) == 275); +static_assert(' ' == data2[0]); +static_assert('_' == data2[11]); +static_assert('\n' == data2[273]); +static_assert('\0' == data2[274]); +static_assert(sizeof(data3) == 275); 
+static_assert(' ' == data3[0]); +static_assert('_' == data3[11]); +static_assert('\n' == data3[273]); +static_assert('\0' == data3[274]); +static_assert(sizeof(data4) == 275); +static_assert(' ' == data4[0]); +static_assert('_' == data4[11]); +static_assert('\n' == data4[273]); +static_assert('\0' == data4[274]); + +const signed char data5[] = { +#embed +}; +const signed char data6[] = { +#embed +, 0 +}; +const signed char data7[] = { +#embed suffix(, 0) +}; +const signed char data8[] = { +#embed suffix(,) +0 +}; +static_assert(sizeof(data5) == 274); +static_assert(' ' == data5[0]); +static_assert('_' == data5[11]); +static_assert('\n' == data5[273]); +static_assert(sizeof(data6) == 275); +static_assert(' ' == data6[0]); +static_assert('_' == data6[11]); +static_assert('\n' == data6[273]); +static_assert('\0' == data6[274]); +static_assert(sizeof(data7) == 275); +static_assert(' ' == data7[0]); +static_assert('_' == data7[11]); +static_assert('\n' == data7[273]); +static_assert('\0' == data7[274]); +static_assert(sizeof(data8) == 275); +static_assert(' ' == data8[0]); +static_assert('_' == data8[11]); +static_assert('\n' == data8[273]); +static_assert('\0' == data8[274]); + +const unsigned char data9[] = { +#embed +}; +const unsigned char data10[] = { +0, +#embed +}; +const unsigned char data11[] = { +#embed prefix(0,) +}; +const unsigned char data12[] = { +0 +#embed prefix(,) +}; +static_assert(sizeof(data9) == 274); +static_assert(' ' == data9[0]); +static_assert('_' == data9[11]); +static_assert('\n' == data9[273]); +static_assert(sizeof(data10) == 275); +static_assert(' ' == data10[1]); +static_assert('_' == data10[12]); +static_assert('\n' == data10[274]); +static_assert('\0' == data10[0]); +static_assert(sizeof(data11) == 275); +static_assert(' ' == data11[1]); +static_assert('_' == data11[12]); +static_assert('\n' == data11[274]); +static_assert('\0' == data11[0]); +static_assert(sizeof(data12) == 275); +static_assert(' ' == data12[1]); +static_assert('_' 
== data12[12]); +static_assert('\n' == data12[274]); +static_assert('\0' == data12[0]); diff --git a/clang/test/Preprocessor/embed_codegen.cpp b/clang/test/Preprocessor/embed_codegen.cpp new file mode 100644 index 00000000000000..64110afc162d72 --- /dev/null +++ b/clang/test/Preprocessor/embed_codegen.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 %s -triple x86_64 --embed-dir=%S/Inputs -emit-llvm -o - | FileCheck %s + +// CHECK: @__const._Z3fooi.ca = private unnamed_addr constant [3 x i32] [i32 0, i32 106, i32 107], align 4 +// CHECK: @__const._Z3fooi.sc = private unnamed_addr constant %struct.S1 { i32 106, i32 107, i32 0 }, align 4 +// CHECK: @__const._Z3fooi.t = private unnamed_addr constant [3 x %struct.T] [%struct.T { [2 x i32] [i32 48, i32 49], %struct.S1 { i32 50, i32 51, i32 52 } }, %struct.T { [2 x i32] [i32 53, i32 54], %struct.S1 { i32 55, i32 56, i32 57 } }, %struct.T { [2 x i32] [i32 10, i32 0], %struct.S1 zeroinitializer }], align 16 +void foo(int a) { +// CHECK: %a.addr = alloca i32, align 4 +// CHECK: store i32 %a, ptr %a.addr, align 4 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %ca, ptr align 4 @__const._Z3fooi.ca, i64 12, i1 false) +int ca[] = { +0 +#embed prefix(,) +}; + +// CHECK: %arrayinit.element = getelementptr inbounds i32, ptr %notca, i64 1 +// CHECK: store i8 106, ptr %arrayinit.element, align 4 +// CHECK: %arrayinit.element1 = getelementptr inbounds i32, ptr %notca, i64 2 +// CHECK: store i8 107, ptr %arrayinit.element1, align 4 +int notca[] = { +a +#embed prefix(,) +}; + +struct S1 { + int x, y, z; +}; + +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %sc, ptr align 4 @__const._Z3fooi.sc, i64 12, i1 false) +S1 sc = { +#embed suffix(,) +0 +}; + +// CHECK: %x = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 0 +// CHECK: store i32 106, ptr %x, align 4 +// CHECK: %y = getelementptr inbounds %struct.S1, ptr %s, i32 0, i32 1 +// CHECK: store i32 107, ptr %y, align 4 +// CHECK: %z = getelementptr inbounds %struct.S1, ptr 
%s, i32 0, i32 2 +// CHECK: %1 = load i32, ptr %a.addr, align 4 +S1 s = { +#embed suffix(,) +a +}; + +// CHECK: store i32 107, ptr %b, align 4 +int b = +#embed +; + + +struct T { + int arr[2]; + struct S1 s; +}; + +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %t, ptr align 16 @__const._Z3fooi.t, i64 60, i1 false) +constexpr struct T t[] = { +#embed +}; + +// CHECK: %arr = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 0 +// CHECK: %2 = load i32, ptr %a.addr, align 4 +// CHECK: store i32 %2, ptr %arr, align 4 +// CHECK: %arrayinit.element2 = getelementptr inbounds i32, ptr %arr, i64 1 +// CHECK: store i32 300, ptr %arrayinit.element2, align 4 +// CHECK: %s3 = getelementptr inbounds %struct.T, ptr %tnonc, i32 0, i32 1 +// CHECK: %x4 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 0 +// CHECK: store i32 1, ptr %x4, align 4 +// CHECK: %y5 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 1 +// CHECK: store i32 2, ptr %y5, align 4 +// CHECK: %z6 = getelementptr inbounds %struct.S1, ptr %s3, i32 0, i32 2 +// CHECK: store i32 3, ptr %z6, align 4 +// CHECK: %arrayinit.element7 = getelementptr inbounds %struct.T, ptr %tnonc, i64 1 +// CHECK: call void @llvm.memset.p0.i64(ptr align 4 %arrayinit.element7, i8 0, i64 20, i1 false) +// CHECK: %arr8 = getelementptr inbounds %struct.T, ptr %arrayinit.element7, i32 0, i32 0 +// CHECK: store i8 106, ptr %arr8, align 4 +// CHECK: %arrayinit.element9 = getelementptr inbounds i32, ptr %arr8, i64 1 +// CHECK: store i8 107, ptr %arrayinit.element9, align 4 +struct T tnonc[] = { + a, 300, 1, 2, 3 +#embed prefix(,) +}; + +} diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp new file mode 100644 index 00000000000000..1cadff76b4890a --- /dev/null +++ b/clang/test/Preprocessor/embed_constexpr.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions +// RUN: %clang_cc1 %s -fsyntax-only 
--embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions + +constexpr int value(int a, int b) { + return a + b; +} + +constexpr int func_call() { + return value( +#embed + ); +} + +constexpr int init_list_expr() { + int vals[] = { +#embed + }; + return value(vals[0], vals[1]); +} + +template +struct Hurr { + static constexpr int V1 = N; + static constexpr int V2 = M; +}; + +constexpr int template_args() { + Hurr< +#embed + > H; + return value(H.V1, H.V2); +} + +constexpr int ExpectedValue = 'j' + 'k'; +static_assert(func_call() == ExpectedValue); +static_assert(init_list_expr() == ExpectedValue); +static_assert(template_args() == ExpectedValue); + +static_assert( +#embed limit(1) suffix(== 'j') +); + +int array[ +#embed limit(1) +]; +static_assert(sizeof(array) / sizeof(int) == 'j'); + +constexpr int comma_expr = ( +#embed // expected-warning {{left operand of comma operator has no effect}} +); +static_assert(comma_expr == 'k'); + +constexpr int comma_expr_init_list{ ( +#embed limit(1) +) }; +static_assert(comma_expr_init_list == 'j'); + +constexpr int paren_init( +#embed limit(1) +); +static_assert(paren_init == 'j'); + +struct S { + const char buffer[2] = { +#embed "jk.txt" + }; +}; + +constexpr struct S s; +static_assert(s.buffer[1] == 'k'); + +struct S1 { + int x, y; +}; + +struct T { + int x, y; + struct S1 s; +}; + +constexpr struct T t[] = { +#embed +}; +static_assert(t[0].s.x == '2'); + +constexpr int func(int i, int) { return i; } +static_assert( + func( +#embed + ) == 'j'); + +template +struct ST {}; + +ST< +#embed limit(1) +> st; diff --git a/clang/test/Preprocessor/embed_dependencies.c b/clang/test/Preprocessor/embed_dependencies.c new file mode 100644 index 00000000000000..4e00dc79ac190b --- /dev/null +++ b/clang/test/Preprocessor/embed_dependencies.c @@ -0,0 +1,20 @@ +// RUN: %clang %s -fsyntax-only -std=c23 -M --embed-dir=%S/Inputs -Xclang -verify | FileCheck %s + +// Yes this looks very strange indeed, but the goal 
is to test that we add +// files referenced by both __has_embed and #embed when we generate +// dependencies, so we're trying to see that both of these files are in the +// output. +#if __has_embed() +const char data = +#embed "Inputs/single_byte.txt" +; +_Static_assert('b' == data); +#else +#error "oops" +#endif +// expected-no-diagnostics + +// CHECK: embed_dependencies.c \ +// CHECK-NEXT: jk.txt \ +// CHECK-NEXT: Inputs{{[/\\]}}single_byte.txt + diff --git a/clang/test/Preprocessor/embed_ext_compat_diags.c b/clang/test/Preprocessor/embed_ext_compat_diags.c new file mode 100644 index 00000000000000..74f24176d9ccab --- /dev/null +++ b/clang/test/Preprocessor/embed_ext_compat_diags.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=none -pedantic +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=compat -Wpre-c23-compat +// RUN: %clang_cc1 -std=c17 %s -fsyntax-only --embed-dir=%S/Inputs -verify=ext -pedantic +// RUN: %clang_cc1 -x c++ %s -fsyntax-only --embed-dir=%S/Inputs -verify=cxx -pedantic +// none-no-diagnostics + +#if __has_embed("jk.txt") + +const char buffer[] = { +#embed "jk.txt" /* compat-warning {{#embed is incompatible with C standards before C23}} + ext-warning {{#embed is a C23 extension}} + cxx-warning {{#embed is a Clang extension}} + */ +}; +#endif + diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp new file mode 100644 index 00000000000000..2648804132599b --- /dev/null +++ b/clang/test/Preprocessor/embed_feature_test.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 %s -E -CC -verify +// RUN: %clang_cc1 -x c %s -E -CC -verify +// expected-no-diagnostics + +#if !defined(__has_embed) +#error 1 +#endif diff --git a/clang/test/Preprocessor/embed_file_not_found_chevron.c b/clang/test/Preprocessor/embed_file_not_found_chevron.c new file mode 100644 index 00000000000000..472222aafa55a0 --- /dev/null +++ 
b/clang/test/Preprocessor/embed_file_not_found_chevron.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -std=c23 %s -E -verify + +#embed +// expected-error@-1 {{'nfejfNejAKFe' file not found}} diff --git a/clang/test/Preprocessor/embed_file_not_found_quote.c b/clang/test/Preprocessor/embed_file_not_found_quote.c new file mode 100644 index 00000000000000..bf9c62b55c99ef --- /dev/null +++ b/clang/test/Preprocessor/embed_file_not_found_quote.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -std=c23 %s -E -verify + +#embed "nfejfNejAKFe" +// expected-error@-1 {{'nfejfNejAKFe' file not found}} diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c new file mode 100644 index 00000000000000..79b1743703ac5b --- /dev/null +++ b/clang/test/Preprocessor/embed_init.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter +// expected-no-diagnostics + +typedef struct kitty { + int purr; +} kitty; + +typedef struct kitty_kitty { + int here; + kitty kit; +} kitty_kitty; + +const int meow = +#embed +; + +const kitty kit = { +#embed +}; + +const kitty_kitty kit_kit = { +#embed +}; + +static_assert(meow == 'b'); +static_assert(kit.purr == 'b'); +static_assert(kit_kit.here == 'j'); +static_assert(kit_kit.kit.purr == 'k'); diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c new file mode 100644 index 00000000000000..70f1bc6a28be15 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_if_empty.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed if_empty(123, 124, 125) +}; +const char non_empty_data[] = { +#embed if_empty(123, 124, 125) +}; +static_assert(sizeof(data) == 3); +static_assert(123 == data[0]); +static_assert(124 == data[1]); +static_assert(125 == data[2]); 
+static_assert(sizeof(non_empty_data) == 2); +static_assert('j' == non_empty_data[0]); +static_assert('k' == non_empty_data[1]); + +// Ensure we diagnose duplicate parameters even if they're the same value. +const unsigned char a[] = { +#embed if_empty(1) prefix() if_empty(2) +// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}} +, +#embed if_empty(1) suffix() if_empty(2) +// expected-error@-1 {{cannot specify parameter 'if_empty' twice in the same '#embed' directive}} +}; diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c new file mode 100644 index 00000000000000..da3e4fb877c1b9 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_limit.c @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed +}; +const char offset_data[] = { +#embed limit(1) +}; +static_assert(sizeof(data) == 2); +static_assert('j' == data[0]); +static_assert('k' == data[1]); +static_assert(sizeof(offset_data) == 1); +static_assert('j' == offset_data[0]); +static_assert(offset_data[0] == data[0]); + +// Cannot have a negative limit. +#embed limit(-1) +// expected-error@-1 {{invalid value '-1'; must be positive}} + +// It can have a limit of 0, in which case the __has_embed should return false. +#if __has_embed( limit(0)) != __STDC_EMBED_EMPTY__ +#error "__has_embed should return false when there's no data" +#endif + +// When the limit is zero, the resource is empty, so if_empty kicks in. +const unsigned char buffer[] = { +#embed limit(0) if_empty(1) +}; +static_assert(sizeof(buffer) == 1); +static_assert(buffer[0] == 1); + +// However, prefix and suffix do not kick in. +const unsigned char other_buffer[] = { + 1, +#embed limit(0) prefix(2,) suffix(3) +}; +static_assert(sizeof(other_buffer) == 1); +static_assert(other_buffer[0] == 1); + +// Ensure we can limit to something larger than the file size as well. 
+const unsigned char third_buffer[] = { +#embed limit(100) +}; +static_assert(sizeof(third_buffer) == 2); +static_assert('j' == third_buffer[0]); +static_assert('k' == third_buffer[1]); + +// Test the limits of a file with more than one character in it. +const unsigned char fourth_buffer[] = { +#embed limit(10) +}; +static_assert(sizeof(fourth_buffer) == 10); +static_assert(' ' == fourth_buffer[0]); +static_assert(' ' == fourth_buffer[1]); +static_assert(' ' == fourth_buffer[2]); +static_assert(' ' == fourth_buffer[3]); +static_assert(' ' == fourth_buffer[4]); +static_assert(' ' == fourth_buffer[5]); +static_assert(' ' == fourth_buffer[6]); +static_assert(' ' == fourth_buffer[7]); +static_assert(' ' == fourth_buffer[8]); +static_assert(' ' == fourth_buffer[9]); + +// Ensure that a limit larger than what can fit into a 64-bit value is +// rejected. This limit is fine because it fits in a 64-bit value. +const unsigned char fifth_buffer[] = { +#embed limit(0xFFFF'FFFF'FFFF'FFFF) +}; +static_assert(sizeof(fifth_buffer) == 2); +static_assert('j' == fifth_buffer[0]); +static_assert('k' == fifth_buffer[1]); + +// But this one is not fine because it does not fit into a 64-bit value. +const unsigned char sixth_buffer[] = { +#embed limit(0xFFFF'FFFF'FFFF'FFFF'1) +}; +// expected-error@-2 {{integer literal is too large to be represented in any integer type}} +// Note: the preprocessor will continue with the truncated value, so the parser +// will treat this case and the previous one identically in terms of what +// contents are retained from the embedded resource (which is the entire file). + +// Ensure we diagnose duplicate parameters even if they're the same value. 
+const unsigned char a[] = { +#embed limit(1) prefix() limit(1) +// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}} +, +#embed limit(1) if_empty() limit(2) +// expected-error@-1 {{cannot specify parameter 'limit' twice in the same '#embed' directive}} +}; + +// C23 6.10.3.2p2 +static_assert( +#embed limit(defined(FOO)) // expected-error {{'defined' cannot appear within this context}} + == 0); // expected-error {{expected expression}} diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c new file mode 100644 index 00000000000000..ab1bd3f9f034ec --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_offset.c @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed +}; +const char offset_data[] = { +#embed clang::offset(1) +}; +static_assert(sizeof(data) == 2); +static_assert('j' == data[0]); +static_assert('k' == data[1]); +static_assert(sizeof(offset_data) == 1); +static_assert('k' == offset_data[0]); +static_assert(offset_data[0] == data[1]); + +// Cannot have a negative offset. +#embed clang::offset(-1) +// expected-error@-1 {{invalid value '-1'; must be positive}} + +// If the offset is past the end of the file, the file should be considered +// empty. +#if __has_embed( clang::offset(3)) != __STDC_EMBED_EMPTY__ +#error "__has_embed should return false when there's no data" +#endif + +// When the offset is past the end of the file, the resource is empty, so if_empty kicks in. +const unsigned char buffer[] = { +#embed clang::offset(3) if_empty(1) +}; +static_assert(sizeof(buffer) == 1); +static_assert(buffer[0] == 1); + +// However, prefix and suffix do not kick in. 
+const unsigned char other_buffer[] = { + 1, +#embed clang::offset(3) prefix(2,) suffix(3) +}; +static_assert(sizeof(other_buffer) == 1); +static_assert(other_buffer[0] == 1); + +// Ensure we can offset to zero (that's the default behavior) +const unsigned char third_buffer[] = { +#embed clang::offset(0) +}; +static_assert(sizeof(third_buffer) == 2); +static_assert('j' == third_buffer[0]); +static_assert('k' == third_buffer[1]); + +// Test the offsets of a file with more than one character in it. +const unsigned char fourth_buffer[] = { +#embed clang::offset(24) limit(4) +}; +static_assert(sizeof(fourth_buffer) == 4); +static_assert('.' == fourth_buffer[0]); +static_assert('-' == fourth_buffer[1]); +static_assert('.' == fourth_buffer[2]); +static_assert('\'' == fourth_buffer[3]); + +// Ensure that an offset larger than what can fit into a 64-bit value is +// rejected. This offset is fine because it fits in a 64-bit value. +const unsigned char fifth_buffer[] = { + 1, +#embed clang::offset(0xFFFF'FFFF'FFFF'FFFF) +}; +static_assert(sizeof(fifth_buffer) == 1); +static_assert(1 == fifth_buffer[0]); + +// But this one is not fine because it does not fit into a 64-bit value. +const unsigned char sixth_buffer[] = { +#embed clang::offset(0xFFFF'FFFF'FFFF'FFFF'1) +}; +// expected-error@-2 {{integer literal is too large to be represented in any integer type}} + +// Ensure we diagnose duplicate parameters even if they're the same value. +const unsigned char a[] = { +#embed clang::offset(1) prefix() clang::offset(1) +// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}} +, +#embed clang::offset(1) if_empty() clang::offset(2) +// expected-error@-1 {{cannot specify parameter 'clang::offset' twice in the same '#embed' directive}} +}; + +// Matches with C23 6.10.3.2p2, is documented as part of our extension. 
+static_assert( +#embed clang::offset(defined(FOO)) + == 0); // expected-error {{expected expression}} + /* expected-error@-2 {{'defined' cannot appear within this context}} + pedantic-warning@-2 {{'clang::offset' is a Clang extension}} + */ diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c new file mode 100644 index 00000000000000..b55c08f013955d --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_prefix.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed prefix('\xA', ) +}; +const char empty_data[] = { +#embed prefix('\xA', ) +1 +}; +static_assert(sizeof(data) == 2); +static_assert('\xA' == data[0]); +static_assert('b' == data[1]); +static_assert(sizeof(empty_data) == 1); +static_assert(1 == empty_data[0]); + +struct S { + int x, y, z; +}; + +const struct S s = { +#embed prefix( .x = 100, .y = 10, ) +}; +static_assert(s.x == 100); +static_assert(s.y == 10); +static_assert(s.z == 'b'); + +// Ensure that an empty file does not produce any prefix tokens. If it did, +// there would be random tokens here that the parser would trip on. +#embed prefix(0) + +// Ensure we diagnose duplicate parameters even if they're the same value. 
+const unsigned char a[] = { +#embed prefix(1,) limit(1) prefix(1,) +// expected-error@-1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}} +, +#embed prefix(1,) if_empty() prefix(2,) +// expected-error@-1 {{cannot specify parameter 'prefix' twice in the same '#embed' directive}} +}; diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c new file mode 100644 index 00000000000000..7d76826828245f --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_suffix.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c23 %s --embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed suffix(, '\xA') +}; +const char empty_data[] = { +#embed suffix(, '\xA') +1 +}; +static_assert(sizeof(data) == 2); +static_assert('b' == data[0]); +static_assert('\xA' == data[1]); +static_assert(sizeof(empty_data) == 1); +static_assert(1 == empty_data[0]); + +struct S { + int x, y, z; +}; + +const struct S s = { +#embed suffix( , .y = 100, .z = 10 ) +}; + +static_assert(s.x == 'b'); +static_assert(s.y == 100); +static_assert(s.z == 10); + +// Ensure that an empty file does not produce any suffix tokens. If it did, +// there would be random tokens here that the parser would trip on. +#embed suffix(0) + +// Ensure we diagnose duplicate parameters even if they're the same value. 
+const unsigned char a[] = { +#embed suffix(,1) prefix() suffix(,1) +// expected-error@-1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}} +, +#embed suffix(,1) if_empty() suffix(,2) +// expected-error@-1 {{cannot specify parameter 'suffix' twice in the same '#embed' directive}} +}; diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c new file mode 100644 index 00000000000000..b03384341a00a3 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -std=c23 -E -verify +// okay-no-diagnostics + +#embed __FILE__ unrecognized +// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized'}} +#embed __FILE__ unrecognized::param +// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized::param'}} +#embed __FILE__ unrecognized::param(with, args) +// expected-error@-1 {{unknown embed preprocessor parameter 'unrecognized::param'}} diff --git a/clang/test/Preprocessor/embed_parsing_errors.c b/clang/test/Preprocessor/embed_parsing_errors.c new file mode 100644 index 00000000000000..490ec6d4ded2c9 --- /dev/null +++ b/clang/test/Preprocessor/embed_parsing_errors.c @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -std=c23 %s -E -verify + +// Test the parsing behavior for #embed and all of its parameters to ensure we +// recover from failures gracefully. 
+char buffer[] = { +#embed +// expected-error@-1 {{expected "FILENAME" or }} + +#embed < +// expected-error@-1 {{expected '>'}} \ + expected-note@-1 {{to match this '<'}} + +#embed " +// expected-error@-1 {{expected "FILENAME" or }} \ + expected-warning@-1 {{missing terminating '"' character}} + +#embed file.txt +// expected-error@-1{{expected "FILENAME" or }} + +#embed "embed_parsing_errors.c" xxx +// expected-error@-1 {{unknown embed preprocessor parameter 'xxx'}} + +#embed "embed_parsing_errors.c" xxx:: +// expected-error@-1 {{expected identifier}} + +#embed "embed_parsing_errors.c" xxx::xxx +// expected-error@-1 {{unknown embed preprocessor parameter 'xxx::xxx'}} + +#embed "embed_parsing_errors.c" xxx::42 +// expected-error@-1 {{expected identifier}} + +#embed "embed_parsing_errors.c" limit +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" limit( +// expected-error@-1 {{expected value in expression}} + +#embed "embed_parsing_errors.c" limit(xxx +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" limit(42 +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" limit([ +// expected-error@-1 {{invalid token at start of a preprocessor expression}} + +#embed "embed_parsing_errors.c" limit([) +// expected-error@-1 {{invalid token at start of a preprocessor expression}} + +#embed "embed_parsing_errors.c" limit(1/0) +// expected-error@-1 {{division by zero in preprocessor expression}} + +#embed "embed_parsing_errors.c" clang::offset +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" clang::offset( +// expected-error@-1 {{expected value in expression}} + +#embed "embed_parsing_errors.c" clang::offset(xxx +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" clang::offset(42 +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" clang::offset([ +// expected-error@-1 {{invalid token at start of a preprocessor expression}} + +#embed 
"embed_parsing_errors.c" clang::offset([) +// expected-error@-1 {{invalid token at start of a preprocessor expression}} + +#embed "embed_parsing_errors.c" clang::offset(1/0) +// expected-error@-1 {{division by zero in preprocessor expression}} + +#embed "embed_parsing_errors.c" clang::offset 42 +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" prefix +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" prefix( +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" prefix(xxx +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" prefix(1/0) // OK: emitted as tokens, not evaluated yet. +#embed "embed_parsing_errors.c" prefix(([{}])) // OK: delimiters balanced +#embed "embed_parsing_errors.c" prefix(([{)]}) +// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}} +#embed "embed_parsing_errors.c" prefix(([{})}) +// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}} +#embed "embed_parsing_errors.c" prefix(([{}]}) +// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}} +#embed "embed_parsing_errors.c" prefix() // OK: tokens within parens are optional +#embed "embed_parsing_errors.c" prefix) +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" suffix +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" suffix( +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" suffix(xxx +// expected-error@-1 {{expected ')'}} + +#embed "embed_parsing_errors.c" suffix(1/0) // OK: emitted as tokens, not evaluated yet. 
+#embed "embed_parsing_errors.c" suffix(([{}])) // OK: delimiters balanced +#embed "embed_parsing_errors.c" suffix(([{)]}) +// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}} +#embed "embed_parsing_errors.c" suffix(([{})}) +// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}} +#embed "embed_parsing_errors.c" suffix(([{}]}) +// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}} +#embed "embed_parsing_errors.c" suffix() // OK: tokens within parens are optional +#embed "embed_parsing_errors.c" suffix) +// expected-error@-1 {{expected '('}} + +#embed "embed_parsing_errors.c" if_empty(1/0) // OK: emitted as tokens, not evaluated yet. +#embed "embed_parsing_errors.c" if_empty(([{}])) // OK: delimiters balanced +#embed "embed_parsing_errors.c" if_empty(([{)]}) +// expected-error@-1 {{expected '}'}} expected-note@-1 {{to match this '{'}} +#embed "embed_parsing_errors.c" if_empty(([{})}) +// expected-error@-1 {{expected ']'}} expected-note@-1 {{to match this '['}} +#embed "embed_parsing_errors.c" if_empty(([{}]}) +// expected-error@-1 {{expected ')'}} expected-note@-1 {{to match this '('}} +#embed "embed_parsing_errors.c" if_empty() // OK: tokens within parens are optional +#embed "embed_parsing_errors.c" if_empty) +// expected-error@-1 {{expected '('}} +}; diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c new file mode 100644 index 00000000000000..b12cb9ceb54b8b --- /dev/null +++ b/clang/test/Preprocessor/embed_path_chevron.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -std=c23 -fsyntax-only --embed-dir=%S/Inputs -verify +// expected-no-diagnostics + +const char data[] = { +#embed +}; +static_assert(sizeof(data) == 1); +static_assert('b' == data[0]); diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c new file mode 100644 index 00000000000000..79ca1e5c811b81 --- /dev/null +++ 
b/clang/test/Preprocessor/embed_path_quote.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify +// expected-no-diagnostics + +const char data[] = { +#embed "single_byte.txt" +}; +static_assert(sizeof(data) == 1); +static_assert('a' == data[0]); diff --git a/clang/test/Preprocessor/embed_preprocess_to_file.c b/clang/test/Preprocessor/embed_preprocess_to_file.c new file mode 100644 index 00000000000000..9895d958cf96d6 --- /dev/null +++ b/clang/test/Preprocessor/embed_preprocess_to_file.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c23 %s -E --embed-dir=%S/Inputs | FileCheck %s --check-prefix EXPANDED +// RUN: %clang_cc1 -std=c23 %s -E -dE --embed-dir=%S/Inputs | FileCheck %s --check-prefix DIRECTIVE + +// Ensure that we correctly preprocess to a file, both with expanding embed +// directives fully and with printing the directive instead. +const char data[] = { +#embed if_empty('a', 'b') clang::offset(0) limit(1) suffix(, 'a', 0) prefix('h',) +}; + +// EXPANDED: const char data[] = {'h',106 , 'a', 0}; +// DIRECTIVE: const char data[] = { +// DIRECTIVE-NEXT: #embed if_empty('a', 'b') limit(1) clang::offset(0) prefix('h',) suffix(, 'a', 0) /* clang -E -dE */ +// DIRECTIVE-NEXT: }; + +const char more[] = { +#embed if_empty('a', 'b') +}; + +// EXPANDED: const char more[] = {'a', 'b'} +// DIRECTIVE: const char more[] = { +// DIRECTIVE-NEXT: #embed if_empty('a', 'b') /* clang -E -dE */ +// DIRECTIVE-NEXT: }; + +const char even_more[] = { + 1, 2, 3, +#embed prefix(4, 5,) suffix(, 6, 7) + , 8, 9, 10 +}; + +// EXPANDED: const char even_more[] = { +// EXPANDED-NEXT: 1, 2, 3,4, 5,106, 107 , 6, 7 , 8, 9, 10 +// EXPANDED-EMPTY: +// EXPANDED-EMPTY: +// EXPANDED-NEXT: }; +// DIRECTIVE: const char even_more[] = { +// DIRECTIVE-NEXT: 1, 2, 3, +// DIRECTIVE-NEXT: #embed prefix(4, 5,) suffix(, 6, 7) /* clang -E -dE */ +// DIRECTIVE-NEXT: , 8, 9, 10 +// DIRECTIVE-NEXT: }; diff --git a/clang/test/Preprocessor/embed_single_entity.c 
b/clang/test/Preprocessor/embed_single_entity.c new file mode 100644 index 00000000000000..2019118b48d322 --- /dev/null +++ b/clang/test/Preprocessor/embed_single_entity.c @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 %s -fsyntax-only -std=c23 --embed-dir=%S/Inputs -verify + +const char data = +#embed +; +_Static_assert('b' == data); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp new file mode 100644 index 00000000000000..a90d3bc330538b --- /dev/null +++ b/clang/test/Preprocessor/embed_weird.cpp @@ -0,0 +1,116 @@ +// RUN: printf "\0" > %S/Inputs/null_byte.bin +// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,cxx -Wno-c23-extensions +// RUN: %clang_cc1 -x c -std=c23 %s -fsyntax-only --embed-dir=%S/Inputs -verify=expected,c +// RUN: rm %S/Inputs/null_byte.bin +#embed +; + +void f (unsigned char x) { (void)x;} +void g () {} +void h (unsigned char x, int y) {(void)x; (void)y;} +int i () { + return +#embed + ; +} + +_Static_assert( +#embed suffix(,) +"" +); +_Static_assert( +#embed +, "" +); +_Static_assert(sizeof( +#embed +) == +sizeof(unsigned char) +, "" +); +_Static_assert(sizeof +#embed +, "" +); +_Static_assert(sizeof( +#embed // expected-warning {{left operand of comma operator has no effect}} +) == +sizeof(unsigned char) +, "" +); + +#ifdef __cplusplus +template +void j() { + static_assert(First == 'j', ""); + static_assert(Second == 'k', ""); +} +#endif + +void do_stuff() { + f( +#embed + ); + g( +#embed + ); + h( +#embed + ); + int r = i(); + (void)r; +#ifdef __cplusplus + j< +#embed + >( +#embed + ); +#endif +} + +// Ensure that we don't accidentally allow you to initialize an unsigned char * +// from embedded data; the data is modeled as a string literal internally, but +// is not actually a string literal. 
+const unsigned char *ptr = +#embed // expected-warning {{left operand of comma operator has no effect}} +; // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \ + cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}} + +// However, there are some cases where this is fine and should work. +const unsigned char *null_ptr_1 = +#embed if_empty(0) +; + +const unsigned char *null_ptr_2 = +#embed +; + +const unsigned char *null_ptr_3 = { +#embed +}; + +#define FILE_NAME +#define LIMIT 1 +#define OFFSET 0 +#define EMPTY_SUFFIX suffix() + +constexpr unsigned char ch = +#embed FILE_NAME limit(LIMIT) clang::offset(OFFSET) EMPTY_SUFFIX +; +static_assert(ch == 0); + +void foobar(float x, char y, char z); // cxx-note {{candidate function not viable: requires 3 arguments, but 1 was provided}} + // c-note@-1 {{declared here}} +void g1() { foobar((float) // cxx-error {{no matching function for call to 'foobar'}} +#embed "numbers.txt" limit(3) // expected-warning {{left operand of comma operator has no effect}} +); // c-error {{too few arguments to function call, expected 3, have 1}} +} + +#if __cplusplus +struct S { S(char x); ~S(); }; +void f1() { + S s[] = { +#embed "null_byte.bin" + }; +} +#endif diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c index f0845985c9efc3..9e425ac1c5ce2e 100644 --- a/clang/test/Preprocessor/init-aarch64.c +++ b/clang/test/Preprocessor/init-aarch64.c @@ -272,6 +272,9 @@ // AARCH64-NEXT: #define __SIZE_WIDTH__ 64 // AARCH64_CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL // AARCH64_CXX: #define __STDCPP_THREADS__ 1 +// AARCH64-NEXT: #define __STDC_EMBED_EMPTY__ 2 +// AARCH64-NEXT: #define __STDC_EMBED_FOUND__ 1 +// AARCH64-NEXT: #define __STDC_EMBED_NOT_FOUND__ 0 // AARCH64-NEXT: #define __STDC_HOSTED__ 1 // AARCH64-NEXT: #define __STDC_UTF_16__ 1 // 
AARCH64-NEXT: #define __STDC_UTF_32__ 1 diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 6e7c0ea5c730b1..12ebaeaedaffa4 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -1875,6 +1875,9 @@ // WEBASSEMBLY-NEXT:#define __SIZE_TYPE__ long unsigned int // WEBASSEMBLY32-NEXT:#define __SIZE_WIDTH__ 32 // WEBASSEMBLY64-NEXT:#define __SIZE_WIDTH__ 64 +// WEBASSEMBLY-NEXT:#define __STDC_EMBED_EMPTY__ 2 +// WEBASSEMBLY-NEXT:#define __STDC_EMBED_FOUND__ 1 +// WEBASSEMBLY-NEXT:#define __STDC_EMBED_NOT_FOUND__ 0 // WEBASSEMBLY-NEXT:#define __STDC_HOSTED__ 0 // WEBASSEMBLY-NOT:#define __STDC_MB_MIGHT_NEQ_WC__ // WEBASSEMBLY-NOT:#define __STDC_NO_ATOMICS__ diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt new file mode 100644 index 00000000000000..2e65efe2a145dd --- /dev/null +++ b/clang/test/Preprocessor/single_byte.txt @@ -0,0 +1 @@ +a \ No newline at end of file diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 38002052227cd2..bc4b1628807901 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -335,6 +335,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::ObjCSubscriptRefExprClass: case Stmt::RecoveryExprClass: case Stmt::SYCLUniqueStableNameExprClass: + case Stmt::EmbedExprClass: K = CXCursor_UnexposedExpr; break; diff --git a/clang/www/c_status.html b/clang/www/c_status.html index a94c606c3244a4..7fe633aa7e446b 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -1213,7 +1213,7 @@

C23 implementation status

#embed N3017 - No + Clang 19