Skip to content

Commit

Permalink
Remove support for 3DNow!, both intrinsics and builtins. (#96246)
Browse files Browse the repository at this point in the history
This set of instructions was only supported by AMD chips starting in
the K6-2 (introduced 1998), and before the "Bulldozer" family
(2011). They were never much used, as they were effectively superseded
by the more-widely-implemented SSE (first implemented on the AMD side
in Athlon XP in 2001).

This is being done as a predecessor towards general removal of MMX
register usage. Since there is almost no usage of the 3DNow!
intrinsics, and no modern hardware even implements them, simple
removal seems like the best option.

(Clang half originally uploaded in https://reviews.llvm.org/D94213)

Works towards issue #41665 and issue #98272.
  • Loading branch information
jyknight authored Jul 16, 2024
1 parent 3706c12 commit f0eb558
Show file tree
Hide file tree
Showing 29 changed files with 155 additions and 2,337 deletions.
19 changes: 19 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,25 @@ X86 Support
^^^^^^^^^^^

- Remove knl/knm specific ISA supports: AVX512PF, AVX512ER, PREFETCHWT1
- Support has been removed for the AMD "3DNow!" instruction-set.
Neither modern AMD CPUs, nor any Intel CPUs implement these
instructions, and they were never widely used.

* The options ``-m3dnow`` and ``-m3dnowa`` are no longer honored, and will emit a warning if used.
* The macros ``__3dNOW__`` and ``__3dNOW_A__`` are no longer ever set by the compiler.
* The header ``<mm3dnow.h>`` is deprecated, and emits a warning if included.
* The 3dNow intrinsic functions have been removed: ``_m_femms``,
``_m_pavgusb``, ``_m_pf2id``, ``_m_pfacc``, ``_m_pfadd``,
``_m_pfcmpeq``, ``_m_pfcmpge``, ``_m_pfcmpgt``, ``_m_pfmax``,
``_m_pfmin``, ``_m_pfmul``, ``_m_pfrcp``, ``_m_pfrcpit1``,
``_m_pfrcpit2``, ``_m_pfrsqrt``, ``_m_pfrsqrtit1``, ``_m_pfsub``,
``_m_pfsubr``, ``_m_pi2fd``, ``_m_pmulhrw``, ``_m_pf2iw``,
``_m_pfnacc``, ``_m_pfpnacc``, ``_m_pi2fw``, ``_m_pswapdsf``,
``_m_pswapdsi``.
* The compiler builtins corresponding to each of the above
intrinsics have also been removed (``__builtin_ia32_femms``, and so on).
* "3DNow!" instructions remain supported in assembly code, including
inside inline-assembly.

Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
30 changes: 0 additions & 30 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,6 @@ TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "")
TARGET_BUILTIN(__builtin_ia32_readeflags_u32, "Ui", "n", "")
TARGET_BUILTIN(__builtin_ia32_writeeflags_u32, "vUi", "n", "")

// 3DNow!
//
TARGET_BUILTIN(__builtin_ia32_femms, "v", "n", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "ncV:64:", "3dnow")
TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "ncV:64:", "3dnow")
// 3DNow! Extensions (3dnowa).
TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "ncV:64:", "3dnowa")
TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "ncV:64:", "3dnowa")
TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "ncV:64:", "3dnowa")
TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "ncV:64:", "3dnowa")
TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "ncV:64:", "3dnowa")
TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "ncV:64:", "3dnowa")

// MMX
//
// All MMX instructions will be generated via builtins. Any MMX vector
Expand Down
10 changes: 6 additions & 4 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6135,10 +6135,6 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
def m3dnow : Flag<["-"], "m3dnow">, Group<m_x86_Features_Group>;
def mno_3dnow : Flag<["-"], "mno-3dnow">, Group<m_x86_Features_Group>;
def m3dnowa : Flag<["-"], "m3dnowa">, Group<m_x86_Features_Group>;
def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>;
def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
Expand Down Expand Up @@ -6372,6 +6368,12 @@ def mvevpu : Flag<["-"], "mvevpu">, Group<m_ve_Features_Group>,
def mno_vevpu : Flag<["-"], "mno-vevpu">, Group<m_ve_Features_Group>;
} // let Flags = [TargetSpecific]

// Unsupported X86 feature flags (triggers a warning)
def m3dnow : Flag<["-"], "m3dnow">;
def mno_3dnow : Flag<["-"], "mno-3dnow">;
def m3dnowa : Flag<["-"], "m3dnowa">;
def mno_3dnowa : Flag<["-"], "mno-3dnowa">;

// These are legacy user-facing driver-level option spellings. They are always
// aliases for options that are spelled using the more common Unix / GNU flag
// style of double-dash and equals-joined flags.
Expand Down
29 changes: 5 additions & 24 deletions clang/lib/Basic/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
if (Feature[0] != '+')
continue;

if (Feature == "+aes") {
if (Feature == "+mmx") {
HasMMX = true;
} else if (Feature == "+aes") {
HasAES = true;
} else if (Feature == "+vaes") {
HasVAES = true;
Expand Down Expand Up @@ -487,13 +489,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
// for bfloat16 arithmetic operations in the front-end.
HasBFloat16 = SSELevel >= SSE2;

MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
.Case("+3dnowa", AMD3DNowAthlon)
.Case("+3dnow", AMD3DNow)
.Case("+mmx", MMX)
.Default(NoMMX3DNow);
MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel);

XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
.Case("+xop", XOP)
.Case("+fma4", FMA4)
Expand Down Expand Up @@ -1031,18 +1026,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
}

// Each case falls through to the previous one here.
switch (MMX3DNowLevel) {
case AMD3DNowAthlon:
Builder.defineMacro("__3dNOW_A__");
[[fallthrough]];
case AMD3DNow:
Builder.defineMacro("__3dNOW__");
[[fallthrough]];
case MMX:
if (HasMMX) {
Builder.defineMacro("__MMX__");
[[fallthrough]];
case NoMMX3DNow:
break;
}

if (CPU >= CK_i486 || CPU == CK_None) {
Expand All @@ -1061,8 +1046,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,

bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
return llvm::StringSwitch<bool>(Name)
.Case("3dnow", true)
.Case("3dnowa", true)
.Case("adx", true)
.Case("aes", true)
.Case("amx-bf16", true)
Expand Down Expand Up @@ -1232,9 +1215,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("widekl", HasWIDEKL)
.Case("lwp", HasLWP)
.Case("lzcnt", HasLZCNT)
.Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
.Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
.Case("mmx", MMX3DNowLevel >= MMX)
.Case("mmx", HasMMX)
.Case("movbe", HasMOVBE)
.Case("movdiri", HasMOVDIRI)
.Case("movdir64b", HasMOVDIR64B)
Expand Down
10 changes: 2 additions & 8 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
AVX2,
AVX512F
} SSELevel = NoSSE;
enum MMX3DNowEnum {
NoMMX3DNow,
MMX,
AMD3DNow,
AMD3DNowAthlon
} MMX3DNowLevel = NoMMX3DNow;
bool HasMMX = false;
enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP;
enum AddrSpace { ptr32_sptr = 270, ptr32_uptr = 271, ptr64 = 272 };

Expand Down Expand Up @@ -348,8 +343,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
return "avx512";
if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
return "avx";
if (getTriple().getArch() == llvm::Triple::x86 &&
MMX3DNowLevel == NoMMX3DNow)
if (getTriple().getArch() == llvm::Triple::x86 && !HasMMX)
return "no-mmx";
return "";
}
Expand Down
8 changes: 0 additions & 8 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15969,14 +15969,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Ops[0]});
}

// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
return Builder.CreateCall(F, Ops, "pswapd");
}
case X86::BI__builtin_ia32_rdrand16_step:
case X86::BI__builtin_ia32_rdrand32_step:
case X86::BI__builtin_ia32_rdrand64_step:
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/Driver/ToolChains/Arch/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,4 +310,17 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
Features.push_back("+prefer-no-scatter");
if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32))
Features.push_back("+inline-asm-use-gpr32");

// Warn for removed 3dnow support
if (const Arg *A =
Args.getLastArg(options::OPT_m3dnowa, options::OPT_mno_3dnowa,
options::OPT_mno_3dnow)) {
if (A->getOption().matches(options::OPT_m3dnowa))
D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
}
if (const Arg *A =
Args.getLastArg(options::OPT_m3dnow, options::OPT_mno_3dnow)) {
if (A->getOption().matches(options::OPT_m3dnow))
D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
}
}
147 changes: 6 additions & 141 deletions clang/lib/Headers/mm3dnow.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,151 +7,16 @@
*===-----------------------------------------------------------------------===
*/

// 3dNow intrinsics are no longer supported.

#ifndef _MM3DNOW_H_INCLUDED
#define _MM3DNOW_H_INCLUDED

#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS
#warning "The <mm3dnow.h> header is deprecated, and 3dNow! intrinsics are unsupported. For other intrinsics, include <x86intrin.h>, instead."
#endif

#include <mmintrin.h>
#include <prfchwintrin.h>

typedef float __v2sf __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
_m_femms(void) {
__builtin_ia32_femms();
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pavgusb(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2id(__m64 __m) {
return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfadd(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfcmpeq(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfcmpge(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfcmpgt(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfmax(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfmin(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfmul(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrcp(__m64 __m) {
return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrcpit1(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrcpit2(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrsqrt(__m64 __m) {
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfsub(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfsubr(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pi2fd(__m64 __m) {
return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pmulhrw(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
}

/* Handle the 3dnowa instructions here. */
#undef __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2iw(__m64 __m) {
return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfnacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pfpnacc(__m64 __m1, __m64 __m2) {
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pi2fw(__m64 __m) {
return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pswapdsf(__m64 __m) {
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
}

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pswapdsi(__m64 __m) {
return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
}

#undef __DEFAULT_FN_ATTRS

#endif
4 changes: 0 additions & 4 deletions clang/lib/Headers/x86intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@

#include <immintrin.h>

#if !defined(__SCE__) || __has_feature(modules) || defined(__3dNOW__)
#include <mm3dnow.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
#endif
Expand Down
Loading

0 comments on commit f0eb558

Please sign in to comment.