-
Notifications
You must be signed in to change notification settings - Fork 53.5k
/
xstate.c
1945 lines (1659 loc) · 53.3 KB
/
xstate.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// SPDX-License-Identifier: GPL-2.0-only
/*
* xsave/xrstor support.
*
* Author: Suresh Siddha <suresh.b.siddha@intel.com>
*/
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>
#include <uapi/asm/elf.h>
#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"
#define for_each_extended_xfeature(bit, mask) \
(bit) = FIRST_EXTENDED_XFEATURE; \
for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
/*
* Although we spell it out in here, the Processor Trace
* xfeature is completely unused. We use other mechanisms
* to save/restore PT state in Linux.
*/
static const char *xfeature_names[] =
{
"x87 floating point registers",
"SSE registers",
"AVX registers",
"MPX bounds registers",
"MPX CSR",
"AVX-512 opmask",
"AVX-512 Hi256",
"AVX-512 ZMM_Hi256",
"Processor Trace (unused)",
"Protection Keys User registers",
"PASID state",
"Control-flow User registers",
"Control-flow Kernel registers (unused)",
"unknown xstate feature",
"unknown xstate feature",
"unknown xstate feature",
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
"unknown xstate feature",
};
static unsigned short xsave_cpuid_features[] __initdata = {
[XFEATURE_FP] = X86_FEATURE_FPU,
[XFEATURE_SSE] = X86_FEATURE_XMM,
[XFEATURE_YMM] = X86_FEATURE_AVX,
[XFEATURE_BNDREGS] = X86_FEATURE_MPX,
[XFEATURE_BNDCSR] = X86_FEATURE_MPX,
[XFEATURE_OPMASK] = X86_FEATURE_AVX512F,
[XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F,
[XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F,
[XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
[XFEATURE_PKRU] = X86_FEATURE_OSPKE,
[XFEATURE_PASID] = X86_FEATURE_ENQCMD,
[XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
};
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
#define XSTATE_FLAG_SUPERVISOR BIT(0)
#define XSTATE_FLAG_ALIGNED64 BIT(1)
/*
* Return whether the system supports a given xfeature.
*
* Also return the name of the (most advanced) feature that the caller requested:
*/
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
if (unlikely(feature_name)) {
long xfeature_idx, max_idx;
u64 xfeatures_print;
/*
* So we use FLS here to be able to print the most advanced
* feature that was requested but is missing. So if a driver
* asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
* missing AVX feature - this is the most informative message
* to users:
*/
if (xfeatures_missing)
xfeatures_print = xfeatures_missing;
else
xfeatures_print = xfeatures_needed;
xfeature_idx = fls64(xfeatures_print)-1;
max_idx = ARRAY_SIZE(xfeature_names)-1;
xfeature_idx = min(xfeature_idx, max_idx);
*feature_name = xfeature_names[xfeature_idx];
}
if (xfeatures_missing)
return 0;
return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
static bool xfeature_is_aligned64(int xfeature_nr)
{
return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
}
static bool xfeature_is_supervisor(int xfeature_nr)
{
return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
}
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
unsigned int offs, i;
/*
* Non-compacted format and legacy features use the cached fixed
* offsets.
*/
if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
xfeature <= XFEATURE_SSE)
return xstate_offsets[xfeature];
/*
* Compacted format offsets depend on the actual content of the
* compacted xsave area which is determined by the xcomp_bv header
* field.
*/
offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
for_each_extended_xfeature(i, xcomp_bv) {
if (xfeature_is_aligned64(i))
offs = ALIGN(offs, 64);
if (i == xfeature)
break;
offs += xstate_sizes[i];
}
return offs;
}
/*
* Enable the extended processor state save/restore feature.
* Called once per CPU onlining.
*/
void fpu__init_cpu_xstate(void)
{
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
return;
cr4_set_bits(X86_CR4_OSXSAVE);
/*
* Must happen after CR4 setup and before xsetbv() to allow KVM
* lazy passthrough. Write independent of the dynamic state static
* key as that does not work on the boot CPU. This also ensures
* that any stale state is wiped out from XFD. Reset the per CPU
* xfd cache too.
*/
if (cpu_feature_enabled(X86_FEATURE_XFD))
xfd_set_state(init_fpstate.xfd);
/*
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
* managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
* states can be set here.
*/
xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
/*
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
*/
if (boot_cpu_has(X86_FEATURE_XSAVES)) {
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
}
static bool xfeature_enabled(enum xfeature xfeature)
{
return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}
/*
* Record the offsets and sizes of various xstates contained
* in the XSAVE state memory layout.
*/
static void __init setup_xstate_cache(void)
{
u32 eax, ebx, ecx, edx, i;
/* start at the beginning of the "extended state" */
unsigned int last_good_offset = offsetof(struct xregs_state,
extended_state_area);
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
xstate_offsets[XFEATURE_FP] = 0;
xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
xmm_space);
xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
xstate_sizes[i] = eax;
xstate_flags[i] = ecx;
/*
* If an xfeature is supervisor state, the offset in EBX is
* invalid, leave it to -1.
*/
if (xfeature_is_supervisor(i))
continue;
xstate_offsets[i] = ebx;
/*
* In our xstate size checks, we assume that the highest-numbered
* xstate feature has the highest offset in the buffer. Ensure
* it does.
*/
WARN_ONCE(last_good_offset > xstate_offsets[i],
"x86/fpu: misordered xstate at %d\n", last_good_offset);
last_good_offset = xstate_offsets[i];
}
}
static void __init print_xstate_feature(u64 xstate_mask)
{
const char *feature_name;
if (cpu_has_xfeatures(xstate_mask, &feature_name))
pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
}
/*
* Print out all the supported xstate features:
*/
static void __init print_xstate_features(void)
{
print_xstate_feature(XFEATURE_MASK_FP);
print_xstate_feature(XFEATURE_MASK_SSE);
print_xstate_feature(XFEATURE_MASK_YMM);
print_xstate_feature(XFEATURE_MASK_BNDREGS);
print_xstate_feature(XFEATURE_MASK_BNDCSR);
print_xstate_feature(XFEATURE_MASK_OPMASK);
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
print_xstate_feature(XFEATURE_MASK_PASID);
print_xstate_feature(XFEATURE_MASK_CET_USER);
print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
}
/*
* This check is important because it is easy to get XSTATE_*
* confused with XSTATE_BIT_*.
*/
#define CHECK_XFEATURE(nr) do { \
WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \
WARN_ON(nr >= XFEATURE_MAX); \
} while (0)
/*
* Print out xstate component offsets and sizes
*/
static void __init print_xstate_offset_size(void)
{
int i;
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
i, xstate_sizes[i]);
}
}
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
*/
static __init void os_xrstor_booting(struct xregs_state *xstate)
{
u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
u32 lmask = mask;
u32 hmask = mask >> 32;
int err;
if (cpu_feature_enabled(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
/*
* We should never fault when copying from a kernel buffer, and the FPU
* state we set at boot time should be valid.
*/
WARN_ON_FPU(err);
}
/*
* All supported features have either init state all zeros or are
* handled in setup_init_fpu() individually. This is an explicit
* feature list and does not use XFEATURE_MASK*SUPPORTED to catch
* newly added supported features at build time and make people
* actually look at the init state for the new feature.
*/
#define XFEATURES_INIT_FPSTATE_HANDLED \
(XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
XFEATURE_MASK_Hi16_ZMM | \
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR | \
XFEATURE_MASK_PASID | \
XFEATURE_MASK_CET_USER | \
XFEATURE_MASK_XTILE)
/*
* setup the xstate image representing the init state
*/
static void __init setup_init_fpu_buf(void)
{
BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
XFEATURES_INIT_FPSTATE_HANDLED);
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;
print_xstate_features();
xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
/*
* Init all the features state with header.xfeatures being 0x0
*/
os_xrstor_booting(&init_fpstate.regs.xsave);
/*
* All components are now in init state. Read the state back so
* that init_fpstate contains all non-zero init state. This only
* works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
* those use the init optimization which skips writing data for
* components in init state.
*
* XSAVE could be used, but that would require to reshuffle the
* data when XSAVEC/S is available because XSAVEC/S uses xstate
* compaction. But doing so is a pointless exercise because most
* components have an all zeros init state except for the legacy
* ones (FP and SSE). Those can be saved with FXSAVE into the
* legacy area. Adding new features requires to ensure that init
* state is all zeroes or if not to add the necessary handling
* here.
*/
fxsave(&init_fpstate.regs.fxsave);
}
int xfeature_size(int xfeature_nr)
{
u32 eax, ebx, ecx, edx;
CHECK_XFEATURE(xfeature_nr);
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
return eax;
}
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
static int validate_user_xstate_header(const struct xstate_header *hdr,
struct fpstate *fpstate)
{
/* No unknown or supervisor features may be set */
if (hdr->xfeatures & ~fpstate->user_xfeatures)
return -EINVAL;
/* Userspace must use the uncompacted format */
if (hdr->xcomp_bv)
return -EINVAL;
/*
* If 'reserved' is shrunken to add a new field, make sure to validate
* that new field here!
*/
BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
/* No reserved bits may be set */
if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
return -EINVAL;
return 0;
}
static void __init __xstate_dump_leaves(void)
{
int i;
u32 eax, ebx, ecx, edx;
static int should_dump = 1;
if (!should_dump)
return;
should_dump = 0;
/*
* Dump out a few leaves past the ones that we support
* just in case there are some goodies up there
*/
for (i = 0; i < XFEATURE_MAX + 10; i++) {
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
XSTATE_CPUID, i, eax, ebx, ecx, edx);
}
}
#define XSTATE_WARN_ON(x, fmt, ...) do { \
if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \
__xstate_dump_leaves(); \
} \
} while (0)
#define XCHECK_SZ(sz, nr, __struct) ({ \
if (WARN_ONCE(sz != sizeof(__struct), \
"[%s]: struct is %zu bytes, cpu state %d bytes\n", \
xfeature_names[nr], sizeof(__struct), sz)) { \
__xstate_dump_leaves(); \
} \
true; \
})
/**
* check_xtile_data_against_struct - Check tile data state size.
*
* Calculate the state size by multiplying the single tile size which is
* recorded in a C struct, and the number of tiles that the CPU informs.
* Compare the provided size with the calculation.
*
* @size: The tile data state size
*
* Returns: 0 on success, -EINVAL on mismatch.
*/
static int __init check_xtile_data_against_struct(int size)
{
u32 max_palid, palid, state_size;
u32 eax, ebx, ecx, edx;
u16 max_tile;
/*
* Check the maximum palette id:
* eax: the highest numbered palette subleaf.
*/
cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
/*
* Cross-check each tile size and find the maximum number of
* supported tiles.
*/
for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
u16 tile_size, max;
/*
* Check the tile size info:
* eax[31:16]: bytes per title
* ebx[31:16]: the max names (or max number of tiles)
*/
cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
tile_size = eax >> 16;
max = ebx >> 16;
if (tile_size != sizeof(struct xtile_data)) {
pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
__stringify(XFEATURE_XTILE_DATA),
sizeof(struct xtile_data), tile_size);
__xstate_dump_leaves();
return -EINVAL;
}
if (max > max_tile)
max_tile = max;
}
state_size = sizeof(struct xtile_data) * max_tile;
if (size != state_size) {
pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
__stringify(XFEATURE_XTILE_DATA), state_size, size);
__xstate_dump_leaves();
return -EINVAL;
}
return 0;
}
/*
* We have a C struct for each 'xstate'. We need to ensure
* that our software representation matches what the CPU
* tells us about the state's size.
*/
static bool __init check_xstate_against_struct(int nr)
{
/*
* Ask the CPU for the size of the state.
*/
int sz = xfeature_size(nr);
/*
* Match each CPU state with the corresponding software
* structure.
*/
switch (nr) {
case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct);
case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state);
case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
default:
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
return false;
}
return true;
}
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
unsigned int topmost = fls64(xfeatures) - 1;
unsigned int offset = xstate_offsets[topmost];
if (topmost <= XFEATURE_SSE)
return sizeof(struct xregs_state);
if (compacted)
offset = xfeature_get_offset(xfeatures, topmost);
return offset + xstate_sizes[topmost];
}
/*
* This essentially double-checks what the cpu told us about
* how large the XSAVE buffer needs to be. We are recalculating
* it to be safe.
*
* Independent XSAVE features allocate their own buffers and are not
* covered by these checks. Only the size of the buffer for task->fpu
* is checked here.
*/
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
int i;
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
if (!check_xstate_against_struct(i))
return false;
/*
* Supervisor state components can be managed only by
* XSAVES.
*/
if (!xsaves && xfeature_is_supervisor(i)) {
XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
return false;
}
}
size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
XSTATE_WARN_ON(size != kernel_size,
"size %u != kernel_size %u\n", size, kernel_size);
return size == kernel_size;
}
/*
* Get total size of enabled xstates in XCR0 | IA32_XSS.
*
* Note the SDM's wording here. "sub-function 0" only enumerates
* the size of the *user* states. If we use it to size a buffer
* that we use 'XSAVES' on, we could potentially overflow the
* buffer because 'XSAVES' saves system states too.
*
* This also takes compaction into account. So this works for
* XSAVEC as well.
*/
static unsigned int __init get_compacted_size(void)
{
unsigned int eax, ebx, ecx, edx;
/*
* - CPUID function 0DH, sub-function 1:
* EBX enumerates the size (in bytes) required by
* the XSAVES instruction for an XSAVE area
* containing all the state components
* corresponding to bits currently set in
* XCR0 | IA32_XSS.
*
* When XSAVES is not available but XSAVEC is (virt), then there
* are no supervisor states, but XSAVEC still uses compacted
* format.
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
return ebx;
}
/*
* Get the total size of the enabled xstates without the independent supervisor
* features.
*/
static unsigned int __init get_xsave_compacted_size(void)
{
u64 mask = xfeatures_mask_independent();
unsigned int size;
if (!mask)
return get_compacted_size();
/* Disable independent features. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
/*
* Ask the hardware what size is required of the buffer.
* This is the size required for the task->fpu buffer.
*/
size = get_compacted_size();
/* Re-enable independent features so XSAVES will work on them again. */
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
return size;
}
static unsigned int __init get_xsave_size_user(void)
{
unsigned int eax, ebx, ecx, edx;
/*
* - CPUID function 0DH, sub-function 0:
* EBX enumerates the size (in bytes) required by
* the XSAVE instruction for an XSAVE area
* containing all the *user* state components
* corresponding to bits currently set in XCR0.
*/
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
return ebx;
}
static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
unsigned int user_size, kernel_size, kernel_default_size;
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
/* Uncompacted user space size */
user_size = get_xsave_size_user();
/*
* XSAVES kernel size includes supervisor states and uses compacted
* format. XSAVEC uses compacted format, but does not save
* supervisor states.
*
* XSAVE[OPT] do not support supervisor states so kernel and user
* size is identical.
*/
if (compacted)
kernel_size = get_xsave_compacted_size();
else
kernel_size = user_size;
kernel_default_size =
xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
if (!paranoid_xstate_size_valid(kernel_size))
return -EINVAL;
fpu_kernel_cfg.max_size = kernel_size;
fpu_user_cfg.max_size = user_size;
fpu_kernel_cfg.default_size = kernel_default_size;
fpu_user_cfg.default_size =
xstate_calculate_size(fpu_user_cfg.default_features, false);
return 0;
}
/*
* We enabled the XSAVE hardware, but something went wrong and
* we can not use it. Disable it.
*/
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
fpu_kernel_cfg.max_features = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
/* Restore the legacy size.*/
fpu_kernel_cfg.max_size = legacy_size;
fpu_kernel_cfg.default_size = legacy_size;
fpu_user_cfg.max_size = legacy_size;
fpu_user_cfg.default_size = legacy_size;
/*
* Prevent enabling the static branch which enables writes to the
* XFD MSR.
*/
init_fpstate.xfd = 0;
fpstate_reset(¤t->thread.fpu);
}
/*
* Enable and initialize the xsave feature.
* Called once per system bootup.
*/
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
unsigned int eax, ebx, ecx, edx;
u64 xfeatures;
int err;
int i;
if (!boot_cpu_has(X86_FEATURE_FPU)) {
pr_info("x86/fpu: No FPU detected\n");
return;
}
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
pr_info("x86/fpu: x87 FPU will use %s\n",
boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
return;
}
if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
WARN_ON_FPU(1);
return;
}
/*
* Find user xstates supported by the processor.
*/
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
/*
* Find supervisor xstates supported by the processor.
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/*
* This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG().
*/
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
fpu_kernel_cfg.max_features);
goto out_disable;
}
fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
XFEATURE_MASK_INDEPENDENT;
/*
* Clear XSAVE features that are disabled in the normal CPUID.
*/
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
unsigned short cid = xsave_cpuid_features[i];
/* Careful: X86_FEATURE_FPU is 0! */
if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
}
if (!cpu_feature_enabled(X86_FEATURE_XFD))
fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
else
fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
XFEATURE_MASK_SUPERVISOR_SUPPORTED;
fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
/* Clean out dynamic features from default */
fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
fpu_user_cfg.default_features = fpu_user_cfg.max_features;
fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
/* Store it for paranoia check at the end */
xfeatures = fpu_kernel_cfg.max_features;
/*
* Initialize the default XFD state in initfp_state and enable the
* dynamic sizing mechanism if dynamic states are available. The
* static key cannot be enabled here because this runs before
* jump_label_init(). This is delayed to an initcall.
*/
init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
/* Set up compaction feature bit */
if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
cpu_feature_enabled(X86_FEATURE_XSAVES))
setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
/* Cache size, offset and flags for initialization */
setup_xstate_cache();
err = init_xstate_size();
if (err)
goto out_disable;
/* Reset the state for the current task */
fpstate_reset(¤t->thread.fpu);
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
*/
update_regset_xstate_info(fpu_user_cfg.max_size,
fpu_user_cfg.max_features);
/*
* init_fpstate excludes dynamic states as they are large but init
* state is zero.
*/
init_fpstate.size = fpu_kernel_cfg.default_size;
init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
if (init_fpstate.size > sizeof(init_fpstate.regs)) {
pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
sizeof(init_fpstate.regs), init_fpstate.size);
goto out_disable;
}
setup_init_fpu_buf();
/*
* Paranoia check whether something in the setup modified the
* xfeatures mask.
*/
if (xfeatures != fpu_kernel_cfg.max_features) {
pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
xfeatures, fpu_kernel_cfg.max_features);
goto out_disable;
}
/*
* CPU capabilities initialization runs before FPU init. So
* X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
* functional, set the feature bit so depending code works.
*/
setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
fpu_kernel_cfg.max_features,
fpu_kernel_cfg.max_size,
boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
return;
out_disable:
/* something went wrong, try to boot without any XSAVE support */
fpu__init_disable_system_xstate(legacy_size);
}
/*
* Restore minimal FPU state after suspend:
*/
void fpu__resume_cpu(void)
{
/*
* Restore XCR0 on xsave capable CPUs:
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
/*
* Restore IA32_XSS. The same CPUID bit enumerates support
* of XSAVES and MSR_IA32_XSS.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
if (fpu_state_size_dynamic())
wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
}
/*
* Given an xstate feature nr, calculate where in the xsave
* buffer the state is. Callers should ensure that the buffer
* is valid.
*/
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
u64 xcomp_bv = xsave->header.xcomp_bv;
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
return NULL;
if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
return NULL;
}
return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
}
/*
* Given the xsave area and a state inside, this function returns the
* address of the state.
*
* This is the API that is called to get xstate address in either
* standard format or compacted format of xsave area.
*
* Note that if there is no data for the field in the xsave buffer
* this will return NULL.
*
* Inputs:
* xstate: the thread's storage area for all FPU data
* xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
* XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area, or NULL if the
* field is not present in the xsave buffer.
*/
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
/*
* Do we even *have* xsave state?
*/
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return NULL;
/*
* We should not ever be requesting features that we
* have not enabled.
*/
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
return NULL;
/*
* This assumes the last 'xsave*' instruction to
* have requested that 'xfeature_nr' be saved.
* If it did not, we might be seeing and old value
* of the field in the buffer.
*
* This can happen because the last 'xsave' did not
* request that this feature be saved (unlikely)
* or because the "init optimization" caused it
* to not be saved.
*/
if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
return NULL;
return __raw_xsave_addr(xsave, xfeature_nr);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);