pmap: standardize promotion conditions between amd64 and arm64
On amd64, don't abort promotion due to a missing accessed bit in a
mapping before possibly write protecting that mapping.  Previously,
in some cases, we might not repromote after madvise(MADV_FREE) because
there was no write fault to trigger the repromotion.  Conversely, on
arm64, don't pointlessly, yet harmlessly, write protect physical pages
that aren't part of the physical superpage.

Don't count aborted promotions due to explicit promotion prohibition
(arm64) or hardware errata (amd64) as ordinary promotion failures.

Reviewed by:	kib, markj
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D36916
alcriceedu committed Dec 12, 2022
1 parent 9dda00d commit f0878da
Showing 2 changed files with 74 additions and 13 deletions.
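
The scenario described in the commit message can be driven entirely from userland. The following sketch is illustrative rather than part of the commit: the single 2MB superpage and the MAP_ALIGNED_SUPER request are assumptions made for the example. Whether the writes in the final loop take a page fault, and so can trigger repromotion, is precisely what the reordered checks below guarantee.

/*
 * Hypothetical repro sketch (not from the commit): apply MADV_FREE to
 * part of a superpage-backed range, then rewrite it so that the last
 * 4KB page of the freed range is not the last one rewritten.
 */
#include <sys/mman.h>
#include <string.h>

#define	SZ	(2UL << 20)		/* one 2MB superpage */

int
main(void)
{
	char *p = mmap(NULL, SZ, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_ALIGNED_SUPER, -1, 0);
	if (p == MAP_FAILED)
		return (1);
	memset(p, 1, SZ);		/* populate; mapping becomes promotable */
	madvise(p, SZ / 2, MADV_FREE);	/* demotes; clears PG_A/PG_M (AF on arm64) */
	for (size_t i = SZ / 2; i-- > 0;)
		p[i] = 2;		/* recycle [S, E), last 4KB page first */
	return (0);
}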
37 changes: 30 additions & 7 deletions sys/amd64/amd64/pmap.c
@@ -6771,19 +6771,36 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,

/*
* Examine the first PTE in the specified PTP. Abort if this PTE is
-	 * either invalid, unused, or does not map the first 4KB physical page
-	 * within a 2MB page.
+	 * ineligible for promotion due to hardware errata, invalid, or does
+	 * not map the first 4KB physical page within a 2MB page.
*/
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
newpde = *firstpte;
-	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
-	    !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
-	    newpde))) {
+	if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde)))
+		return;
+	if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
}
+
+	/*
+	 * Both here and in the below "for" loop, to allow for repromotion
+	 * after MADV_FREE, conditionally write protect a clean PTE before
+	 * possibly aborting the promotion due to other PTE attributes. Why?
+	 * Suppose that MADV_FREE is applied to a part of a superpage, the
+	 * address range [S, E). pmap_advise() will demote the superpage
+	 * mapping, destroy the 4KB page mapping at the end of [S, E), and
+	 * clear PG_M and PG_A in the PTEs for the rest of [S, E). Later,
+	 * imagine that the memory in [S, E) is recycled, but the last 4KB
+	 * page in [S, E) is not the last to be rewritten, or simply accessed.
+	 * In other words, there is still a 4KB page in [S, E), call it P,
+	 * that is writeable but PG_M and PG_A are clear in P's PTE. Unless
+	 * we write protect P before aborting the promotion, if and when P is
+	 * finally rewritten, there won't be a page fault to trigger
+	 * repromotion.
+	 */
setpde:
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
@@ -6794,16 +6811,22 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
goto setpde;
newpde &= ~PG_RW;
}
+	if ((newpde & PG_A) == 0) {
+		counter_u64_add(pmap_pde_p_failures, 1);
+		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+		    " in pmap %p", va, pmap);
+		return;
+	}

/*
* Examine each of the other PTEs in the specified PTP. Abort if this
* PTE maps an unexpected 4KB physical page or does not have identical
* characteristics to the first PTE.
*/
-	pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+	pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
oldpte = *pte;
-		if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+		if ((oldpte & (PG_FRAME | PG_V)) != pa) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
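
Distilled from the amd64 hunks above, the following self-contained userland model shows the ordering that matters. The promote_check() helper is hypothetical, the PG_* values mirror amd64's page-table bits, and a plain store stands in for the kernel's atomic-compare-and-set loop: a clean but writable PTE is write protected before the accessed-bit test can abort the promotion, so a later write to that page faults and can repromote.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	PG_V	0x001UL		/* valid */
#define	PG_RW	0x002UL		/* writeable */
#define	PG_A	0x020UL		/* accessed */
#define	PG_M	0x040UL		/* modified (dirty) */

/*
 * Returns true if promotion may continue. The point is the order: a
 * clean but writable PTE is write protected *before* the accessed-bit
 * test can abort, so a later write faults and can trigger repromotion.
 */
static bool
promote_check(uint64_t *pte)
{
	if ((*pte & (PG_M | PG_RW)) == PG_RW)
		*pte &= ~PG_RW;		/* clean and writable: write protect */
	if ((*pte & PG_A) == 0)
		return (false);		/* abort, but the PTE is now read-only */
	return (true);
}

int
main(void)
{
	/* A PTE as MADV_FREE leaves it: valid and writable, PG_A/PG_M clear. */
	uint64_t pte = PG_V | PG_RW;

	printf("promote=%d write-protected=%d\n",
	    promote_check(&pte), (pte & PG_RW) == 0);
	return (0);
}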
50 changes: 44 additions & 6 deletions sys/arm64/arm64/pmap.c
@@ -3955,17 +3955,38 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);

+	/*
+	 * Examine the first L3E in the specified PTP. Abort if this L3E is
+	 * ineligible for promotion, invalid, or does not map the first 4KB
+	 * physical page within a 2MB page.
+	 */
firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
newl2 = pmap_load(firstl3);
-
-	if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF ||
-	    (newl2 & ATTR_SW_NO_PROMOTE) != 0) {
+	if ((newl2 & ATTR_SW_NO_PROMOTE) != 0)
+		return;
+	if ((newl2 & ((~ATTR_MASK & L2_OFFSET) | ATTR_DESCR_MASK)) != L3_PAGE) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
return;
}

+	/*
+	 * Both here and in the below "for" loop, to allow for repromotion
+	 * after MADV_FREE, conditionally write protect a clean L3E before
+	 * possibly aborting the promotion due to other L3E attributes. Why?
+	 * Suppose that MADV_FREE is applied to a part of a superpage, the
+	 * address range [S, E). pmap_advise() will demote the superpage
+	 * mapping, destroy the 4KB page mapping at the end of [S, E), and
+	 * set AP_RO and clear AF in the L3Es for the rest of [S, E). Later,
+	 * imagine that the memory in [S, E) is recycled, but the last 4KB
+	 * page in [S, E) is not the last to be rewritten, or simply accessed.
+	 * In other words, there is still a 4KB page in [S, E), call it P,
+	 * that is writeable but AP_RO is set and AF is clear in P's L3E.
+	 * Unless we write protect P before aborting the promotion, if and
+	 * when P is finally rewritten, there won't be a page fault to trigger
+	 * repromotion.
+	 */
setl2:
if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
@@ -3977,10 +3998,27 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
goto setl2;
newl2 &= ~ATTR_SW_DBM;
}
+	if ((newl2 & ATTR_AF) == 0) {
+		atomic_add_long(&pmap_l2_p_failures, 1);
+		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
+		    " in pmap %p", va, pmap);
+		return;
+	}

-	pa = newl2 + L2_SIZE - PAGE_SIZE;
+	/*
+	 * Examine each of the other L3Es in the specified PTP. Abort if this
+	 * L3E maps an unexpected 4KB physical page or does not have identical
+	 * characteristics to the first L3E.
+	 */
+	pa = (newl2 & (~ATTR_MASK | ATTR_DESCR_MASK)) + L2_SIZE - PAGE_SIZE;
for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
oldl3 = pmap_load(l3);
+		if ((oldl3 & (~ATTR_MASK | ATTR_DESCR_MASK)) != pa) {
+			atomic_add_long(&pmap_l2_p_failures, 1);
+			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
+			    " in pmap %p", va, pmap);
+			return;
+		}
setl3:
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
@@ -3994,7 +4032,7 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
goto setl3;
oldl3 &= ~ATTR_SW_DBM;
}
-		if (oldl3 != pa) {
+		if ((oldl3 & ATTR_MASK) != (newl2 & ATTR_MASK)) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
@@ -4033,7 +4071,7 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,

atomic_add_long(&pmap_l2_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
-	    pmap);
+	    pmap);
}
#endif /* VM_NRESERVLEVEL > 0 */

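
The arm64 side of the change can be modeled the same way. In the sketch below, l3e_check() is a hypothetical helper and the attribute constants are simplified stand-ins, not the kernel's ATTR_* definitions: an L3E that maps a page outside the physical superpage is rejected before its permissions are touched, so such pages are no longer pointlessly write protected.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the arm64 L3E fields; values are illustrative. */
#define	ATTR_MASK	0xfffUL		/* attribute bits */
#define	AP_RO		0x080UL		/* read-only access permission */
#define	SW_DBM		0x800UL		/* software DBM: logically writable */

/*
 * Returns true if this L3E may join the promotion. The point is the
 * order: an entry mapping the wrong physical page is rejected before
 * its permissions are touched.
 */
static bool
l3e_check(uint64_t *l3e, uint64_t pa, uint64_t firstl3e)
{
	if ((*l3e & ~ATTR_MASK) != pa)
		return (false);		/* wrong page: abort, entry untouched */
	if ((*l3e & (AP_RO | SW_DBM)) == (AP_RO | SW_DBM))
		*l3e &= ~SW_DBM;	/* clean: write protect for repromotion */
	return ((*l3e & ATTR_MASK) == (firstl3e & ATTR_MASK));
}

int
main(void)
{
	/* An entry that maps a page outside the expected superpage. */
	uint64_t stray = 0x123000UL | AP_RO | SW_DBM;

	printf("promote=%d still-writable=%d\n",
	    l3e_check(&stray, 0x456000UL, AP_RO),
	    (stray & SW_DBM) != 0);
	return (0);
}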
