From 36c96a9653156044ab450cc43d032b30757803b6 Mon Sep 17 00:00:00 2001
From: Martin Habovstiak
Date: Wed, 5 Jan 2022 18:34:08 +0100
Subject: [PATCH 1/4] Clean up once code

This was originally intended to just change `swap` to `compare_exchange`,
however other changes were needed too:

* The load with `Relaxed` ordering seemed wrong: if it loaded `DONE` before
  the while loop, it wouldn't synchronize with the storing thread.
* There's no point in repeatedly checking whether the value is `NONE`, so
  this check was moved above the while loop.
* Due to a mistake, I wanted to inspect the code using `miri` but couldn't
  because of an FFI call. Separating the once implementation from its
  consumer allowed writing a test in pure Rust and inspecting it via `miri`.
* The initializing code now skips loading the value again because it
  already knows the value is initialized.
* The orderings are now explained in the comments.
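
For illustration, here is how a consumer is expected to drive the new
primitive. This is a sketch only; `CACHE_READY`, `CACHE` and the value 42
are made-up names, not part of this change:

    static CACHE_READY: once::Once = once::Once::INIT;
    static mut CACHE: u64 = 0;

    fn cached() -> u64 {
        // Exactly one closure ever runs; every other caller stays inside
        // `run` until an Acquire load observes the Release store of DONE.
        CACHE_READY.run(|| unsafe { CACHE = 42 });
        // The Release/Acquire pairing makes the write above visible here.
        unsafe { CACHE }
    }
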
---
 secp256k1-sys/src/lib.rs | 114 +++++++++++++++++++++++++++++----------
 1 file changed, 85 insertions(+), 29 deletions(-)

diff --git a/secp256k1-sys/src/lib.rs b/secp256k1-sys/src/lib.rs
index 4f0f0d5de..4281b59ed 100644
--- a/secp256k1-sys/src/lib.rs
+++ b/secp256k1-sys/src/lib.rs
@@ -663,7 +663,6 @@ impl<T> CPtr for [T] {
 #[cfg(fuzzing)]
 mod fuzz_dummy {
     use super::*;
-    use core::sync::atomic::{AtomicUsize, Ordering};
 
     #[cfg(rust_secp_no_symbol_renaming)]
     compile_error!("We do not support fuzzing with rust_secp_no_symbol_renaming");
@@ -673,6 +672,81 @@ mod fuzz_dummy {
         fn rustsecp256k1_v0_4_1_context_preallocated_clone(cx: *const Context, prealloc: *mut c_void) -> *mut Context;
     }
 
+    mod once {
+        use core::sync::atomic::{AtomicUsize, Ordering};
+
+        const NONE: usize = 0;
+        const WORKING: usize = 1;
+        const DONE: usize = 2;
+
+        pub(crate) struct Once(AtomicUsize);
+
+        impl Once {
+            pub(crate) const INIT: Once = Once(AtomicUsize::new(NONE));
+
+            pub(crate) fn run(&self, f: impl FnOnce()) {
+                // Acquire ordering because if it's DONE the following reads must go after this load.
+                let mut have_ctx = self.0.load(Ordering::Acquire);
+                if have_ctx == NONE {
+                    // Ordering: on success we're only signalling to other threads that work is in progress,
+                    // without transferring other data, so it should be Relaxed; on failure the value may be
+                    // DONE, so we want Acquire to safely proceed in that case.
+                    // However compare_exchange doesn't allow the failure ordering to be stronger than success,
+                    // so both are Acquire.
+                    match self.0.compare_exchange(NONE, WORKING, Ordering::Acquire, Ordering::Acquire) {
+                        Ok(_) => {
+                            f();
+                            // We wrote data in memory that other threads may read, so we need Release.
+                            self.0.store(DONE, Ordering::Release);
+                            return;
+                        },
+                        Err(value) => have_ctx = value,
+                    }
+                }
+                while have_ctx != DONE {
+                    // Another thread is building, just busy-loop until they're done.
+                    assert_eq!(have_ctx, WORKING);
+                    // This thread will read whatever the other thread wrote, so this needs to be Acquire.
+                    have_ctx = self.0.load(Ordering::Acquire);
+                    #[cfg(feature = "std")]
+                    ::std::thread::yield_now();
+                }
+            }
+        }
+
+        #[cfg(all(test, feature = "std"))]
+        mod tests {
+            use super::Once;
+
+            #[test]
+            fn test_once() {
+                use std::cell::UnsafeCell;
+                static ONCE: Once = Once::INIT;
+                struct PretendSync(UnsafeCell<u32>);
+
+                static VALUE: PretendSync = PretendSync(UnsafeCell::new(42));
+                unsafe impl Sync for PretendSync {}
+
+                let threads = (0..5).map(|_| ::std::thread::spawn(|| {
+                    ONCE.run(|| unsafe {
+                        assert_eq!(*VALUE.0.get(), 42);
+                        *VALUE.0.get() = 47;
+                    });
+                    unsafe {
+                        assert_eq!(*VALUE.0.get(), 47);
+                    }
+                }))
+                .collect::<Vec<_>>();
+                for thread in threads {
+                    thread.join().unwrap();
+                }
+                unsafe {
+                    assert_eq!(*VALUE.0.get(), 47);
+                }
+            }
+        }
+    }
+
     #[cfg(feature = "lowmemory")]
     const CTX_SIZE: usize = 1024 * 65;
     #[cfg(not(feature = "lowmemory"))]
@@ -683,10 +757,7 @@ mod fuzz_dummy {
         CTX_SIZE
     }
 
-    static HAVE_PREALLOCATED_CONTEXT: AtomicUsize = AtomicUsize::new(0);
-    const HAVE_CONTEXT_NONE: usize = 0;
-    const HAVE_CONTEXT_WORKING: usize = 1;
-    const HAVE_CONTEXT_DONE: usize = 2;
+    static HAVE_PREALLOCATED_CONTEXT: once::Once = once::Once::INIT;
     static mut PREALLOCATED_CONTEXT: [u8; CTX_SIZE] = [0; CTX_SIZE];
     pub unsafe fn secp256k1_context_preallocated_create(prealloc: *mut c_void, flags: c_uint) -> *mut Context {
         // While applications should generally avoid creating too many contexts, sometimes fuzzers
@@ -694,30 +765,15 @@ mod fuzz_dummy {
         // perform tasks repeatedly which real applications may only do rarely. Thus, we want to
         // avoid being overly slow here. We do so by having a static context and copying it into
         // new buffers instead of recalculating it. Because we shouldn't rely on std, we use a
         // simple hand-written OnceFlag built out of an atomic to gate the global static.
-        let mut have_ctx = HAVE_PREALLOCATED_CONTEXT.load(Ordering::Relaxed);
-        while have_ctx != HAVE_CONTEXT_DONE {
-            if have_ctx == HAVE_CONTEXT_NONE {
-                have_ctx = HAVE_PREALLOCATED_CONTEXT.swap(HAVE_CONTEXT_WORKING, Ordering::AcqRel);
-                if have_ctx == HAVE_CONTEXT_NONE {
-                    assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + std::mem::size_of::<c_uint>() <= CTX_SIZE);
-                    assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
-                        PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
-                        SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
-                        PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
-                    assert_eq!(HAVE_PREALLOCATED_CONTEXT.swap(HAVE_CONTEXT_DONE, Ordering::AcqRel),
-                        HAVE_CONTEXT_WORKING);
-                } else if have_ctx == HAVE_CONTEXT_DONE {
-                    // Another thread finished while we were swapping.
-                    HAVE_PREALLOCATED_CONTEXT.store(HAVE_CONTEXT_DONE, Ordering::Release);
-                }
-            } else {
-                // Another thread is building, just busy-loop until they're done.
-                assert_eq!(have_ctx, HAVE_CONTEXT_WORKING);
-                have_ctx = HAVE_PREALLOCATED_CONTEXT.load(Ordering::Acquire);
-                #[cfg(feature = "std")]
-                std::thread::yield_now();
-            }
-        }
+
+        HAVE_PREALLOCATED_CONTEXT.run(|| {
+            assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + std::mem::size_of::<c_uint>() <= CTX_SIZE);
+            assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
+                PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
+                SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
+                PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
+        });
+
         ptr::copy_nonoverlapping(PREALLOCATED_CONTEXT[..].as_ptr(), prealloc as *mut u8, CTX_SIZE);
         let ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(std::mem::size_of::<c_uint>());
         (ptr as *mut c_uint).write(flags);

From 5cbf5ee03986e939a8e49fe6f71542881c68613a Mon Sep 17 00:00:00 2001
From: Martin Habovstiak
Date: Wed, 5 Jan 2022 21:50:29 +0100
Subject: [PATCH 2/4] Optimize loading of preallocated context

This caches the initialization state of the preallocated context in a
thread-local variable, avoiding atomic operations in subsequent accesses.
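
The pattern, sketched here with `std::sync::Once` standing in for the
crate's hand-rolled flag (all names below are hypothetical):

    use std::cell::Cell;
    use std::sync::Once;

    static INIT: Once = Once::new();
    thread_local! {
        // Per-thread memo of the global flag: once it's true, later calls on
        // this thread skip the atomic synchronization in `call_once` entirely.
        static DONE_ON_THIS_THREAD: Cell<bool> = Cell::new(false);
    }

    fn ensure_init(f: impl FnOnce()) {
        if !DONE_ON_THIS_THREAD.with(|done| done.get()) {
            INIT.call_once(f);
            DONE_ON_THIS_THREAD.with(|done| done.set(true));
        }
    }

The thread-local is sound as a cache because it can only become true after
`call_once` has completed on this thread, so a true value always implies
the global initialization is done.
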
---
 secp256k1-sys/src/lib.rs | 28 +++++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/secp256k1-sys/src/lib.rs b/secp256k1-sys/src/lib.rs
index 4281b59ed..187c34275 100644
--- a/secp256k1-sys/src/lib.rs
+++ b/secp256k1-sys/src/lib.rs
@@ -759,6 +759,10 @@ mod fuzz_dummy {
 
     static HAVE_PREALLOCATED_CONTEXT: once::Once = once::Once::INIT;
     static mut PREALLOCATED_CONTEXT: [u8; CTX_SIZE] = [0; CTX_SIZE];
+    #[cfg(feature = "std")]
+    thread_local! {
+        static CACHE_HAVE_PREALLOCATED_CONTEXT: std::cell::Cell<bool> = std::cell::Cell::new(false);
+    }
     pub unsafe fn secp256k1_context_preallocated_create(prealloc: *mut c_void, flags: c_uint) -> *mut Context {
         // While applications should generally avoid creating too many contexts, sometimes fuzzers
         // perform tasks repeatedly which real applications may only do rarely. Thus, we want to
@@ -766,13 +770,23 @@ mod fuzz_dummy {
         // avoid being overly slow here. We do so by having a static context and copying it into
         // new buffers instead of recalculating it. Because we shouldn't rely on std, we use a
         // simple hand-written OnceFlag built out of an atomic to gate the global static.
 
-        HAVE_PREALLOCATED_CONTEXT.run(|| {
-            assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + std::mem::size_of::<c_uint>() <= CTX_SIZE);
-            assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
-                PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
-                SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
-                PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
-        });
+        #[cfg(feature = "std")]
+        let should_initialize = !CACHE_HAVE_PREALLOCATED_CONTEXT.with(|value| value.get());
+
+        #[cfg(not(feature = "std"))]
+        let should_initialize = true;
+
+        if should_initialize {
+            HAVE_PREALLOCATED_CONTEXT.run(|| {
+                assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + std::mem::size_of::<c_uint>() <= CTX_SIZE);
+                assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
+                    PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
+                    SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
+                    PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
+            });
+            #[cfg(feature = "std")]
+            CACHE_HAVE_PREALLOCATED_CONTEXT.with(|value| value.set(true));
+        }
 
         ptr::copy_nonoverlapping(PREALLOCATED_CONTEXT[..].as_ptr(), prealloc as *mut u8, CTX_SIZE);
         let ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(std::mem::size_of::<c_uint>());

From beac3866e967c7e49cc5b5d3fc45b7c2d8d523c1 Mon Sep 17 00:00:00 2001
From: Martin Habovstiak
Date: Wed, 5 Jan 2022 21:55:05 +0100
Subject: [PATCH 3/4] Fixed no_std fuzzing

Accessing `std::mem` instead of `core::mem` caused fuzzing to break
without `std`.
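
A minimal reproduction of the failure mode (hypothetical function; `u32`
stands in for `c_uint`):

    #![no_std]

    // Compiles: `core` is always available. Spelling this `std::mem::size_of`
    // instead fails to build with "use of undeclared crate or module `std`".
    pub fn flags_offset(ctx_size: usize) -> usize {
        ctx_size - core::mem::size_of::<u32>()
    }
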
---
 secp256k1-sys/src/lib.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/secp256k1-sys/src/lib.rs b/secp256k1-sys/src/lib.rs
index 187c34275..16602369e 100644
--- a/secp256k1-sys/src/lib.rs
+++ b/secp256k1-sys/src/lib.rs
@@ -753,7 +753,7 @@ mod fuzz_dummy {
     const CTX_SIZE: usize = 1024 * (1024 + 128);
     // Contexts
     pub unsafe fn secp256k1_context_preallocated_size(flags: c_uint) -> size_t {
-        assert!(rustsecp256k1_v0_4_1_context_preallocated_size(flags) + std::mem::size_of::<c_uint>() <= CTX_SIZE);
+        assert!(rustsecp256k1_v0_4_1_context_preallocated_size(flags) + core::mem::size_of::<c_uint>() <= CTX_SIZE);
         CTX_SIZE
     }
 
@@ -778,7 +778,7 @@ mod fuzz_dummy {
         if should_initialize {
             HAVE_PREALLOCATED_CONTEXT.run(|| {
-                assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + std::mem::size_of::<c_uint>() <= CTX_SIZE);
+                assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + core::mem::size_of::<c_uint>() <= CTX_SIZE);
                 assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
                     PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
                     SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
                     PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
             });
             #[cfg(feature = "std")]
             CACHE_HAVE_PREALLOCATED_CONTEXT.with(|value| value.set(true));
         }
 
         ptr::copy_nonoverlapping(PREALLOCATED_CONTEXT[..].as_ptr(), prealloc as *mut u8, CTX_SIZE);
-        let ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(std::mem::size_of::<c_uint>());
+        let ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(core::mem::size_of::<c_uint>());
         (ptr as *mut c_uint).write(flags);
         prealloc as *mut Context
     }
     pub unsafe fn secp256k1_context_preallocated_clone_size(_cx: *const Context) -> size_t { CTX_SIZE }
     pub unsafe fn secp256k1_context_preallocated_clone(cx: *const Context, prealloc: *mut c_void) -> *mut Context {
-        let orig_ptr = (cx as *mut u8).add(CTX_SIZE).sub(std::mem::size_of::<c_uint>());
-        let new_ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(std::mem::size_of::<c_uint>());
+        let orig_ptr = (cx as *mut u8).add(CTX_SIZE).sub(core::mem::size_of::<c_uint>());
+        let new_ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(core::mem::size_of::<c_uint>());
         let flags = (orig_ptr as *mut c_uint).read();
         (new_ptr as *mut c_uint).write(flags);
         rustsecp256k1_v0_4_1_context_preallocated_clone(cx, prealloc)
@@ -815,7 +815,7 @@ mod fuzz_dummy {
         let cx_flags = if cx == secp256k1_context_no_precomp {
             1
         } else {
-            let ptr = (cx as *const u8).add(CTX_SIZE).sub(std::mem::size_of::<c_uint>());
+            let ptr = (cx as *const u8).add(CTX_SIZE).sub(core::mem::size_of::<c_uint>());
             (ptr as *const c_uint).read()
         };
         assert_eq!(cx_flags & 1, 1); // SECP256K1_FLAGS_TYPE_CONTEXT

From 51f398af2c5c155df502743f53f11178fb617200 Mon Sep 17 00:00:00 2001
From: Martin Habovstiak
Date: Thu, 6 Jan 2022 01:10:10 +0100
Subject: [PATCH 4/4] Revert "Optimize loading of preallocated context"

This reverts commit 5cbf5ee03986e939a8e49fe6f71542881c68613a.
---
 secp256k1-sys/src/lib.rs | 28 +++++++++++++---------------------
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/secp256k1-sys/src/lib.rs b/secp256k1-sys/src/lib.rs
index 16602369e..c62867ac6 100644
--- a/secp256k1-sys/src/lib.rs
+++ b/secp256k1-sys/src/lib.rs
@@ -759,10 +759,6 @@ mod fuzz_dummy {
 
     static HAVE_PREALLOCATED_CONTEXT: once::Once = once::Once::INIT;
     static mut PREALLOCATED_CONTEXT: [u8; CTX_SIZE] = [0; CTX_SIZE];
-    #[cfg(feature = "std")]
-    thread_local! {
-        static CACHE_HAVE_PREALLOCATED_CONTEXT: std::cell::Cell<bool> = std::cell::Cell::new(false);
-    }
     pub unsafe fn secp256k1_context_preallocated_create(prealloc: *mut c_void, flags: c_uint) -> *mut Context {
         // While applications should generally avoid creating too many contexts, sometimes fuzzers
         // perform tasks repeatedly which real applications may only do rarely. Thus, we want to
@@ -770,23 +766,13 @@ mod fuzz_dummy {
         // avoid being overly slow here. We do so by having a static context and copying it into
         // new buffers instead of recalculating it. Because we shouldn't rely on std, we use a
         // simple hand-written OnceFlag built out of an atomic to gate the global static.
 
-        #[cfg(feature = "std")]
-        let should_initialize = !CACHE_HAVE_PREALLOCATED_CONTEXT.with(|value| value.get());
-
-        #[cfg(not(feature = "std"))]
-        let should_initialize = true;
-
-        if should_initialize {
-            HAVE_PREALLOCATED_CONTEXT.run(|| {
-                assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + core::mem::size_of::<c_uint>() <= CTX_SIZE);
-                assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
-                    PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
-                    SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
-                    PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
-            });
-            #[cfg(feature = "std")]
-            CACHE_HAVE_PREALLOCATED_CONTEXT.with(|value| value.set(true));
-        }
+        HAVE_PREALLOCATED_CONTEXT.run(|| {
+            assert!(rustsecp256k1_v0_4_1_context_preallocated_size(SECP256K1_START_SIGN | SECP256K1_START_VERIFY) + core::mem::size_of::<c_uint>() <= CTX_SIZE);
+            assert_eq!(rustsecp256k1_v0_4_1_context_preallocated_create(
+                PREALLOCATED_CONTEXT[..].as_ptr() as *mut c_void,
+                SECP256K1_START_SIGN | SECP256K1_START_VERIFY),
+                PREALLOCATED_CONTEXT[..].as_ptr() as *mut Context);
+        });
 
         ptr::copy_nonoverlapping(PREALLOCATED_CONTEXT[..].as_ptr(), prealloc as *mut u8, CTX_SIZE);
         let ptr = (prealloc as *mut u8).add(CTX_SIZE).sub(core::mem::size_of::<c_uint>());