Skip to content

Commit

Permalink
DATAS for small HCs (dotnet#100390)
Browse files Browse the repository at this point in the history
I adjusted the formula for determining a new HC and changed how we calculate the gen0 budget based on gen2 size.

changes included -

+ Currently we have a very simplistic formula for actually adapting to the size, which basically just makes all the ASP.NET benchmarks with a low survival rate adjust to the minimum 2.5 MB gen0 budget. While those run OK with such a small budget on a 28-core machine, it doesn't work if we limit the heap count to a small number, e.g., 4: the % time in GC becomes very high, with some benchmarks spending 20% to 40% of their time in GC. This is obviously not desirable. I reworked this to make it actually adapt to the size, and we take the min of this and what we calculated without DATAS.

+ The formula I had previously did not handle small HCs well, so I also adjusted that.

+ Got rid of adjusting to the cache size in gc1 for DATAS; this just made things unpredictable, especially for small workloads.
  • Loading branch information
Maoni0 committed Apr 3, 2024
1 parent 7c030e8 commit 89bd910
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 60 deletions.
151 changes: 92 additions & 59 deletions src/coreclr/gc/gc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22526,6 +22526,15 @@ void gc_heap::gc1()
{
limit = total_generation_count-1;
}

size_t total_max_gen_size = 0;
for (int i = 0; i < gc_heap::n_heaps; i++)
{
gc_heap* hp = gc_heap::g_heaps[i];
dynamic_data* dd = hp->dynamic_data_of (max_generation);
total_max_gen_size += dd_current_size (dd) + dd_desired_allocation (dd);
}

for (int gen = 0; gen <= limit; gen++)
{
size_t total_desired = 0;
Expand Down Expand Up @@ -22554,20 +22563,35 @@ void gc_heap::gc1()
total_already_consumed = temp_total_already_consumed;
}

size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps,
get_alignment_constant (gen <= max_generation));
size_t desired_per_heap = Align (total_desired/gc_heap::n_heaps, get_alignment_constant (gen <= max_generation));

size_t already_consumed_per_heap = total_already_consumed / gc_heap::n_heaps;

if (gen == 0)
{
#if 1 //subsumed by the linear allocation model
#ifdef DYNAMIC_HEAP_COUNT
if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
{
size_t new_allocation_datas = dynamic_heap_count_data.compute_gen0_new_allocation (total_max_gen_size);
new_allocation_datas = Align (new_allocation_datas, get_alignment_constant (gen <= max_generation));
dprintf (6666, ("gen0 new_alloc %Id (%.3fmb), from datas: %Id (%.3fmb)",
desired_per_heap, ((double)desired_per_heap / 1000.0 / 1000.0),
new_allocation_datas, ((double)new_allocation_datas / 1000.0 / 1000.0)));
desired_per_heap = min (desired_per_heap, new_allocation_datas);
}
#endif //DYNAMIC_HEAP_COUNT

// to avoid spikes in mem usage due to short terms fluctuations in survivorship,
// apply some smoothing.
size_t desired_per_heap_before_smoothing = desired_per_heap;
desired_per_heap = exponential_smoothing (gen, dd_collection_count (dynamic_data_of(gen)), desired_per_heap);
#endif //0
size_t desired_per_heap_after_smoothing = desired_per_heap;

if (!heap_hard_limit)
if (!heap_hard_limit
#ifdef DYNAMIC_HEAP_COUNT
&& (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes)
#endif //DYNAMIC_HEAP_COUNT
)
{
// if desired_per_heap is close to min_gc_size, trim it
// down to min_gc_size to stay in the cache
Expand All @@ -22584,7 +22608,10 @@ void gc_heap::gc1()
}
#ifdef HOST_64BIT
desired_per_heap = joined_youngest_desired (desired_per_heap);
dprintf (2, ("final gen0 new_alloc: %zd", desired_per_heap));

dprintf (6666, ("final gen0 new_alloc: total desired: %Id (%.3fmb/heap), before smooth %zd -> after smooth %zd -> after joined %zd",
total_desired, ((double)(total_desired / n_heaps)/ 1000.0 / 1000.0),
desired_per_heap_before_smoothing, desired_per_heap_after_smoothing, desired_per_heap));
#endif // HOST_64BIT
gc_data_global.final_youngest_desired = desired_per_heap;
}
Expand Down Expand Up @@ -25347,9 +25374,10 @@ int gc_heap::calculate_new_heap_count ()
// on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ...
// we don't go all the way to the number of CPUs, but stay 1 or 2 short
int step_up = (n_heaps + 1) / 2;
int extra_heaps = 1 + (n_max_heaps >= 32);
int extra_heaps = (n_max_heaps >= 16) + (n_max_heaps >= 64);
int actual_n_max_heaps = n_max_heaps - extra_heaps;
int max_growth = max ((n_max_heaps / 4), 2);
int max_growth = max ((n_max_heaps / 4), (1 + (actual_n_max_heaps > 3)));

step_up = min (step_up, (actual_n_max_heaps - n_heaps));

// on the way down, we essentially divide the heap count by 1.5
Expand Down Expand Up @@ -25392,13 +25420,15 @@ int gc_heap::calculate_new_heap_count ()
// target_tcp should be configurable.
float target_tcp = 5.0;
float target_gen2_tcp = 10.0;
float log_base = (float)1.1;
float log_base = (float)1.11;

dynamic_heap_count_data.add_to_recorded_tcp (median_throughput_cost_percent);

// This is the average of whatever is in the recorded tcp buffer.
float avg_recorded_tcp = 0.0;

size_t num_gcs_since_last_change = current_gc_index - dynamic_heap_count_data.last_changed_gc_index;

if (process_eph_samples_p)
{
dynamic_heap_count_data.last_processed_stcp = smoothed_median_throughput_cost_percent;
Expand All @@ -25407,22 +25437,21 @@ int gc_heap::calculate_new_heap_count ()
{
// If median is high but stcp is lower than target, and if this situation continues, stcp will quickly be above target anyway; otherwise
// we treat it as an outlier.
if (smoothed_median_throughput_cost_percent > target_tcp)
if (smoothed_median_throughput_cost_percent >= (target_tcp + 1.0))
{
float step_up_percent = log_with_base ((smoothed_median_throughput_cost_percent - target_tcp + log_base), log_base);
float step_up_float = (float)(step_up_percent / 100.0 * actual_n_max_heaps);
float step_up_float = (float)(1 + actual_n_max_heaps * log_with_base ((smoothed_median_throughput_cost_percent - target_tcp), log_base) / 100.0);
int step_up_int = (int)step_up_float;

dprintf (6666, ("[CHP0] inc %d(%.3f), last inc %d, %Id GCs elapsed, last stcp %.3f",
step_up_int, step_up_float, (int)dynamic_heap_count_data.last_changed_count,
(current_gc_index - dynamic_heap_count_data.last_changed_gc_index), dynamic_heap_count_data.last_changed_stcp));
num_gcs_since_last_change, dynamic_heap_count_data.last_changed_stcp));

// Don't adjust if we just adjusted last time we checked, unless we are in an extreme situation.
if ((smoothed_median_throughput_cost_percent < 20.0f) &&
(avg_throughput_cost_percent < 20.0f) &&
((current_gc_index - dynamic_heap_count_data.last_changed_gc_index) < (2 * dynamic_heap_count_data_t::sample_size)))
(num_gcs_since_last_change < (2 * dynamic_heap_count_data_t::sample_size)))
{
dprintf (6666, ("[CHP0] we just adjusted %Id GCs ago, skipping", (current_gc_index - dynamic_heap_count_data.last_changed_gc_index)));
dprintf (6666, ("[CHP0] we just adjusted %Id GCs ago, skipping", num_gcs_since_last_change));
}
else
{
Expand All @@ -25435,9 +25464,9 @@ int gc_heap::calculate_new_heap_count ()
}

if (((int)dynamic_heap_count_data.last_changed_count > 0) && (dynamic_heap_count_data.last_changed_gc_index > 0.0) &&
((current_gc_index - dynamic_heap_count_data.last_changed_gc_index) <= (3 * dynamic_heap_count_data_t::sample_size)))
(num_gcs_since_last_change <= (3 * dynamic_heap_count_data_t::sample_size)))
{
dprintf (6666, ("[CHP0-0] just grew %d GCs ago, no change", (current_gc_index - dynamic_heap_count_data.last_changed_gc_index)));
dprintf (6666, ("[CHP0-0] just grew %d GCs ago, no change", num_gcs_since_last_change));
step_up_int = 0;
}
else
Expand Down Expand Up @@ -25487,9 +25516,18 @@ int gc_heap::calculate_new_heap_count ()
{
if (((int)dynamic_heap_count_data.last_changed_count > 0) && (dynamic_heap_count_data.last_changed_gc_index > 0.0))
{
(dynamic_heap_count_data.inc_failure_count)++;
dprintf (6666, ("[CHP0-4] just grew %d GCs ago, grow more aggressively from %d -> %d more heaps",
(current_gc_index - dynamic_heap_count_data.last_changed_gc_index), step_up_int, (step_up_int * (dynamic_heap_count_data.inc_failure_count + 1))));
if (num_gcs_since_last_change > (16 * dynamic_heap_count_data_t::sample_size))
{
dynamic_heap_count_data.inc_failure_count = 0;
dprintf (6666, ("[CHP0-4] grew %d GCs ago, too far in the past, set aggressive factor to 0, grow from %d -> %d more heaps",
num_gcs_since_last_change, dynamic_heap_count_data.inc_failure_count, step_up_int, (step_up_int * (dynamic_heap_count_data.inc_failure_count + 1))));
}
else
{
(dynamic_heap_count_data.inc_failure_count)++;
dprintf (6666, ("[CHP0-4] grew %d GCs ago, aggressive factor is %d, grow more aggressively from %d -> %d more heaps",
num_gcs_since_last_change, dynamic_heap_count_data.inc_failure_count, step_up_int, (step_up_int * (dynamic_heap_count_data.inc_failure_count + 1))));
}
step_up_int *= dynamic_heap_count_data.inc_failure_count + 1;
}
}
Expand All @@ -25514,9 +25552,9 @@ int gc_heap::calculate_new_heap_count ()
dynamic_heap_count_data.last_changed_stcp = smoothed_median_throughput_cost_percent;
}

dprintf (6666, ("[CHP0] tcp %.3f, stcp %.3f -> (%d * %.3f%% = %.3f) -> %d + %d = %d -> %d",
dprintf (6666, ("[CHP0] tcp %.3f, stcp %.3f -> (%d -> %.3f) -> %d + %d = %d -> %d",
median_throughput_cost_percent, smoothed_median_throughput_cost_percent,
actual_n_max_heaps, step_up_percent, step_up_float, step_up_int, n_heaps, (n_heaps + step_up_int), new_n_heaps));
actual_n_max_heaps, step_up_float, step_up_int, n_heaps, (n_heaps + step_up_int), new_n_heaps));
}
}
}
Expand All @@ -25533,7 +25571,7 @@ int gc_heap::calculate_new_heap_count ()
}
dprintf (6666, ("[CHP1] last time adjusted %s by %d at GC#%Id (%Id GCs since), stcp was %.3f, now stcp is %.3f",
((dynamic_heap_count_data.last_changed_count > 0.0) ? "up" : "down"), (int)dynamic_heap_count_data.last_changed_count,
dynamic_heap_count_data.last_changed_gc_index, (current_gc_index - dynamic_heap_count_data.last_changed_gc_index),
dynamic_heap_count_data.last_changed_gc_index, num_gcs_since_last_change,
dynamic_heap_count_data.last_changed_stcp, smoothed_median_throughput_cost_percent));

float below_target_diff = target_tcp - median_throughput_cost_percent;
Expand All @@ -25546,10 +25584,16 @@ int gc_heap::calculate_new_heap_count ()
if (dynamic_heap_count_data.below_target_accumulation >= dynamic_heap_count_data.below_target_threshold)
{
int below_target_tcp_count = dynamic_heap_count_data.rearrange_recorded_tcp ();
float below_target_tcp_slope = slope (dynamic_heap_count_data.recorded_tcp, below_target_tcp_count, &avg_recorded_tcp);
float below_target_tcp_slope = slope (dynamic_heap_count_data.recorded_tcp_rearranged, below_target_tcp_count, &avg_recorded_tcp);
float diff_pct = (target_tcp - smoothed_median_throughput_cost_percent) / target_tcp;
int step_down_int = (int)(diff_pct / 2.0 * n_heaps);
dprintf (6666, ("[CHP1] observed %d tcp's <= or ~ target, avg %.3f, slope %.3f, stcp %.3f below target, shrink by %.3f * %d = %d heaps",
if ((step_down_int == 0) && dynamic_heap_count_data.is_tcp_far_below (diff_pct))
{
dprintf (6666, ("[CHP1] we are far below target, reduce by 1 heap"));
step_down_int = 1;
}

dprintf (6666, ("[CHP1] observed %d tcp's <= or ~ target, avg %.3f, slope %.3f, stcp %.3f%% below target, shrink by %.3f%% * %d = %d heaps",
below_target_tcp_count, avg_recorded_tcp, below_target_tcp_slope, (diff_pct * 100.0), (diff_pct * 50.0), n_heaps, step_down_int));

bool shrink_p = false;
Expand Down Expand Up @@ -25629,11 +25673,22 @@ int gc_heap::calculate_new_heap_count ()

if (shrink_p && step_down_int && (new_n_heaps > step_down_int))
{
// TODO - if we see that it wants to shrink by 1 heap too many times, we do want to shrink.
if (step_down_int == 1)
{
step_down_int = 0;
dprintf (6666, ("[CHP1-3] don't shrink if it's just one heap. not worth it"));
if (dynamic_heap_count_data.should_dec_by_one())
{
dprintf (6666, ("[CHP1-3] shrink by one heap"));
}
else
{
step_down_int = 0;
dprintf (6666, ("[CHP1-3] don't shrink just yet if it's just one heap"));
}
}
else
{
dynamic_heap_count_data.reset_dec_by_one();
dprintf (6666, ("[CHP1-3] shrink by %d heap(s), reset dec by one", step_down_int));
}

new_n_heaps -= step_down_int;
Expand Down Expand Up @@ -26265,7 +26320,7 @@ bool gc_heap::change_heap_count (int new_n_heaps)
assert (gen_size >= dd_fragmentation (dd));
dd_current_size (dd) = gen_size - dd_fragmentation (dd);

dprintf (3, ("h%d g%d: budget: %zd, left in budget: %zd, %zd generation_size: %zd fragmentation: %zd current_size: %zd",
dprintf (3, ("h%d g%d: budget: %zd, left in budget: %zd, generation_size: %zd fragmentation: %zd current_size: %zd",
i,
gen_idx,
desired_alloc_per_heap[gen_idx],
Expand Down Expand Up @@ -43608,35 +43663,6 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
new_allocation = min (new_allocation,
max (min_gc_size, (max_size/3)));
}

#ifdef DYNAMIC_HEAP_COUNT
if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)
{
// if this is set, limit gen 0 size to a small multiple of the older generations
float f_older_gen = ((10.0f / conserve_mem_setting) - 1) * 0.5f;

// compute the total size of the older generations
size_t older_size = 0;
for (int gen_index_older = 1; gen_index_older < total_generation_count; gen_index_older++)
{
dynamic_data* dd_older = dynamic_data_of (gen_index_older);
older_size += dd_current_size (dd_older);
}
// derive a new allocation size from it
size_t new_allocation_from_older = (size_t)(older_size*f_older_gen);

// limit the new allocation to this value
new_allocation = min (new_allocation, new_allocation_from_older);

// but make sure it doesn't drop below the minimum size
new_allocation = max (new_allocation, min_gc_size);

dprintf (2, ("f_older_gen: %d%% older_size: %zd new_allocation: %zd",
(int)(f_older_gen*100),
older_size,
new_allocation));
}
#endif //DYNAMIC_HEAP_COUNT
}
}

Expand Down Expand Up @@ -48782,7 +48808,8 @@ HRESULT GCHeap::Initialize()
// start with only 1 heap
gc_heap::smoothed_desired_total[0] /= gc_heap::n_heaps;
int initial_n_heaps = 1;
dprintf (9999, ("gc_heap::n_heaps is %d, initial %d", gc_heap::n_heaps, initial_n_heaps));

dprintf (6666, ("n_heaps is %d, initial n_heaps is %d, %d cores", gc_heap::n_heaps, initial_n_heaps, g_num_processors));

{
if (!gc_heap::prepare_to_change_heap_count (initial_n_heaps))
Expand Down Expand Up @@ -48810,6 +48837,12 @@ HRESULT GCHeap::Initialize()
gc_heap::dynamic_heap_count_data.below_target_threshold = 10.0;
gc_heap::dynamic_heap_count_data.inc_recheck_threshold = 5;
gc_heap::dynamic_heap_count_data.dec_failure_recheck_threshold = 5;
// This should really be set as part of computing static data and should take conserve_mem_setting into consideration.
gc_heap::dynamic_heap_count_data.max_gen0_new_allocation = min (dd_max_size (gc_heap::g_heaps[0]->dynamic_data_of (0)), (64 * 1024 * 1024));
gc_heap::dynamic_heap_count_data.min_gen0_new_allocation = dd_min_size (gc_heap::g_heaps[0]->dynamic_data_of (0));

dprintf (6666, ("datas max gen0 budget %Id, min %Id",
gc_heap::dynamic_heap_count_data.max_gen0_new_allocation, gc_heap::dynamic_heap_count_data.min_gen0_new_allocation));
}
#endif //DYNAMIC_HEAP_COUNT
GCScan::GcRuntimeStructuresValid (TRUE);
Expand Down
Loading

0 comments on commit 89bd910

Please sign in to comment.