Skip to content

Commit

Permalink
[wasm] Optimize bundled_resources key creation, hashing, and comparis…
Browse files Browse the repository at this point in the history
…on (dotnet#101460)

Optimize bundled_resources key creation and hashing
Migrate bundled_resources to simdhash
  • Loading branch information
kg committed May 13, 2024
1 parent a37af85 commit 0432b30
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 41 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/eglib/eglib-remap.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
#define g_log_set_fatal_mask monoeg_g_log_set_fatal_mask
#define g_logv monoeg_g_logv
#define g_memdup monoeg_g_memdup
#define g_memrchr monoeg_g_memrchr
#define g_mem_set_vtable monoeg_g_mem_set_vtable
#define g_mem_get_vtable monoeg_g_mem_get_vtable
#define g_mkdtemp monoeg_g_mkdtemp
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/eglib/glib.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ gchar *g_strchug (gchar *str);
gchar *g_strchomp (gchar *str);
gchar *g_strnfill (gsize length, gchar fill_char);
gsize g_strnlen (const char*, gsize);
const gchar *g_memrchr (const char *s, char c, size_t n);

void g_strdelimit (char *string, char delimiter, char new_delimiter);

Expand Down
9 changes: 9 additions & 0 deletions src/mono/mono/eglib/gstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,3 +783,12 @@ g_strnlen (const char* s, gsize n)
for (i = 0; i < n && s [i]; ++i) ;
return i;
}

const gchar *
g_memrchr (const char *s, char c, size_t n)
{
while (n--)
if (s[n] == c)
return (void *)(s + n);
return NULL;
}
1 change: 1 addition & 0 deletions src/mono/mono/metadata/bundled-resources-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ typedef enum {

typedef void (*free_bundled_resource_func)(void *, void*);

// WARNING: The layout of these structs cannot change because EmitBundleBase.cs depends on it!
typedef struct _MonoBundledResource {
MonoBundledResourceType type;
const char *id;
Expand Down
109 changes: 68 additions & 41 deletions src/mono/mono/metadata/bundled-resources.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
#include <mono/metadata/appdomain.h>
#include <mono/metadata/bundled-resources-internals.h>
#include <mono/metadata/webcil-loader.h>
#include "../native/containers/dn-simdhash-specializations.h"
#include "../native/containers/dn-simdhash-utils.h"

static GHashTable *bundled_resources = NULL;
static dn_simdhash_ght_t *bundled_resources = NULL;
static dn_simdhash_ptr_ptr_t *bundled_resource_key_lookup_table = NULL;
static bool bundled_resources_contains_assemblies = false;
static bool bundled_resources_contains_satellite_assemblies = false;

Expand All @@ -31,8 +34,10 @@ mono_bundled_resources_free (void)
{
g_assert (mono_runtime_is_shutting_down ());

g_hash_table_destroy (bundled_resources);
dn_simdhash_free (bundled_resources);
dn_simdhash_free (bundled_resource_key_lookup_table);
bundled_resources = NULL;
bundled_resource_key_lookup_table = NULL;

bundled_resources_contains_assemblies = false;
bundled_resources_contains_satellite_assemblies = false;
Expand All @@ -50,6 +55,12 @@ bundled_resources_value_destroy_func (void *resource)
MonoBundledResource *value = (MonoBundledResource *)resource;
if (value->free_func)
value->free_func (resource, value->free_data);

char *key;
if (dn_simdhash_ptr_ptr_try_get_value (bundled_resource_key_lookup_table, (void *)value->id, (void **)&key)) {
dn_simdhash_ptr_ptr_try_remove (bundled_resource_key_lookup_table, (void *)value->id);
g_free (key);
}
}

static bool
Expand All @@ -62,48 +73,51 @@ bundled_resources_is_known_assembly_extension (const char *ext)
#endif
}

static gboolean
bundled_resources_resource_id_equal (const char *id_one, const char *id_two)
// If a bundled resource has a known assembly extension, we strip the extension from its name
// This ensures that lookups for foo.dll will work even if the assembly is in a webcil container
static char *
key_from_id (const char *id, char *buffer, guint buffer_len)
{
const char *extension_one = strrchr (id_one, '.');
const char *extension_two = strrchr (id_two, '.');
if (extension_one && extension_two && bundled_resources_is_known_assembly_extension (extension_one) && bundled_resources_is_known_assembly_extension (extension_two)) {
size_t len_one = extension_one - id_one;
size_t len_two = extension_two - id_two;
return (len_one == len_two) && !strncmp (id_one, id_two, len_one);
size_t id_length = strlen (id),
extension_offset = -1;
const char *extension = g_memrchr (id, '.', id_length);
if (extension)
extension_offset = extension - id;
if (!buffer) {
// Add space for .dll and null terminator
buffer_len = (guint)(id_length + 6);
buffer = g_malloc (buffer_len);
}
buffer[0] = 0;

return !strcmp (id_one, id_two);
if (extension_offset && bundled_resources_is_known_assembly_extension (extension)) {
// Subtract from buffer_len to make sure we have space for .dll
g_strlcpy (buffer, id, MIN(buffer_len - 4, extension_offset + 2));
strcat (buffer, "dll");
} else {
g_strlcpy (buffer, id, MIN(buffer_len, id_length + 1));
}

return buffer;
}

static guint
bundled_resources_resource_id_hash (const char *id)
static gboolean
bundled_resources_resource_id_equal (const char *key_one, const char *key_two)
{
const char *current = id;
const char *extension = NULL;
guint previous_hash = 0;
guint hash = 0;

while (*current) {
hash = (hash << 5) - (hash + *current);
if (*current == '.') {
extension = current;
previous_hash = hash;
}
current++;
}

// alias all extensions to .dll
if (extension && bundled_resources_is_known_assembly_extension (extension)) {
hash = previous_hash;
hash = (hash << 5) - (hash + 'd');
hash = (hash << 5) - (hash + 'l');
hash = (hash << 5) - (hash + 'l');
}
return strcmp (key_one, key_two) == 0;
}

return hash;
static guint32
bundled_resources_resource_id_hash (const char *key)
{
// FIXME: Seed
// FIXME: We should cache the hash code so rehashes are cheaper
return MurmurHash3_32_streaming ((const uint8_t *)key, 0);
}

static MonoBundledResource *
bundled_resources_get (const char *id);

//---------------------------------------------------------------------------------------
//
// mono_bundled_resources_add handles bundling of many types of resources to circumvent
Expand All @@ -130,7 +144,11 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
g_assert (!domain);

if (!bundled_resources)
bundled_resources = g_hash_table_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func);
// FIXME: Choose a good initial capacity to avoid rehashes during startup. I picked one at random
bundled_resources = dn_simdhash_ght_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func, 2048, NULL);

if (!bundled_resource_key_lookup_table)
bundled_resource_key_lookup_table = dn_simdhash_ptr_ptr_new (2048, NULL);

bool assemblyAdded = false;
bool satelliteAssemblyAdded = false;
Expand All @@ -143,7 +161,13 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
if (resource_to_bundle->type == MONO_BUNDLED_SATELLITE_ASSEMBLY)
satelliteAssemblyAdded = true;

g_hash_table_insert (bundled_resources, (gpointer) resource_to_bundle->id, resource_to_bundle);
// Generate the hash key for the id (strip certain extensions) and store it
// so that we can free it later when freeing the bundled data
char *key = key_from_id (resource_to_bundle->id, NULL, 0);
dn_simdhash_ptr_ptr_try_add (bundled_resource_key_lookup_table, (void *)resource_to_bundle->id, key);

g_assert (dn_simdhash_ght_try_add (bundled_resources, (gpointer) key, resource_to_bundle));
// g_assert (bundled_resources_get (resource_to_bundle->id) == resource_to_bundle);
}

if (assemblyAdded)
Expand Down Expand Up @@ -172,7 +196,12 @@ bundled_resources_get (const char *id)
if (!bundled_resources)
return NULL;

return g_hash_table_lookup (bundled_resources, id);
char key_buffer[1024];
key_from_id(id, key_buffer, sizeof(key_buffer));

MonoBundledResource *result = NULL;
dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);
return result;
}

//---------------------------------------------------------------------------------------
Expand Down Expand Up @@ -364,9 +393,7 @@ bool
mono_bundled_resources_get_data_resource_values (const char *id, const uint8_t **data_out, uint32_t *size_out)
{
MonoBundledDataResource *bundled_data_resource = bundled_resources_get_data_resource (id);
if (!bundled_data_resource ||
!bundled_data_resource->data.data ||
bundled_data_resource->data.size == 0)
if (!bundled_data_resource || !bundled_data_resource->data.data)
return false;

if (data_out)
Expand Down

0 comments on commit 0432b30

Please sign in to comment.