
Commit 14e765b
Correct EASE R and update full results. Minor typos and formatting
MaurizioFD committed Aug 26, 2020
1 parent 7876eec commit 14e765b
Showing 11 changed files with 73 additions and 28 deletions.
1 change: 0 additions & 1 deletion Base/BaseMatrixFactorizationRecommender.py
@@ -88,7 +88,6 @@ def save_model(self, folder_path, file_name = None):
        data_dict_to_save = {"USER_factors": self.USER_factors,
                             "ITEM_factors": self.ITEM_factors,
                             "use_bias": self.use_bias,
-                            "_cold_user_mask": self._cold_user_mask,
                             }

        if self.use_bias:
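Dropping "_cold_user_mask" from the saved dictionary is safe because the mask can be recomputed from the training URM at load time. A minimal sketch of that recomputation, assuming a SciPy sparse URM as used throughout this repository (the function name compute_cold_user_mask is illustrative, not part of the repository API):

import numpy as np
import scipy.sparse as sps

def compute_cold_user_mask(URM_train):
    # A user is cold if their row in the URM contains no interactions;
    # ediff1d on the CSR indptr yields the number of stored entries per row.
    interactions_per_user = np.ediff1d(sps.csr_matrix(URM_train).indptr)
    return interactions_per_user == 0

Recomputing the mask keeps the saved file smaller and avoids persisting state that depends only on URM_train.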
24 changes: 21 additions & 3 deletions Base/DataIO.py
@@ -119,7 +119,7 @@ def save_data(self, file_name, data_dict_to_save):
            current_file_path = current_temp_folder + attrib_name

            if isinstance(attrib_data, DataFrame):
-                attrib_data.to_csv(current_file_path, index=False)
+                attrib_data.to_csv(current_file_path + ".csv", index=False)
                attribute_to_file_name[attrib_name] = attrib_name + ".csv"

            elif isinstance(attrib_data, sps.spmatrix):
@@ -132,8 +132,22 @@ def save_data(self, file_name, data_dict_to_save):
                attribute_to_file_name[attrib_name] = attrib_name + ".npy"

            else:
-                attribute_to_json_file[attrib_name] = attrib_data
-                attribute_to_file_name[attrib_name] = attrib_name + ".json"
+                # Try to serialize it as json; if that fails and the data is a dictionary, use another zip file
+                try:
+                    _ = json.dumps(attrib_data, default=json_not_serializable_handler)
+                    attribute_to_json_file[attrib_name] = attrib_data
+                    attribute_to_file_name[attrib_name] = attrib_name + ".json"
+
+                except TypeError:
+
+                    if isinstance(attrib_data, dict):
+                        dataIO = DataIO(folder_path = current_temp_folder)
+                        dataIO.save_data(file_name = attrib_name, data_dict_to_save = attrib_data)
+                        attribute_to_file_name[attrib_name] = attrib_name + ".zip"
+
+                    else:
+                        raise TypeError("Type not recognized for attribute: {}".format(attrib_name))
+


            # Save list objects
@@ -211,6 +225,10 @@ def load_data(self, file_name):
                # allow_pickle is FALSE to prevent using pickle and ensure portability
                attrib_data = np.load(attrib_file_path, allow_pickle=False)

+            elif attrib_data_type == "zip":
+                dataIO = DataIO(folder_path = current_temp_folder)
+                attrib_data = dataIO.load_data(file_name = attrib_name)
+
            elif attrib_data_type == "json":
                with open(attrib_file_path, "r") as json_file:
                    attrib_data = json.load(json_file)
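With this change, save_data first attempts JSON serialization and, for dictionaries that fail it, recurses into a nested zip; load_data restores such attributes through the new "zip" branch. A round-trip sketch under the signatures shown above (the folder path and payload are hypothetical, and whether a given value takes the JSON or the zip path depends on json_not_serializable_handler):

import scipy.sparse as sps
from Base.DataIO import DataIO

dataIO = DataIO(folder_path="result_experiments/temp/")

data_dict_to_save = {
    "n_users": 100,                                   # JSON-serializable, stored as n_users.json
    "nested": {"URM": sps.eye(10, format="csr")},     # dict that fails json.dumps, stored as nested.zip
}

dataIO.save_data(file_name="model", data_dict_to_save=data_dict_to_save)
loaded_dict = dataIO.load_data(file_name="model")

After the round trip, loaded_dict["nested"] should come back as a dictionary restored by the inner DataIO instance.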
4 changes: 4 additions & 0 deletions Base/Evaluation/Evaluator.py
@@ -195,6 +195,10 @@ def __init__(self, URM_test_list, cutoff_list, min_ratings_per_user=1, exclude_s

            users_to_evaluate_mask = np.logical_or(users_to_evaluate_mask, new_mask)

+        if not np.all(users_to_evaluate_mask):
+            self._print("Ignoring {} ({:.2f}%) Users that have less than {} test interactions".format(np.sum(np.logical_not(users_to_evaluate_mask)),
+                        100*np.sum(np.logical_not(users_to_evaluate_mask))/len(users_to_evaluate_mask), min_ratings_per_user))
+
        self.users_to_evaluate = np.arange(self.n_users)[users_to_evaluate_mask]

        if ignore_users is not None:
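A toy check of the arithmetic in the new log line (hypothetical mask, not repository code): three of five users fall below the threshold, so the message reports 3 (60.00%).

import numpy as np

users_to_evaluate_mask = np.array([True, False, True, False, False])
n_ignored = np.sum(np.logical_not(users_to_evaluate_mask))        # 3 users below threshold
percent_ignored = 100 * n_ignored / len(users_to_evaluate_mask)   # 60.0
print("Ignoring {} ({:.2f}%) Users".format(n_ignored, percent_ignored))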
Binary file modified DL_Evaluation_TOIS_Additional_material.pdf
42 changes: 33 additions & 9 deletions run_IJCAI_17_DELF.py
@@ -8,6 +8,7 @@

import numpy as np
import os, traceback, argparse
+import scipy.sparse as sps
from functools import partial
from Utils.plot_popularity import plot_popularity_bias, save_popularity_statistics

@@ -54,7 +55,11 @@ def cold_items_statistics(URM_train, URM_validation, URM_test, URM_test_negative



+def get_cold_items(URM):
+
+    cold_items_flag = np.ediff1d(sps.csc_matrix(URM).indptr) == 0
+
+    return np.arange(0, URM.shape[1])[cold_items_flag]


def read_data_split_and_search(dataset_name,
@@ -68,14 +73,21 @@ def read_data_split_and_search(dataset_name,
        os.makedirs(result_folder_path)


+    # Ensure both experiments use the same data
+    dataset_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME,
+                                                                dataset_name.replace("_remove_cold_items", ""))
+
+    if not os.path.exists(dataset_folder_path):
+        os.makedirs(dataset_folder_path)
+
    if 'amazon_music' in dataset_name:
-        dataset = AmazonMusicReader(result_folder_path)
+        dataset = AmazonMusicReader(dataset_folder_path)

-    elif 'movielens1m' in dataset_name:
-        dataset = Movielens1MReader(result_folder_path, type ="ours")
+    elif 'movielens1m_ours' in dataset_name:
+        dataset = Movielens1MReader(dataset_folder_path, type ="ours")

    elif 'movielens1m_original' in dataset_name:
-        dataset = Movielens1MReader(result_folder_path, type ="original")
+        dataset = Movielens1MReader(dataset_folder_path, type ="original")

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
@@ -101,11 +113,11 @@ def read_data_split_and_search(dataset_name,
    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
-                         ["Train data", "Test data"],
+                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
-                               ["Full data", "Train data", "Test data"],
+                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")


@@ -135,9 +147,21 @@ def read_data_split_and_search(dataset_name,
    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

-    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
-    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=cutoff_list_test)
+    if "_remove_cold_items" in dataset_name:
+        ignore_items_validation = get_cold_items(URM_train)
+        ignore_items_test = get_cold_items(URM_train + URM_validation)
+    else:
+        ignore_items_validation = None
+        ignore_items_test = None
+
+    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation, ignore_items=ignore_items_validation)
+    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=cutoff_list_test, ignore_items=ignore_items_test)
+
+    # The Evaluator automatically skips users with no test interactions;
+    # in this case we need the evaluation done with and without cold items to be comparable,
+    # so we ensure the users included in the evaluation are the same in both cases.
+    evaluator_validation.users_to_evaluate = np.arange(URM_train.shape[0])
+    evaluator_test.users_to_evaluate = np.arange(URM_train.shape[0])

    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       URM_train = URM_train,
@@ -289,7 +313,7 @@ def read_data_split_and_search(dataset_name,

    KNN_similarity_to_report_list = ["cosine", "dice", "jaccard", "asymmetric", "tversky"]

-    dataset_list = ['amazon_music', 'movielens1m']
+    dataset_list = ['amazon_music', 'movielens1m_ours', 'amazon_music_remove_cold_items', 'movielens1m_ours_remove_cold_items']

    for dataset_name in dataset_list:
        read_data_split_and_search(dataset_name,
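For the new *_remove_cold_items configurations, get_cold_items marks items that have no training interactions by inspecting the CSC indptr. A toy illustration of that trick (the 3x4 URM is hypothetical):

import numpy as np
import scipy.sparse as sps

# Hypothetical 3x4 URM: item 2 has no interactions, so it is cold.
URM_train = sps.csr_matrix(np.array([[1, 0, 0, 1],
                                     [0, 1, 0, 0],
                                     [1, 0, 0, 0]]))

# In CSC format indptr marks where each item's column starts; two equal
# consecutive entries mean an empty column, i.e. a cold item.
cold_items_flag = np.ediff1d(sps.csc_matrix(URM_train).indptr) == 0
print(np.arange(0, URM_train.shape[1])[cold_items_flag])   # [2]

Passing the resulting array as ignore_items excludes those items from every ranking the evaluator scores, which is why the script also pins users_to_evaluate so the runs with and without cold items stay comparable.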
6 changes: 3 additions & 3 deletions run_IJCAI_17_DMF.py
@@ -109,11 +109,11 @@ def read_data_split_and_search(dataset_name,
    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
-                         ["URM train", "URM test"],
+                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

-    save_popularity_statistics([URM_train + URM_validation, URM_test],
-                               ["URM train", "URM test"],
+    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
+                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")


2 changes: 1 addition & 1 deletion run_IJCAI_18_NeuRec.py
@@ -415,7 +415,7 @@ def read_data_split_and_search(dataset_name,

    result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("beyond_accuracy_metrics"),
                                         metrics_list = ["DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
-                                         cutoffs_list = [50],
+                                         cutoffs_list = [10],
                                         table_title = None,
                                         highlight_best = True)

6 changes: 3 additions & 3 deletions run_KDD_18_MCRec.py
@@ -57,11 +57,11 @@ def read_data_split_and_search(dataset_name,
    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
-                         ["URM train", "URM test"],
+                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

-    save_popularity_statistics([URM_train + URM_validation, URM_test],
-                               ["URM train", "URM test"],
+    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
+                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")


4 changes: 2 additions & 2 deletions run_RecSys_18_SpectralCF.py
@@ -140,11 +140,11 @@ def read_data_split_and_search(dataset_name, cold_start = False, cold_items=None
    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
-                         ["Train data", "Test data"],
+                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
-                               ["URM_all", "URM train", "URM test"],
+                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")


6 changes: 3 additions & 3 deletions run_SIGIR_18_CMN.py
@@ -102,11 +102,11 @@ def read_data_split_and_search(dataset_name,
    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
-                         ["URM train", "URM test"],
+                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

-    save_popularity_statistics([URM_train + URM_validation, URM_test],
-                               ["URM train", "URM test"],
+    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
+                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")


6 changes: 3 additions & 3 deletions run_WWW_17_NeuMF.py
@@ -69,11 +69,11 @@ def read_data_split_and_search(dataset_name,
    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
-                         ["URM train", "URM test"],
+                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

-    save_popularity_statistics([URM_train + URM_validation, URM_test],
-                               ["URM train", "URM test"],
+    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
+                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")


