From 4642602bee208d7b9ea8ecb316642a214803ca1b Mon Sep 17 00:00:00 2001 From: rnyak Date: Fri, 6 May 2022 13:55:18 -0400 Subject: [PATCH] Add Unit test for Building and deploying multi-stage Recsys nbs (#288) * finalize unit test for poc * fix dynamic path for the 2nd notebook * Update tests/examples/test_building_deploying_multi_stage_RecSys.py Co-authored-by: Ben Frederickson * fix quotes Co-authored-by: Ben Frederickson --- ...ding-Recommender-Systems-with-Merlin.ipynb | 832 ++++++++---------- ...lti-stage-RecSys-with-Merlin-Systems.ipynb | 274 +++--- ...t_building_deploying_multi_stage_RecSys.py | 24 +- 3 files changed, 493 insertions(+), 637 deletions(-) diff --git a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index 55ba1f813..db0ea062c 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "2cd8cc8d-5cc7-4a9f-91e5-3deec6f1fe74", "metadata": {}, "outputs": [], @@ -128,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "08cdbfcc", "metadata": {}, "outputs": [ @@ -136,19 +136,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-05-04 13:26:59.541865: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", + "2022-05-05 14:34:58.341639: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-05-04 13:27:01.553019: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0\n", - "2022-05-04 13:27:01.553153: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16254 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", - "2022-05-04 13:27:01.553841: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 1\n", - "2022-05-04 13:27:01.553903: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 30338 MB memory: -> device: 1, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n" + "2022-05-05 14:35:00.407290: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0\n", + "2022-05-05 14:35:00.407448: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30667 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", + "2022-05-05 14:35:00.408163: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 1\n", + "2022-05-05 14:35:00.408224: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 30315 MB memory: -> device: 1, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n" ] } ], "source": [ "import os\n", "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "import cudf\n", "import glob\n", "import gc\n", "\n", @@ -166,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "028a1398-76a8-4998-97d8-34a806e130d3", "metadata": {}, "outputs": [], @@ -188,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "81ddb370", "metadata": {}, "outputs": [], @@ -207,10 +206,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "b44b3378-7297-4946-a271-742a9239bc3e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ "from merlin.datasets.synthetic import generate_data\n", "\n", @@ -244,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "550d45c9", "metadata": {}, "outputs": [ @@ -252,8 +260,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 145 µs, sys: 46 µs, total: 191 µs\n", - "Wall time: 195 µs\n" + "CPU times: user 151 µs, sys: 45 µs, total: 196 µs\n", + "Wall time: 200 µs\n" ] } ], @@ -285,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "e117e7b5-5007-424b-8d3f-9e1db245fd4c", "metadata": {}, "outputs": [ @@ -293,7 +301,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/lib/python3.8/site-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", " warnings.warn(\n" ] } @@ -326,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "cb870461-6ac2-49b2-ba6a-2da6ecb57f1d", "metadata": {}, "outputs": [], @@ -341,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "30e4ebc2", "metadata": {}, "outputs": [ @@ -351,7 +359,7 @@ "'click'" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -371,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "e4325080", "metadata": {}, "outputs": [], @@ -387,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "bfe2aa9e", "metadata": {}, "outputs": [ @@ -395,30 +403,30 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-05-04 13:27:04.217413: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" + "2022-05-05 14:35:03.403519: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 6s 224ms/step - auc: 0.4997 - loss: 0.6932 - regularization_loss: 0.0000e+00 - total_loss: 0.6932 - val_auc: 0.5000 - val_loss: 0.6931 - val_regularization_loss: 0.0000e+00 - val_total_loss: 0.6931\n" + "5/5 [==============================] - 6s 217ms/step - auc: 0.4983 - loss: 0.6931 - regularization_loss: 0.0000e+00 - total_loss: 0.6931 - val_auc: 0.4995 - val_loss: 0.6932 - val_regularization_loss: 0.0000e+00 - val_total_loss: 0.6932\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2022-05-04 13:27:10.905074: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/branch_executed/_13\n" + "2022-05-05 14:35:10.085952: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/branch_executed/_13\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -438,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "dd78a82e", "metadata": {}, "outputs": [], @@ -457,7 +465,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "00447c12-ea80-4d98-ab47-cc1a982a6958", "metadata": {}, "outputs": [], @@ -483,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "00de24e9-331a-486e-9843-6c554ad2ec77", "metadata": {}, "outputs": [], @@ -501,7 +509,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "22a7d605-478f-40e6-a5dc-3e7a61e9b035", "metadata": {}, "outputs": [ @@ -509,7 +517,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/lib/python3.8/site-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", " warnings.warn(\n" ] } @@ -534,7 +542,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "dc150549-6fa0-441f-939d-a358e56d5e43", "metadata": {}, "outputs": [], @@ -548,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "02471088-0ed8-42e7-968e-b7e68865d55c", "metadata": {}, "outputs": [], @@ -565,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "d6703d7c-d38f-4d6d-a20a-9ee95ff1e256", "metadata": {}, "outputs": [ @@ -573,30 +581,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "4/5 [=======================>......] - ETA: 0s - recall_at_10: 0.0107 - ndcg_10: 0.0091 - loss: 8.9768 - regularization_loss: 0.0000e+00 - total_loss: 8.9768" + "4/5 [=======================>......] - ETA: 0s - recall_at_10: 0.0199 - ndcg_10: 0.0175 - loss: 8.9784 - regularization_loss: 0.0000e+00 - total_loss: 8.9784" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2022-05-04 13:27:26.542717: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/branch_executed/_24\n" + "2022-05-05 14:35:25.929977: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: cond/branch_executed/_24\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 6s 332ms/step - recall_at_10: 0.0125 - ndcg_10: 0.0109 - loss: 8.5304 - regularization_loss: 0.0000e+00 - total_loss: 8.5304 - val_recall_at_10: 0.0410 - val_ndcg_10: 0.0387 - val_loss: 8.7822 - val_regularization_loss: 0.0000e+00 - val_total_loss: 8.7822\n" + "5/5 [==============================] - 6s 326ms/step - recall_at_10: 0.0209 - ndcg_10: 0.0185 - loss: 8.5198 - regularization_loss: 0.0000e+00 - total_loss: 8.5198 - val_recall_at_10: 0.0419 - val_ndcg_10: 0.0394 - val_loss: 8.8001 - val_regularization_loss: 0.0000e+00 - val_total_loss: 8.8001\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -632,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "2e7e96d2-9cd2-40d1-b356-8cd76b57bb4a", "metadata": {}, "outputs": [ @@ -641,7 +649,7 @@ "output_type": "stream", "text": [ "\n", - "Creating a new Feast repository in \u001b[1m\u001b[32m/tmp/examples/feature_repo\u001b[0m.\n", + "Creating a new Feast repository in \u001b[1m\u001b[32m/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo\u001b[0m.\n", "\n" ] } @@ -660,7 +668,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "26ba2521-ed1b-4c2b-afdd-26b4a5a9c008", "metadata": {}, "outputs": [], @@ -679,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "2af24597-e89c-43a4-9a13-458d8bed7c8a", "metadata": {}, "outputs": [], @@ -698,10 +706,19 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "ea0b369c-2f01-42e3-9f3c-74c3ff4a6d64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ "from merlin.models.utils.dataset import unique_rows_by_features\n", "user_features = unique_rows_by_features(train, Tags.USER, Tags.USER_ID).compute().reset_index(drop=True)" @@ -709,7 +726,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "6b0949f9-e67a-414f-9d74-65f138e820a8", "metadata": {}, "outputs": [ @@ -751,6 +768,8 @@ " \n", " \n", " 0\n", + " 14\n", + " 14\n", " 1\n", " 1\n", " 1\n", @@ -758,15 +777,14 @@ " 1\n", " 1\n", " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 14\n", + " 14\n", + " 14\n", " \n", " \n", " 1\n", - " 2\n", + " 38\n", + " 38\n", " 2\n", " 1\n", " 1\n", @@ -774,15 +792,14 @@ " 1\n", " 1\n", " 1\n", - " 1\n", - " 2\n", - " 2\n", - " 2\n", + " 38\n", + " 38\n", + " 38\n", " \n", " \n", " 2\n", - " 3\n", - " 3\n", + " 8\n", + " 8\n", " 1\n", " 1\n", " 1\n", @@ -790,14 +807,14 @@ " 1\n", " 1\n", " 1\n", - " 3\n", - " 3\n", - " 3\n", + " 8\n", + " 8\n", + " 8\n", " \n", " \n", " 3\n", - " 4\n", - " 4\n", + " 5\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -805,24 +822,24 @@ " 1\n", " 1\n", " 1\n", - " 4\n", - " 4\n", - " 4\n", + " 5\n", + " 5\n", + " 5\n", " \n", " \n", " 4\n", - " 5\n", - " 5\n", - " 1\n", + " 31\n", + " 31\n", + " 2\n", " 1\n", " 1\n", " 1\n", " 1\n", " 1\n", " 1\n", - " 5\n", - " 5\n", - " 5\n", + " 31\n", + " 31\n", + " 31\n", " \n", " \n", "\n", @@ -830,28 +847,28 @@ ], "text/plain": [ " user_id user_shops user_profile user_group user_gender user_age \\\n", - "0 1 1 1 1 1 1 \n", - "1 2 2 1 1 1 1 \n", - "2 3 3 1 1 1 1 \n", - "3 4 4 1 1 1 1 \n", - "4 5 5 1 1 1 1 \n", + "0 14 14 1 1 1 1 \n", + "1 38 38 2 1 1 1 \n", + "2 8 8 1 1 1 1 \n", + "3 5 5 1 1 1 1 \n", + "4 31 31 2 1 1 1 \n", "\n", " user_consumption_2 user_is_occupied user_geography user_intentions \\\n", - "0 1 1 1 1 \n", - "1 1 1 1 2 \n", - "2 1 1 1 3 \n", - "3 1 1 1 4 \n", - "4 1 1 1 5 \n", + "0 1 1 1 14 \n", + "1 1 1 1 38 \n", + "2 1 1 1 8 \n", + "3 1 1 1 5 \n", + "4 1 1 1 31 \n", "\n", " user_brands user_categories \n", - "0 1 1 \n", - "1 2 2 \n", - "2 3 3 \n", - "3 4 4 \n", - "4 5 5 " + "0 14 14 \n", + "1 38 38 \n", + "2 8 8 \n", + "3 5 5 \n", + "4 31 31 " ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -870,7 +887,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "d30bd2f8-8a78-4df7-9bc4-42bd741c5b99", "metadata": {}, "outputs": [], @@ -884,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "d4998cd1-9dcd-4911-8f23-372e197b41e9", "metadata": {}, "outputs": [ @@ -928,6 +945,8 @@ " \n", " \n", " 0\n", + " 14\n", + " 14\n", " 1\n", " 1\n", " 1\n", @@ -935,17 +954,16 @@ " 1\n", " 1\n", " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 2022-05-04 13:27:33.635692\n", - " 2022-05-04 13:27:34.000771\n", + " 14\n", + " 14\n", + " 14\n", + " 2022-05-05 14:35:32.971809\n", + " 2022-05-05 14:35:32.973595\n", " \n", " \n", " 1\n", - " 2\n", + " 38\n", + " 38\n", " 2\n", " 1\n", " 1\n", @@ -953,17 +971,16 @@ " 1\n", " 1\n", " 1\n", - " 1\n", - " 2\n", - " 2\n", - " 2\n", - " 2022-05-04 13:27:33.635692\n", - " 2022-05-04 13:27:34.000771\n", + " 38\n", + " 38\n", + " 38\n", + " 2022-05-05 14:35:32.971809\n", + " 2022-05-05 14:35:32.973595\n", " \n", " \n", " 2\n", - " 3\n", - " 3\n", + " 8\n", + " 8\n", " 1\n", " 1\n", " 1\n", @@ -971,16 +988,16 @@ " 1\n", " 1\n", " 1\n", - " 3\n", - " 3\n", - " 3\n", - " 2022-05-04 13:27:33.635692\n", - " 2022-05-04 13:27:34.000771\n", + " 8\n", + " 8\n", + " 8\n", + " 2022-05-05 14:35:32.971809\n", + " 2022-05-05 14:35:32.973595\n", " \n", " \n", " 3\n", - " 4\n", - " 4\n", + " 5\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -988,28 +1005,28 @@ " 1\n", " 1\n", " 1\n", - " 4\n", - " 4\n", - " 4\n", - " 2022-05-04 13:27:33.635692\n", - " 2022-05-04 13:27:34.000771\n", + " 5\n", + " 5\n", + " 5\n", + " 2022-05-05 14:35:32.971809\n", + " 2022-05-05 14:35:32.973595\n", " \n", " \n", " 4\n", - " 5\n", - " 5\n", - " 1\n", + " 31\n", + " 31\n", + " 2\n", " 1\n", " 1\n", " 1\n", " 1\n", " 1\n", " 1\n", - " 5\n", - " 5\n", - " 5\n", - " 2022-05-04 13:27:33.635692\n", - " 2022-05-04 13:27:34.000771\n", + " 31\n", + " 31\n", + " 31\n", + " 2022-05-05 14:35:32.971809\n", + " 2022-05-05 14:35:32.973595\n", " \n", " \n", "\n", @@ -1017,35 +1034,35 @@ ], "text/plain": [ " user_id user_shops user_profile user_group user_gender user_age \\\n", - "0 1 1 1 1 1 1 \n", - "1 2 2 1 1 1 1 \n", - "2 3 3 1 1 1 1 \n", - "3 4 4 1 1 1 1 \n", - "4 5 5 1 1 1 1 \n", + "0 14 14 1 1 1 1 \n", + "1 38 38 2 1 1 1 \n", + "2 8 8 1 1 1 1 \n", + "3 5 5 1 1 1 1 \n", + "4 31 31 2 1 1 1 \n", "\n", " user_consumption_2 user_is_occupied user_geography user_intentions \\\n", - "0 1 1 1 1 \n", - "1 1 1 1 2 \n", - "2 1 1 1 3 \n", - "3 1 1 1 4 \n", - "4 1 1 1 5 \n", + "0 1 1 1 14 \n", + "1 1 1 1 38 \n", + "2 1 1 1 8 \n", + "3 1 1 1 5 \n", + "4 1 1 1 31 \n", "\n", " user_brands user_categories datetime \\\n", - "0 1 1 2022-05-04 13:27:33.635692 \n", - "1 2 2 2022-05-04 13:27:33.635692 \n", - "2 3 3 2022-05-04 13:27:33.635692 \n", - "3 4 4 2022-05-04 13:27:33.635692 \n", - "4 5 5 2022-05-04 13:27:33.635692 \n", + "0 14 14 2022-05-05 14:35:32.971809 \n", + "1 38 38 2022-05-05 14:35:32.971809 \n", + "2 8 8 2022-05-05 14:35:32.971809 \n", + "3 5 5 2022-05-05 14:35:32.971809 \n", + "4 31 31 2022-05-05 14:35:32.971809 \n", "\n", " created \n", - "0 2022-05-04 13:27:34.000771 \n", - "1 2022-05-04 13:27:34.000771 \n", - "2 2022-05-04 13:27:34.000771 \n", - "3 2022-05-04 13:27:34.000771 \n", - "4 2022-05-04 13:27:34.000771 " + "0 2022-05-05 14:35:32.973595 \n", + "1 2022-05-05 14:35:32.973595 \n", + "2 2022-05-05 14:35:32.973595 \n", + "3 2022-05-05 14:35:32.973595 \n", + "4 2022-05-05 14:35:32.973595 " ] }, - "execution_count": 27, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1056,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "2981b3ed-6156-49f0-aa14-326a3853a58a", "metadata": {}, "outputs": [], @@ -1066,17 +1083,26 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "0a33a668-8e2a-4546-8f54-0060d405ba91", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ "item_features = unique_rows_by_features(train, Tags.ITEM, Tags.ITEM_ID).compute().reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "97189581-473c-4928-8be7-ec31b86d69ee", "metadata": {}, "outputs": [ @@ -1086,7 +1112,7 @@ "(448, 4)" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1097,7 +1123,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "68a694d6-926f-4b0f-8edc-8cc7ac85ade7", "metadata": {}, "outputs": [], @@ -1110,7 +1136,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "6c03fa22-b112-4243-bbe1-1cd7260cb85b", "metadata": {}, "outputs": [ @@ -1146,12 +1172,12 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 2022-05-04 13:27:34.097313\n", - " 2022-05-04 13:27:34.101176\n", + " 15\n", + " 15\n", + " 15\n", + " 15\n", + " 2022-05-05 14:35:33.084300\n", + " 2022-05-05 14:35:33.086938\n", " \n", " \n", " 1\n", @@ -1159,35 +1185,35 @@ " 2\n", " 2\n", " 2\n", - " 2022-05-04 13:27:34.097313\n", - " 2022-05-04 13:27:34.101176\n", + " 2022-05-05 14:35:33.084300\n", + " 2022-05-05 14:35:33.086938\n", " \n", " \n", " 2\n", - " 3\n", - " 3\n", - " 3\n", - " 3\n", - " 2022-05-04 13:27:34.097313\n", - " 2022-05-04 13:27:34.101176\n", + " 59\n", + " 59\n", + " 59\n", + " 59\n", + " 2022-05-05 14:35:33.084300\n", + " 2022-05-05 14:35:33.086938\n", " \n", " \n", " 3\n", - " 4\n", - " 4\n", - " 4\n", - " 4\n", - " 2022-05-04 13:27:34.097313\n", - " 2022-05-04 13:27:34.101176\n", + " 149\n", + " 149\n", + " 149\n", + " 149\n", + " 2022-05-05 14:35:33.084300\n", + " 2022-05-05 14:35:33.086938\n", " \n", " \n", " 4\n", - " 5\n", - " 5\n", - " 5\n", - " 5\n", - " 2022-05-04 13:27:34.097313\n", - " 2022-05-04 13:27:34.101176\n", + " 63\n", + " 63\n", + " 63\n", + " 63\n", + " 2022-05-05 14:35:33.084300\n", + " 2022-05-05 14:35:33.086938\n", " \n", " \n", "\n", @@ -1195,21 +1221,21 @@ ], "text/plain": [ " item_id item_category item_shop item_brand datetime \\\n", - "0 1 1 1 1 2022-05-04 13:27:34.097313 \n", - "1 2 2 2 2 2022-05-04 13:27:34.097313 \n", - "2 3 3 3 3 2022-05-04 13:27:34.097313 \n", - "3 4 4 4 4 2022-05-04 13:27:34.097313 \n", - "4 5 5 5 5 2022-05-04 13:27:34.097313 \n", + "0 15 15 15 15 2022-05-05 14:35:33.084300 \n", + "1 2 2 2 2 2022-05-05 14:35:33.084300 \n", + "2 59 59 59 59 2022-05-05 14:35:33.084300 \n", + "3 149 149 149 149 2022-05-05 14:35:33.084300 \n", + "4 63 63 63 63 2022-05-05 14:35:33.084300 \n", "\n", " created \n", - "0 2022-05-04 13:27:34.101176 \n", - "1 2022-05-04 13:27:34.101176 \n", - "2 2022-05-04 13:27:34.101176 \n", - "3 2022-05-04 13:27:34.101176 \n", - "4 2022-05-04 13:27:34.101176 " + "0 2022-05-05 14:35:33.086938 \n", + "1 2022-05-05 14:35:33.086938 \n", + "2 2022-05-05 14:35:33.086938 \n", + "3 2022-05-05 14:35:33.086938 \n", + "4 2022-05-05 14:35:33.086938 " ] }, - "execution_count": 32, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1220,7 +1246,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "id": "c312884b-a1f8-4e08-8068-696e06a9bf46", "metadata": {}, "outputs": [], @@ -1239,10 +1265,21 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "id": "00f1fe65-882e-4962-bb16-19a130fda215", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n", + "/core/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n", + " warnings.warn(\n" + ] + } + ], "source": [ "item_embs = model.item_embeddings(Dataset(item_features, schema=schema), batch_size=1024)\n", "item_embs_df = item_embs.compute(scheduler=\"synchronous\")" @@ -1250,7 +1287,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 34, "id": "cf8b82ea-6cce-4dab-ad17-114b5e7eabd4", "metadata": {}, "outputs": [], @@ -1261,7 +1298,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 35, "id": "e02f0957-6665-400a-80c0-60b307466caf", "metadata": {}, "outputs": [ @@ -1312,123 +1349,123 @@ " \n", " \n", " 0\n", - " 1\n", - " -0.023812\n", - " 0.049087\n", - " -0.006011\n", - " -0.018138\n", - " 0.076004\n", - " -0.047629\n", - " -0.016437\n", - " -0.011316\n", - " -0.010015\n", + " 15\n", + " 0.009764\n", + " 0.008560\n", + " -0.008236\n", + " 0.001079\n", + " 0.006261\n", + " 0.009971\n", + " 0.009541\n", + " -0.019058\n", + " 0.036954\n", " ...\n", - " -0.023935\n", - " -0.002843\n", - " 0.027594\n", - " -0.002276\n", - " -0.011821\n", - " 0.011062\n", - " 0.005805\n", - " -0.014591\n", - " -0.013899\n", - " 0.030367\n", + " 0.003188\n", + " 0.025160\n", + " 0.003522\n", + " 0.024247\n", + " 0.023871\n", + " -0.017902\n", + " 0.007299\n", + " -0.013606\n", + " 0.003823\n", + " 0.013440\n", " \n", " \n", " 1\n", " 2\n", - " -0.026561\n", - " -0.025152\n", - " -0.057405\n", - " -0.000327\n", - " 0.028478\n", - " -0.010988\n", - " -0.009221\n", - " 0.001220\n", - " -0.013272\n", + " 0.014012\n", + " 0.019497\n", + " -0.015541\n", + " -0.006398\n", + " 0.004327\n", + " 0.077202\n", + " 0.033808\n", + " -0.008815\n", + " 0.062406\n", " ...\n", - " -0.015662\n", - " -0.002456\n", - " -0.043615\n", - " 0.043939\n", - " -0.025433\n", - " -0.007775\n", - " 0.029884\n", - " -0.058831\n", - " -0.011239\n", - " -0.000120\n", + " 0.011110\n", + " 0.010548\n", + " -0.021843\n", + " 0.044792\n", + " 0.043012\n", + " -0.014118\n", + " 0.028639\n", + " 0.021822\n", + " -0.008754\n", + " -0.021522\n", " \n", " \n", " 2\n", - " 3\n", - " 0.003156\n", - " 0.002400\n", - " -0.020462\n", - " -0.004567\n", - " 0.022638\n", - " -0.007897\n", - " 0.011588\n", - " 0.002444\n", - " -0.052003\n", + " 59\n", + " 0.019117\n", + " 0.004479\n", + " -0.031548\n", + " -0.020642\n", + " 0.035912\n", + " 0.015081\n", + " 0.001050\n", + " -0.005041\n", + " 0.014927\n", " ...\n", - " 0.008699\n", - " -0.032143\n", - " 0.013315\n", - " -0.033496\n", - " -0.003374\n", - " 0.009305\n", - " -0.002533\n", - " -0.011182\n", - " -0.022575\n", - " 0.002656\n", + " -0.012306\n", + " 0.012168\n", + " -0.012633\n", + " 0.017448\n", + " 0.016328\n", + " -0.022380\n", + " -0.008413\n", + " 0.028804\n", + " 0.009847\n", + " -0.018740\n", " \n", " \n", " 3\n", - " 4\n", - " 0.015630\n", - " -0.026810\n", - " -0.038625\n", - " -0.009386\n", - " 0.070407\n", - " -0.002853\n", - " -0.044986\n", - " -0.026715\n", - " 0.029586\n", + " 149\n", + " 0.022785\n", + " 0.018213\n", + " -0.035266\n", + " 0.002669\n", + " 0.004770\n", + " 0.023919\n", + " 0.004185\n", + " -0.048692\n", + " 0.013208\n", " ...\n", - " 0.038602\n", - " 0.005720\n", - " -0.027890\n", - " 0.029849\n", - " -0.024938\n", - " -0.041847\n", - " -0.003418\n", - " -0.057658\n", - " 0.015824\n", - " -0.025491\n", + " -0.002208\n", + " 0.023864\n", + " -0.019109\n", + " -0.006574\n", + " 0.012167\n", + " 0.009672\n", + " -0.016330\n", + " 0.005729\n", + " 0.025929\n", + " -0.008024\n", " \n", " \n", " 4\n", - " 5\n", - " -0.029692\n", - " -0.009475\n", - " -0.058727\n", - " 0.000272\n", - " 0.045521\n", - " 0.018626\n", - " -0.003292\n", - " -0.022988\n", - " -0.009460\n", + " 63\n", + " -0.012254\n", + " -0.016057\n", + " -0.017900\n", + " -0.017911\n", + " -0.037520\n", + " 0.012523\n", + " -0.007489\n", + " -0.005636\n", + " 0.017751\n", " ...\n", - " 0.023875\n", - " 0.024851\n", - " -0.022909\n", - " 0.022784\n", - " 0.001589\n", - " 0.024690\n", - " 0.047870\n", - " -0.045683\n", - " -0.041048\n", - " 0.019730\n", + " 0.009128\n", + " 0.032535\n", + " -0.020749\n", + " 0.038132\n", + " 0.032030\n", + " -0.052667\n", + " -0.014624\n", + " -0.009040\n", + " -0.029470\n", + " -0.004175\n", " \n", " \n", "\n", @@ -1437,30 +1474,30 @@ ], "text/plain": [ " item_id 0 1 2 3 4 5 \\\n", - "0 1 -0.023812 0.049087 -0.006011 -0.018138 0.076004 -0.047629 \n", - "1 2 -0.026561 -0.025152 -0.057405 -0.000327 0.028478 -0.010988 \n", - "2 3 0.003156 0.002400 -0.020462 -0.004567 0.022638 -0.007897 \n", - "3 4 0.015630 -0.026810 -0.038625 -0.009386 0.070407 -0.002853 \n", - "4 5 -0.029692 -0.009475 -0.058727 0.000272 0.045521 0.018626 \n", + "0 15 0.009764 0.008560 -0.008236 0.001079 0.006261 0.009971 \n", + "1 2 0.014012 0.019497 -0.015541 -0.006398 0.004327 0.077202 \n", + "2 59 0.019117 0.004479 -0.031548 -0.020642 0.035912 0.015081 \n", + "3 149 0.022785 0.018213 -0.035266 0.002669 0.004770 0.023919 \n", + "4 63 -0.012254 -0.016057 -0.017900 -0.017911 -0.037520 0.012523 \n", "\n", " 6 7 8 ... 54 55 56 57 \\\n", - "0 -0.016437 -0.011316 -0.010015 ... -0.023935 -0.002843 0.027594 -0.002276 \n", - "1 -0.009221 0.001220 -0.013272 ... -0.015662 -0.002456 -0.043615 0.043939 \n", - "2 0.011588 0.002444 -0.052003 ... 0.008699 -0.032143 0.013315 -0.033496 \n", - "3 -0.044986 -0.026715 0.029586 ... 0.038602 0.005720 -0.027890 0.029849 \n", - "4 -0.003292 -0.022988 -0.009460 ... 0.023875 0.024851 -0.022909 0.022784 \n", + "0 0.009541 -0.019058 0.036954 ... 0.003188 0.025160 0.003522 0.024247 \n", + "1 0.033808 -0.008815 0.062406 ... 0.011110 0.010548 -0.021843 0.044792 \n", + "2 0.001050 -0.005041 0.014927 ... -0.012306 0.012168 -0.012633 0.017448 \n", + "3 0.004185 -0.048692 0.013208 ... -0.002208 0.023864 -0.019109 -0.006574 \n", + "4 -0.007489 -0.005636 0.017751 ... 0.009128 0.032535 -0.020749 0.038132 \n", "\n", " 58 59 60 61 62 63 \n", - "0 -0.011821 0.011062 0.005805 -0.014591 -0.013899 0.030367 \n", - "1 -0.025433 -0.007775 0.029884 -0.058831 -0.011239 -0.000120 \n", - "2 -0.003374 0.009305 -0.002533 -0.011182 -0.022575 0.002656 \n", - "3 -0.024938 -0.041847 -0.003418 -0.057658 0.015824 -0.025491 \n", - "4 0.001589 0.024690 0.047870 -0.045683 -0.041048 0.019730 \n", + "0 0.023871 -0.017902 0.007299 -0.013606 0.003823 0.013440 \n", + "1 0.043012 -0.014118 0.028639 0.021822 -0.008754 -0.021522 \n", + "2 0.016328 -0.022380 -0.008413 0.028804 0.009847 -0.018740 \n", + "3 0.012167 0.009672 -0.016330 0.005729 0.025929 -0.008024 \n", + "4 0.032030 -0.052667 -0.014624 -0.009040 -0.029470 -0.004175 \n", "\n", "[5 rows x 65 columns]" ] }, - "execution_count": 36, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1471,7 +1508,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "id": "66d7271e-0ea6-4568-ac5a-04089735f542", "metadata": {}, "outputs": [], @@ -1498,51 +1535,7 @@ }, { "cell_type": "code", - "execution_count": 38, - "id": "c4005d9b-1dba-40e0-ba69-c272edc8b690", - "metadata": {}, - "outputs": [], - "source": [ - "# %%writefile /Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo/user_features.py\n", - "# from google.protobuf.duration_pb2 import Duration\n", - "# import datetime \n", - "# from feast import Entity, Feature, FeatureView, ValueType\n", - "# from feast.infra.offline_stores.file_source import FileSource\n", - "\n", - "# user_features = FileSource(\n", - "# path=\"/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo/data/user_features.parquet\",\n", - "# event_timestamp_column=\"datetime\",\n", - "# created_timestamp_column=\"created\",\n", - "# )\n", - "\n", - "# user = Entity(name=\"user_id\", value_type=ValueType.INT32, description=\"user id\",)\n", - "\n", - "# user_features_view = FeatureView(\n", - "# name=\"user_features\",\n", - "# entities=[\"user_id\"],\n", - "# ttl=Duration(seconds=86400 * 7),\n", - "# features=[\n", - "# Feature(name=\"user_shops\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_profile\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_group\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_gender\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_age\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_consumption_2\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_is_occupied\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_geography\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_intentions\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_brands\", dtype=ValueType.INT32),\n", - "# Feature(name=\"user_categories\", dtype=ValueType.INT32),\n", - "# ],\n", - "# online=True,\n", - "# input=user_features,\n", - "# tags={},\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 39, + "execution_count": 37, "id": "4ee27d67-e35a-42c5-8025-ed73f35c8e13", "metadata": {}, "outputs": [], @@ -1556,7 +1549,7 @@ "from feast.infra.offline_stores.file_source import FileSource\n", "\n", "user_features = FileSource(\n", - " path=\"/tmp/examples/feature_repo/data/user_features.parquet\",\n", + " path=\"{}\",\n", " event_timestamp_column=\"datetime\",\n", " created_timestamp_column=\"created\",\n", ")\n", @@ -1582,52 +1575,16 @@ " ],\n", " online=True,\n", " input=user_features,\n", - " tags={},\n", + " tags=dict(),\n", ")\n", - "'''\n", + "'''.format(os.path.join(BASE_DIR, 'feature_repo/data/','user_features.parquet'))\n", ")\n", "file.close()" ] }, { "cell_type": "code", - "execution_count": 40, - "id": "49c282ad-fcd2-448a-8a01-f6b0f0f325d5", - "metadata": {}, - "outputs": [], - "source": [ - "# %%writefile /Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo/item_features.py\n", - "#from google.protobuf.duration_pb2 import Duration\n", - "# import datetime \n", - "# from feast import Entity, Feature, FeatureView, ValueType\n", - "# from feast.infra.offline_stores.file_source import FileSource\n", - "\n", - "# item_features = FileSource(\n", - "# path=\"/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo/data/item_features.parquet\",\n", - "# event_timestamp_column=\"datetime\",\n", - "# created_timestamp_column=\"created\",\n", - "# )\n", - "\n", - "# item = Entity(name=\"item_id\", value_type=ValueType.INT32, description=\"item id\",)\n", - "\n", - "# item_features_view = FeatureView(\n", - "# name=\"item_features\",\n", - "# entities=[\"item_id\"],\n", - "# ttl=Duration(seconds=86400 * 7),\n", - "# features=[\n", - "# Feature(name=\"item_category\", dtype=ValueType.INT32),\n", - "# Feature(name=\"item_shop\", dtype=ValueType.INT32),\n", - "# Feature(name=\"item_brand\", dtype=ValueType.INT32),\n", - "# ],\n", - "# online=True,\n", - "# input=item_features,\n", - "# tags={},\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 41, + "execution_count": 38, "id": "48a5927c-840d-410c-8f5b-bebce4f79640", "metadata": {}, "outputs": [], @@ -1641,7 +1598,7 @@ "from feast.infra.offline_stores.file_source import FileSource\n", "\n", "item_features = FileSource(\n", - " path=\"/tmp/examples/feature_repo/data/item_features.parquet\",\n", + " path=\"{}\",\n", " event_timestamp_column=\"datetime\",\n", " created_timestamp_column=\"created\",\n", ")\n", @@ -1659,9 +1616,9 @@ " ],\n", " online=True,\n", " input=item_features,\n", - " tags={},\n", + " tags=dict(),\n", ")\n", - "'''\n", + "'''.format(os.path.join(BASE_DIR, 'feature_repo/data/','item_features.parquet'))\n", " )\n", "file.close() " ] @@ -1676,19 +1633,18 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "57133c1e-18d9-4ccb-9704-cdebd271985e", "metadata": {}, "outputs": [], "source": [ - "# install tree\n", - "# !apt-get update\n", - "# !apt-get install tree" + "# install seedir\n", + "!pip install seedir" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 41, "id": "986d53ea-c946-4046-a390-6d3b8801d280", "metadata": {}, "outputs": [ @@ -1696,22 +1652,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[01;34m/tmp/examples/feature_repo\u001b[00m\n", - "├── __init__.py\n", - "├── \u001b[01;34mdata\u001b[00m\n", - "│   ├── item_features.parquet\n", - "│   └── user_features.parquet\n", - "├── feature_store.yaml\n", - "├── item_features.py\n", - "└── user_features.py\n", - "\n", - "1 directory, 6 files\n" + "feature_repo/\n", + "├─__init__.py\n", + "├─data/\n", + "│ ├─item_features.parquet\n", + "│ └─user_features.parquet\n", + "├─feature_store.yaml\n", + "├─item_features.py\n", + "└─user_features.py\n" ] } ], "source": [ + "import seedir as sd\n", "feature_repo_path = os.path.join(BASE_DIR, 'feature_repo')\n", - "!tree $feature_repo_path" + "sd.seedir(feature_repo_path, style='lines', itemlimit=10, depthlimit=3, exclude_folders='.ipynb_checkpoints', sort=True)" ] }, { @@ -1724,49 +1679,6 @@ "\n", "For the next step, move on to the `02-Deploying-multi-stage-Recsys-with-Merlin-Systems.ipynb` notebook to deploy our saved models as an ensemble to TIS and obtain prediction results for a qiven request." ] - }, - { - "cell_type": "markdown", - "id": "32a989d0-0e3a-48cd-b4a1-fa630e242db1", - "metadata": {}, - "source": [ - "string_items = '{\"ordered_ids\": array([[137],\\n [311],\\n [292],\\n [332],\\n [383],\\n [233],\\n [445],\\n [297],\\n [ 93],\\n [284]], dtype=int32)}'" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "4d2bb12f-a8a4-4b07-b6f1-08db2de8f656", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nTest this is our file\\nwahat can we do\\npath = test\\nasdasd\\nhow does it look\\n'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "'''\n", - "Test this is our file\n", - "wahat can we do\n", - "path = {}\n", - "asdasd\n", - "how does it look\n", - "'''.format('test')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dcdcc0b1-bd37-4d47-9158-decd1617711f", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index 2b414e36f..c32bc1c3e 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -62,65 +62,29 @@ ] }, { - "cell_type": "code", - "execution_count": 2, - "id": "ea3756f8-a115-436a-b5d4-48f0641451b9", + "cell_type": "markdown", + "id": "a27e18d7-b3e4-481c-b69e-23193b212c56", "metadata": {}, - "outputs": [], "source": [ - "#%pip install tensorflow \"feast<0.20\" faiss-gpu" + "At this step, we assume you already installed the tensorflow-gpu, feast and faiss-gpu libraries when running the first notebook `01-Building-Recommender-Systems-with-Merlin.ipynb`. In case you need to install them, execute the following script in a cell.\n", + "```\n", + "%pip install tensorflow \"feast<0.20\" faiss-gpu\n", + "```" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "4db1b5f1-c8fa-4e03-8744-1197873c5bee", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/faiss/loader.py:28: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", - " if LooseVersion(numpy.__version__) >= \"1.19\":\n", - "/usr/local/lib/python3.8/dist-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", - " other = LooseVersion(other)\n", - "05/04/2022 01:28:59 PM INFO:Loading faiss with AVX2 support.\n", - "05/04/2022 01:28:59 PM INFO:Could not load library with AVX2 support due to:\n", - "ModuleNotFoundError(\"No module named 'faiss.swigfaiss_avx2'\")\n", - "05/04/2022 01:28:59 PM INFO:Loading faiss.\n", - "05/04/2022 01:28:59 PM INFO:Successfully loaded faiss.\n", - "/usr/lib/python3.8/site-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " DESCRIPTOR = _descriptor.FileDescriptor(\n", - "/usr/lib/python3.8/site-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:37: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _descriptor.FieldDescriptor(\n", - "/usr/lib/python3.8/site-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:30: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _INTEGERSTATISTICS = _descriptor.Descriptor(\n", - "/usr/lib/python3.8/site-packages/dask_cudf/core.py:32: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", - " DASK_VERSION = LooseVersion(dask.__version__)\n", - "/usr/local/lib/python3.8/dist-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", - " other = LooseVersion(other)\n", - "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " DESCRIPTOR = _descriptor.FileDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:33: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _descriptor.EnumValueDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:27: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _DATATYPE = _descriptor.EnumDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:330: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _descriptor.FieldDescriptor(\n", - "/usr/local/lib/python3.8/dist-packages/tritonclient/grpc/model_config_pb2.py:323: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", - " _MODELRATELIMITER_RESOURCE = _descriptor.Descriptor(\n", - "/usr/local/lib/python3.8/dist-packages/flatbuffers/compat.py:19: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses\n", - " import imp\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import pandas as pd\n", "import feast\n", "import faiss\n", + "import seedir as sd\n", "from nvtabular import ColumnSchema, Schema\n", "\n", "from merlin.systems.dag.ensemble import Ensemble\n", @@ -164,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "e5fa545b-a979-4216-b176-ffd70d66e69d", "metadata": {}, "outputs": [ @@ -172,11 +136,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "/tmp/examples/feature_repo\n", + "/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo\n", "/usr/local/lib/python3.8/dist-packages/feast/feature_view.py:100: DeprecationWarning: The argument 'input' is being deprecated. Please use 'batch_source' instead. Feast 0.13 and onwards will not support the argument 'input'.\n", " warnings.warn(\n", - "Created entity \u001b[1m\u001b[32muser_id\u001b[0m\n", "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", + "Created entity \u001b[1m\u001b[32muser_id\u001b[0m\n", "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", "Created feature view \u001b[1m\u001b[32muser_features\u001b[0m\n", "\n", @@ -207,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "52dacbbc-bdb6-4f7a-b202-3802050f0362", "metadata": {}, "outputs": [ @@ -218,9 +182,9 @@ "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views from \u001b[1m\u001b[32m1995-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 448/448 [00:00<00:00, 4483.47it/s]\n", + "100%|███████████████████████████████████████████████████████████| 448/448 [00:00<00:00, 4689.41it/s]\n", "\u001b[1m\u001b[32muser_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 453/453 [00:00<00:00, 1797.21it/s]\n" + "100%|███████████████████████████████████████████████████████████| 458/458 [00:00<00:00, 1477.71it/s]\n" ] } ], @@ -238,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "9caba4e3-e6e0-4e2f-b51d-cd3456fd4a63", "metadata": {}, "outputs": [ @@ -246,25 +210,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[01;34m/tmp/examples/feature_repo\u001b[00m\n", - "├── __init__.py\n", - "├── \u001b[01;34mdata\u001b[00m\n", - "│   ├── item_features.parquet\n", - "│   ├── online_store.db\n", - "│   ├── registry.db\n", - "│   └── user_features.parquet\n", - "├── feature_store.yaml\n", - "├── item_features.py\n", - "└── user_features.py\n", - "\n", - "1 directory, 8 files\n" + "feature_repo/\n", + "├─__init__.py\n", + "├─data/\n", + "│ ├─item_features.parquet\n", + "│ ├─online_store.db\n", + "│ ├─registry.db\n", + "│ └─user_features.parquet\n", + "├─feature_store.yaml\n", + "├─item_features.py\n", + "└─user_features.py\n" ] } ], "source": [ "# set up the base dir to for feature store\n", "feature_repo_path = os.path.join(BASE_DIR, 'feature_repo')\n", - "!tree $feature_repo_path" + "sd.seedir(feature_repo_path, style='lines', itemlimit=10, depthlimit=5, exclude_folders=['.ipynb_checkpoints', '__pycache__'], sort=True)" ] }, { @@ -285,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "96b7adc1-623b-41df-b1f9-dd4086a15bc9", "metadata": {}, "outputs": [], @@ -304,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "23ba59b5-08c3-44b5-86f2-e63dec6893af", "metadata": {}, "outputs": [], @@ -324,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "6cdda540-8209-49f9-8b6a-4b330570fdd3", "metadata": {}, "outputs": [], @@ -348,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "0b6cc5bf-d07c-4963-a748-6e2b4827ee36", "metadata": {}, "outputs": [ @@ -379,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "3bc00e04-c70c-4882-9952-66f4dbb97bdc", "metadata": {}, "outputs": [], @@ -397,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "3decbe7b-03e3-4978-baac-03f6a0b078c9", "metadata": {}, "outputs": [ @@ -432,24 +394,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "21139caa-3a51-42e6-b006-21a92c95f1bc", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "05/04/2022 01:29:23 PM INFO:init\n" - ] - }, { "data": { "text/plain": [ "" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -463,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "47c2d9b1-51dc-4549-977d-d7941ee6486c", "metadata": {}, "outputs": [ @@ -471,11 +426,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-05-04 13:29:26.767925: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", + "2022-05-05 14:38:32.696154: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-05-04 13:29:28.857665: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16254 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", - "2022-05-04 13:29:28.858387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 30338 MB memory: -> device: 1, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n", - "05/04/2022 01:29:30 PM WARNING:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + "2022-05-05 14:38:34.750160: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30667 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n", + "2022-05-05 14:38:34.750858: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 30315 MB memory: -> device: 1, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n", + "05/05/2022 02:38:36 PM WARNING:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" ] } ], @@ -498,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "b270f663-0ae1-4356-acd4-5f8c986abf4d", "metadata": {}, "outputs": [], @@ -522,7 +477,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "eb0ef434-03a5-4a36-afb9-e19a43243c64", "metadata": {}, "outputs": [], @@ -557,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "ce31723e-af4d-4827-bb60-3a9fafcd9da6", "metadata": {}, "outputs": [], @@ -575,7 +530,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "7f65598b-e3e7-4238-a73e-19d00c3deb26", "metadata": {}, "outputs": [], @@ -607,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "b28c452f-543c-45a4-9995-130ca6919669", "metadata": {}, "outputs": [], @@ -618,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "6c64d686-aed5-42f8-b517-482b4237c69f", "metadata": {}, "outputs": [], @@ -640,67 +595,65 @@ }, { "cell_type": "code", - "execution_count": 21, - "id": "b3c81b2a-4fca-497b-8edf-5403fe5a483a", + "execution_count": 32, + "id": "89182219-40a6-458c-af0e-7a8e83f364aa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[01;34m/tmp/examples/poc_ensemble\u001b[00m\n", - "├── \u001b[01;34m0_queryfeast\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   └── model.py\n", - "│   └── config.pbtxt\n", - "├── \u001b[01;34m1_predicttensorflow\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   └── \u001b[01;34mmodel.savedmodel\u001b[00m\n", - "│   │   ├── \u001b[01;34massets\u001b[00m\n", - "│   │   ├── keras_metadata.pb\n", - "│   │   ├── saved_model.pb\n", - "│   │   └── \u001b[01;34mvariables\u001b[00m\n", - "│   │   ├── variables.data-00000-of-00001\n", - "│   │   └── variables.index\n", - "│   └── config.pbtxt\n", - "├── \u001b[01;34m2_queryfaiss\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   ├── \u001b[01;34mindex.faiss\u001b[00m\n", - "│   │   │   └── index.faiss\n", - "│   │   └── model.py\n", - "│   └── config.pbtxt\n", - "├── \u001b[01;34m3_queryfeast\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   └── model.py\n", - "│   └── config.pbtxt\n", - "├── \u001b[01;34m4_unrollfeatures\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   └── model.py\n", - "│   └── config.pbtxt\n", - "├── \u001b[01;34m5_predicttensorflow\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   └── \u001b[01;34mmodel.savedmodel\u001b[00m\n", - "│   │   ├── \u001b[01;34massets\u001b[00m\n", - "│   │   ├── keras_metadata.pb\n", - "│   │   ├── saved_model.pb\n", - "│   │   └── \u001b[01;34mvariables\u001b[00m\n", - "│   │   ├── variables.data-00000-of-00001\n", - "│   │   └── variables.index\n", - "│   └── config.pbtxt\n", - "├── \u001b[01;34m6_softmaxsampling\u001b[00m\n", - "│   ├── \u001b[01;34m1\u001b[00m\n", - "│   │   └── model.py\n", - "│   └── config.pbtxt\n", - "└── \u001b[01;34mensemble_model\u001b[00m\n", - " ├── \u001b[01;34m1\u001b[00m\n", - " └── config.pbtxt\n", - "\n", - "23 directories, 22 files\n" + "poc_ensemble/\n", + "├─0_queryfeast/\n", + "│ ├─1/\n", + "│ │ └─model.py\n", + "│ └─config.pbtxt\n", + "├─1_predicttensorflow/\n", + "│ ├─1/\n", + "│ │ └─model.savedmodel/\n", + "│ │ ├─assets/\n", + "│ │ ├─keras_metadata.pb\n", + "│ │ ├─saved_model.pb\n", + "│ │ └─variables/\n", + "│ │ ├─variables.data-00000-of-00001\n", + "│ │ └─variables.index\n", + "│ └─config.pbtxt\n", + "├─2_queryfaiss/\n", + "│ ├─1/\n", + "│ │ ├─index.faiss/\n", + "│ │ │ └─index.faiss\n", + "│ │ └─model.py\n", + "│ └─config.pbtxt\n", + "├─3_queryfeast/\n", + "│ ├─1/\n", + "│ │ └─model.py\n", + "│ └─config.pbtxt\n", + "├─4_unrollfeatures/\n", + "│ ├─1/\n", + "│ │ └─model.py\n", + "│ └─config.pbtxt\n", + "├─5_predicttensorflow/\n", + "│ ├─1/\n", + "│ │ └─model.savedmodel/\n", + "│ │ ├─assets/\n", + "│ │ ├─keras_metadata.pb\n", + "│ │ ├─saved_model.pb\n", + "│ │ └─variables/\n", + "│ │ ├─variables.data-00000-of-00001\n", + "│ │ └─variables.index\n", + "│ └─config.pbtxt\n", + "├─6_softmaxsampling/\n", + "│ ├─1/\n", + "│ │ └─model.py\n", + "│ └─config.pbtxt\n", + "└─ensemble_model/\n", + " ├─1/\n", + " └─config.pbtxt\n" ] } ], "source": [ - "!tree $export_path" + "sd.seedir(export_path, style='lines', itemlimit=10, depthlimit=5, exclude_folders=['.ipynb_checkpoints', '__pycache__'], sort=True)" ] }, { @@ -745,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "e95f1d85-9cbc-423b-9de1-91d1e421e5e4", "metadata": {}, "outputs": [], @@ -762,37 +715,26 @@ }, { "cell_type": "code", - "execution_count": 24, - "id": "bab9f38d-45af-4eec-9fd5-c29db901f51c", - "metadata": {}, - "outputs": [], - "source": [ - "# from merlin.systems.triton.utils import run_ensemble_on_tritonserver\n", - "# run_ensemble_on_tritonserver(export_path, ['ordered_ids'], request, 'ensemble_model')" - ] - }, - { - "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "74ec62f2-5935-45c6-8058-e1cdade6f80f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ordered_ids': array([[ 59],\n", - " [102],\n", - " [223],\n", - " [ 72],\n", - " [156],\n", - " [ 16],\n", - " [412],\n", - " [446],\n", - " [225],\n", - " [149]], dtype=int32)}" + "{'ordered_ids': array([[309],\n", + " [169],\n", + " [ 82],\n", + " [343],\n", + " [193],\n", + " [205],\n", + " [334],\n", + " [437],\n", + " [157],\n", + " [ 98]], dtype=int32)}" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/examples/test_building_deploying_multi_stage_RecSys.py b/tests/examples/test_building_deploying_multi_stage_RecSys.py index 346f18b87..d1dc087bb 100644 --- a/tests/examples/test_building_deploying_multi_stage_RecSys.py +++ b/tests/examples/test_building_deploying_multi_stage_RecSys.py @@ -3,7 +3,6 @@ from testbook import testbook from tests.conftest import REPO_ROOT -from merlin.systems.triton.utils import run_ensemble_on_tritonserver @testbook( @@ -11,18 +10,17 @@ / "examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb", execute=False, ) - def test_func(tb1): tb1.inject( """ import os os.environ["DATA_FOLDER"] = "/tmp/data/" os.environ["NUM_ROWS"] = "10000" - os.system('mkdir -p /tmp/examples') + os.system("mkdir -p /tmp/examples") os.environ["BASE_DIR"] = "/tmp/examples/" """ ) - #tb1.execute() + tb1.execute() assert os.path.isdir("/tmp/examples/dlrm") assert os.path.isdir("/tmp/examples/feature_repo") assert os.path.isdir("/tmp/examples/query_tower") @@ -31,7 +29,8 @@ def test_func(tb1): assert os.path.isfile("/tmp/examples/feature_repo/item_features.py") with testbook( - "/Merlin/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb", + REPO_ROOT + / "examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb", execute=False, ) as tb2: tb2.inject( @@ -45,18 +44,21 @@ def test_func(tb1): tb2.execute_cell(list(range(0, NUM_OF_CELLS - 3))) top_k = tb2.ref("top_k") outputs = tb2.ref("outputs") - request = tb2.ref('request') + request = tb2.ref("request") assert outputs[0] == "ordered_ids" tb2.inject( """ + import shutil from merlin.models.loader.tf_utils import configure_tensorflow configure_tensorflow() from merlin.systems.triton.utils import run_ensemble_on_tritonserver - response = run_ensemble_on_tritonserver('/tmp/examples/poc_ensemble', outputs, request, 'ensemble_model') - + response = run_ensemble_on_tritonserver( + "/tmp/examples/poc_ensemble", outputs, request, "ensemble_model" + ) + response = [x.tolist()[0] for x in response["ordered_ids"]] + shutil.rmtree("/tmp/examples/", ignore_errors=True) """ ) tb2.execute_cell(NUM_OF_CELLS - 2) - response = tb2.ref('response') - print(response) - #assert len(response[outputs[0]]) == top_k \ No newline at end of file + response = tb2.ref("response") + assert len(response) == top_k