From 1053f5b1e6921c5db21b3677661f218fe72f2ee3 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Wed, 5 Apr 2023 16:02:18 -0400 Subject: [PATCH 1/9] add config for vgg16 tils model --- wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml diff --git a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml new file mode 100644 index 0000000..4f4a366 --- /dev/null +++ b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml @@ -0,0 +1,20 @@ +# Configuration of a tumor infiltrating lymphocyte detection model (VGG16). +version: "1.0" +architecture: vgg16 +name: TCGA-TILs-v1 +url: https://stonybrookmedicine.box.com/shared/static/t7mbqcvv4fsd6jzzgf8pl1imvyobeg1x.pt +url_file_name: vgg16-tils-v1-20220112-f7988712.pt +num_classes: 2 +transform: + resize_size: 224 + # Normalize to [-1, 1] + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] +patch_size_pixels: 100 +spacing_um_px: 0.5 +class_names: + - notils + - tils +metadata: + notes: | + Original code and data are available at https://stonybrookmedicine.app.box.com/folder/128593362243 From 10941f37ee71c4b062b38da96943bcb75f627722 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Wed, 5 Apr 2023 17:39:35 -0400 Subject: [PATCH 2/9] transpose weights in vgg_16/fc6 from tf --- scripts/convert_tf_to_pytorch_til_vgg16.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/convert_tf_to_pytorch_til_vgg16.py b/scripts/convert_tf_to_pytorch_til_vgg16.py index 4c86338..bbc5329 100644 --- a/scripts/convert_tf_to_pytorch_til_vgg16.py +++ b/scripts/convert_tf_to_pytorch_til_vgg16.py @@ -66,6 +66,7 @@ def convert_tf_to_pytorch(input_path, output_path, num_classes: int): elif "fc" in tf_weights: if tf_weights == "vgg_16/fc6/weights": # [7, 7, 512, 4096] -> [25088, 4096] + tf_weight_array = tf_weight_array.transpose([2, 0, 1, 3]) tf_weight_array = tf_weight_array.reshape((25088, 4096)) # E.g., go from shape [1, 1, 4096, 1000] to [1000, 4096] tf_weight_array = tf_weight_array.squeeze().T From 3fdcbb3685328d0427d315a786e309fc1887766b Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 07:56:37 -0400 Subject: [PATCH 3/9] add mean=sample and std=sample to config Setting mean=sample and std=sample indicates that each image should be normalized to have mean 0 and variance 1. 
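
For illustration, the behaviour amounts to standardizing each image tensor with its
own statistics rather than fixed per-channel values; a minimal standalone sketch
(sample_normalize is a hypothetical helper name, the real change lands in
wsinfer/_modellib/transforms.py below):

    import torch

    def sample_normalize(img: torch.Tensor) -> torch.Tensor:
        # Shift and scale one image by its own mean and standard deviation.
        return (img - img.mean()) / img.std()

    x = torch.rand(3, 224, 224)              # float image in [0, 1]
    y = sample_normalize(x)
    print(y.mean().item(), y.std().item())   # approximately 0.0 and 1.0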
--- wsinfer/_modellib/models.py | 42 ++++++++++++++++------- wsinfer/_modellib/transforms.py | 15 +++++--- wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml | 4 +-- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/wsinfer/_modellib/models.py b/wsinfer/_modellib/models.py index 296db55..eb58ea6 100644 --- a/wsinfer/_modellib/models.py +++ b/wsinfer/_modellib/models.py @@ -138,14 +138,22 @@ def _validate_input(d: dict, config_path: Path) -> None: raise ValueError("'num_classes' must be an integer") if not isinstance(d["transform"]["resize_size"], int): raise ValueError("'transform.resize_size' must be an integer") - if not isinstance(d["transform"]["mean"], list): - raise ValueError("'transform.mean' must be a list") - if not all(isinstance(num, float) for num in d["transform"]["mean"]): - raise ValueError("'transform.mean' must be a list of floats") - if not isinstance(d["transform"]["std"], list): - raise ValueError("'transform.std' must be a list") - if not all(isinstance(num, float) for num in d["transform"]["std"]): - raise ValueError("'transform.std' must be a list of floats") + if ( + not isinstance(d["transform"]["mean"], list) + and d["transform"]["mean"] != "sample" + ): + raise ValueError("'transform.mean' must be a list or 'sample'") + if isinstance(d["transform"]["mean"], list): + if not all(isinstance(num, float) for num in d["transform"]["mean"]): + raise ValueError("'transform.mean' must be a list of floats") + if ( + not isinstance(d["transform"]["std"], list) + and d["transform"]["std"] != "sample" + ): + raise ValueError("'transform.std' must be a list or 'sample'") + if isinstance(d["transform"]["std"], list): + if not all(isinstance(num, float) for num in d["transform"]["std"]): + raise ValueError("'transform.std' must be a list of floats") if not isinstance(d["patch_size_pixels"], int) or d["patch_size_pixels"] <= 0: raise ValueError("patch_size_pixels must be a positive integer") if not isinstance(d["spacing_um_px"], float) or d["spacing_um_px"] <= 0: @@ -156,10 +164,20 @@ def _validate_input(d: dict, config_path: Path) -> None: raise ValueError("'class_names' must be a list of strings") # Validate values. - if len(d["transform"]["mean"]) != 3: - raise ValueError("transform.mean must be a list of three numbers") - if len(d["transform"]["std"]) != 3: - raise ValueError("transform.std must be a list of three numbers") + # mean and std must be either a list of three floats or 'sample'. In the case + # of 'sample', each image is normalized to have mean 0 and variance 1. 
+ if isinstance(d["transform"]["mean"], list): + if len(d["transform"]["mean"]) != 3: + raise ValueError("transform.mean must be a list of three numbers") + else: + if d["transform"]["mean"] != "sample": + raise ValueError("transform.mean must be 'sample' if not a list") + if isinstance(d["transform"]["std"], list): + if len(d["transform"]["std"]) != 3: + raise ValueError("transform.std must be a list of three numbers") + else: + if d["transform"]["mean"] != "sample": + raise ValueError("transform.mean must be 'sample' if not a list") if len(d["class_names"]) != len(set(d["class_names"])): raise ValueError("duplicate values found in 'class_names'") if len(d["class_names"]) != d["num_classes"]: diff --git a/wsinfer/_modellib/transforms.py b/wsinfer/_modellib/transforms.py index 1f329f4..e8f4cd7 100644 --- a/wsinfer/_modellib/transforms.py +++ b/wsinfer/_modellib/transforms.py @@ -4,6 +4,7 @@ https://github.com/pytorch/vision/blob/528651a031a08f9f97cc75bd619a326387708219/torchvision/transforms/_presets.py#L1 """ +from typing import Literal from typing import Tuple from typing import Union @@ -37,14 +38,14 @@ def __init__( self, *, resize_size: int, - mean: Tuple[float, float, float], - std: Tuple[float, float, float], + mean: Union[Tuple[float, float, float], Literal["sample"]], + std: Union[Tuple[float, float, float], Literal["sample"]], interpolation=BILINEAR, ) -> None: super().__init__() self.resize_size = resize_size - self.mean = list(mean) - self.std = list(std) + self.mean = mean + self.std = std self.interpolation = interpolation def __repr__(self): @@ -62,5 +63,9 @@ def forward(self, input: Union[Image.Image, torch.Tensor]) -> torch.Tensor: if not isinstance(img, torch.Tensor): img = F.pil_to_tensor(img) img = F.convert_image_dtype(img, torch.float) - img = F.normalize(img, mean=self.mean, std=self.std) + if self.mean == "sample" or self.std == "sample": + # Normalize by sample mean and std dev so image has mean 0 and var 1. + img = (img - img.mean()) / img.std() + else: + img = F.normalize(img, mean=self.mean, std=self.std) return img diff --git a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml index 4f4a366..dd27891 100644 --- a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml +++ b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml @@ -8,8 +8,8 @@ num_classes: 2 transform: resize_size: 224 # Normalize to [-1, 1] - mean: [0.5, 0.5, 0.5] - std: [0.5, 0.5, 0.5] + mean: sample + std: sample patch_size_pixels: 100 spacing_um_px: 0.5 class_names: From db27ca40c3bca9d94c4943eff866483b95f9a8c7 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 19:48:42 -0400 Subject: [PATCH 4/9] Revert "add mean=sample and std=sample to config" This reverts commit 3fdcbb3685328d0427d315a786e309fc1887766b. 
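
With the revert, the VGG16 TILs definition goes back to fixed per-channel
normalization with mean and std of 0.5, which maps float images from [0, 1] to
[-1, 1] as noted in the YAML comment. A small sketch of that mapping
(illustrative only; transforms.py calls torchvision's F.normalize for this):

    import torch
    import torchvision.transforms.functional as F

    x = torch.rand(3, 224, 224)   # float image in [0, 1]
    y = F.normalize(x, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    # Equivalent to (x - 0.5) / 0.5, so values now lie in [-1, 1].
    print(y.min().item(), y.max().item())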
--- wsinfer/_modellib/models.py | 42 +++++++---------------- wsinfer/_modellib/transforms.py | 15 +++----- wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml | 4 +-- 3 files changed, 19 insertions(+), 42 deletions(-) diff --git a/wsinfer/_modellib/models.py b/wsinfer/_modellib/models.py index eb58ea6..296db55 100644 --- a/wsinfer/_modellib/models.py +++ b/wsinfer/_modellib/models.py @@ -138,22 +138,14 @@ def _validate_input(d: dict, config_path: Path) -> None: raise ValueError("'num_classes' must be an integer") if not isinstance(d["transform"]["resize_size"], int): raise ValueError("'transform.resize_size' must be an integer") - if ( - not isinstance(d["transform"]["mean"], list) - and d["transform"]["mean"] != "sample" - ): - raise ValueError("'transform.mean' must be a list or 'sample'") - if isinstance(d["transform"]["mean"], list): - if not all(isinstance(num, float) for num in d["transform"]["mean"]): - raise ValueError("'transform.mean' must be a list of floats") - if ( - not isinstance(d["transform"]["std"], list) - and d["transform"]["std"] != "sample" - ): - raise ValueError("'transform.std' must be a list or 'sample'") - if isinstance(d["transform"]["std"], list): - if not all(isinstance(num, float) for num in d["transform"]["std"]): - raise ValueError("'transform.std' must be a list of floats") + if not isinstance(d["transform"]["mean"], list): + raise ValueError("'transform.mean' must be a list") + if not all(isinstance(num, float) for num in d["transform"]["mean"]): + raise ValueError("'transform.mean' must be a list of floats") + if not isinstance(d["transform"]["std"], list): + raise ValueError("'transform.std' must be a list") + if not all(isinstance(num, float) for num in d["transform"]["std"]): + raise ValueError("'transform.std' must be a list of floats") if not isinstance(d["patch_size_pixels"], int) or d["patch_size_pixels"] <= 0: raise ValueError("patch_size_pixels must be a positive integer") if not isinstance(d["spacing_um_px"], float) or d["spacing_um_px"] <= 0: @@ -164,20 +156,10 @@ def _validate_input(d: dict, config_path: Path) -> None: raise ValueError("'class_names' must be a list of strings") # Validate values. - # mean and std must be either a list of three floats or 'sample'. In the case - # of 'sample', each image is normalized to have mean 0 and variance 1. 
- if isinstance(d["transform"]["mean"], list): - if len(d["transform"]["mean"]) != 3: - raise ValueError("transform.mean must be a list of three numbers") - else: - if d["transform"]["mean"] != "sample": - raise ValueError("transform.mean must be 'sample' if not a list") - if isinstance(d["transform"]["std"], list): - if len(d["transform"]["std"]) != 3: - raise ValueError("transform.std must be a list of three numbers") - else: - if d["transform"]["mean"] != "sample": - raise ValueError("transform.mean must be 'sample' if not a list") + if len(d["transform"]["mean"]) != 3: + raise ValueError("transform.mean must be a list of three numbers") + if len(d["transform"]["std"]) != 3: + raise ValueError("transform.std must be a list of three numbers") if len(d["class_names"]) != len(set(d["class_names"])): raise ValueError("duplicate values found in 'class_names'") if len(d["class_names"]) != d["num_classes"]: diff --git a/wsinfer/_modellib/transforms.py b/wsinfer/_modellib/transforms.py index e8f4cd7..1f329f4 100644 --- a/wsinfer/_modellib/transforms.py +++ b/wsinfer/_modellib/transforms.py @@ -4,7 +4,6 @@ https://github.com/pytorch/vision/blob/528651a031a08f9f97cc75bd619a326387708219/torchvision/transforms/_presets.py#L1 """ -from typing import Literal from typing import Tuple from typing import Union @@ -38,14 +37,14 @@ def __init__( self, *, resize_size: int, - mean: Union[Tuple[float, float, float], Literal["sample"]], - std: Union[Tuple[float, float, float], Literal["sample"]], + mean: Tuple[float, float, float], + std: Tuple[float, float, float], interpolation=BILINEAR, ) -> None: super().__init__() self.resize_size = resize_size - self.mean = mean - self.std = std + self.mean = list(mean) + self.std = list(std) self.interpolation = interpolation def __repr__(self): @@ -63,9 +62,5 @@ def forward(self, input: Union[Image.Image, torch.Tensor]) -> torch.Tensor: if not isinstance(img, torch.Tensor): img = F.pil_to_tensor(img) img = F.convert_image_dtype(img, torch.float) - if self.mean == "sample" or self.std == "sample": - # Normalize by sample mean and std dev so image has mean 0 and var 1. 
- img = (img - img.mean()) / img.std() - else: - img = F.normalize(img, mean=self.mean, std=self.std) + img = F.normalize(img, mean=self.mean, std=self.std) return img diff --git a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml index dd27891..4f4a366 100644 --- a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml +++ b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml @@ -8,8 +8,8 @@ num_classes: 2 transform: resize_size: 224 # Normalize to [-1, 1] - mean: sample - std: sample + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] patch_size_pixels: 100 spacing_um_px: 0.5 class_names: From d0661aa5045917e587e4ef3202de7f0135ab2a24 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 20:04:10 -0400 Subject: [PATCH 5/9] update url and notes for vgg16 tils model --- wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml index 4f4a366..158d933 100644 --- a/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml +++ b/wsinfer/modeldefs/vgg16_tcga-tils-v1.yaml @@ -2,8 +2,8 @@ version: "1.0" architecture: vgg16 name: TCGA-TILs-v1 -url: https://stonybrookmedicine.box.com/shared/static/t7mbqcvv4fsd6jzzgf8pl1imvyobeg1x.pt -url_file_name: vgg16-tils-v1-20220112-f7988712.pt +url: https://stonybrookmedicine.box.com/shared/static/0orxxw2aai3l3lztetvukwqetvr3z4lr.pt +url_file_name: vgg16-tils-20220112-3088cb70.pt num_classes: 2 transform: resize_size: 224 @@ -17,4 +17,4 @@ class_names: - tils metadata: notes: | - Original code and data are available at https://stonybrookmedicine.app.box.com/folder/128593362243 + Original code available at https://github.com/SBU-BMI/u24_lymphocyte. From 087bd20ad3a8a5754f576b8aa1b4042fbb0d3d7b Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 20:30:03 -0400 Subject: [PATCH 6/9] build tils vgg16 model --- scripts/build_docker_images.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/build_docker_images.sh b/scripts/build_docker_images.sh index b9aedf4..27ddd63 100644 --- a/scripts/build_docker_images.sh +++ b/scripts/build_docker_images.sh @@ -44,6 +44,9 @@ docker build -t kaczmarj/wsinfer:$version . # TILs build $version-tils dockerfiles/tils.dockerfile +# TILs VGG16 +build $version-tils-vgg16 dockerfiles/tils-vgg16.dockerfile + # Tumor BRCA build $version-tumor-brca dockerfiles/tumor-brca.dockerfile @@ -64,6 +67,7 @@ else echo "Pushing images." docker push kaczmarj/wsinfer:$version docker push kaczmarj/wsinfer:$version-tils + docker push kaczmarj/wsinfer:$version-tils-vgg16 docker push kaczmarj/wsinfer:$version-tumor-brca docker push kaczmarj/wsinfer:$version-tumor-luad docker push kaczmarj/wsinfer:$version-tumor-paad From 5d5c68d79059f3222def82df401de091b090f9a7 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 20:30:35 -0400 Subject: [PATCH 7/9] add dockerfiles for tils vgg16 model --- dockerfiles/tils-vgg16.dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dockerfiles/tils-vgg16.dockerfile diff --git a/dockerfiles/tils-vgg16.dockerfile b/dockerfiles/tils-vgg16.dockerfile new file mode 100644 index 0000000..c6a318f --- /dev/null +++ b/dockerfiles/tils-vgg16.dockerfile @@ -0,0 +1,14 @@ +# Tumor-infiltrating lymphocyte detection model. +# +# Note about versioning: We should not use the 'latest' tag because it is a moving +# target. We should prefer using a versioned release of the wsinfer pipeline. 
+FROM kaczmarj/wsinfer:0.3.6 + +# The CLI will use these env vars for model and weights. +ENV WSINFER_MODEL="vgg16" +ENV WSINFER_WEIGHTS="TCGA-TILs-v1" + +# Download the weights. +RUN python -c "from wsinfer import get_model_weights; get_model_weights(architecture=\"$WSINFER_MODEL\", name=\"$WSINFER_WEIGHTS\").load_model()" \ + # Downloaded models are mode 0600. Make them readable by all users. + && chmod -R +r $TORCH_HOME/hub/checkpoints/ From b19211d4d748f8b1303ae4897c45a940196656ab Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 20:50:37 -0400 Subject: [PATCH 8/9] add test for vgg16 tils model --- tests/test_all.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_all.py b/tests/test_all.py index c94af0b..2b7dcac 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -239,6 +239,15 @@ def test_cli_run_args(tmp_path: Path): 200, 441, ), + # VGG16 TCGA-TILs-v1 + ( + "vgg16", + "TCGA-TILs-v1", + ["notils", "tils"], + [0.998769, 0.001231], + 200, + 441, + ), # Vgg16mod TCGA-BRCA-v1 ( "vgg16mod", @@ -950,7 +959,7 @@ def test_jit_compile(model_name: str, weights_name: str): out_nojit = model(x).detach().cpu() time_nojit = time.perf_counter() - t0 model_nojit = model - model = jit_compile(model) + model = jit_compile(model) # type: ignore if model is model_nojit: pytest.skip("Failed to compile model (would use original model)") with torch.no_grad(): From d2b34c3acefe6af0a51173a8a040607eda589ac7 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 6 Apr 2023 21:18:51 -0400 Subject: [PATCH 9/9] make expected values more precise for vgg16 tils model --- tests/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_all.py b/tests/test_all.py index 2b7dcac..2c53263 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -244,7 +244,7 @@ def test_cli_run_args(tmp_path: Path): "vgg16", "TCGA-TILs-v1", ["notils", "tils"], - [0.998769, 0.001231], + [0.9987693428993224, 0.0012305785203352], 200, 441, ),
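
Taken together, the series registers the VGG16 TILs weights, packages them in a
dedicated Docker image, and pins expected outputs in the test suite. For reference,
a rough sketch of exercising the weights directly from Python, assuming
load_model() returns an ordinary torch.nn.Module and that a softmax converts its
outputs into the notils/tils probabilities checked in the test above:

    import torch
    from wsinfer import get_model_weights

    # Same call the dockerfile uses to pre-download the weights.
    weights = get_model_weights(architecture="vgg16", name="TCGA-TILs-v1")
    model = weights.load_model()
    model.eval()

    # Dummy batch of one 224 x 224 RGB tile (the size the transform resizes to).
    x = torch.rand(1, 3, 224, 224)
    with torch.no_grad():
        probs = torch.softmax(model(x), dim=1)
    print(probs)  # columns correspond to class_names: notils, tils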