From a7e7d6eaf82047f9e491e10a74d59b20b4e5951f Mon Sep 17 00:00:00 2001 From: ChengyuZhu6 Date: Wed, 11 Sep 2024 15:48:58 +0800 Subject: [PATCH] security-fix: resolve security issue with starting a process with a shell - Issue: Fixed [B605:start_process_with_a_shell] identified by Bandit, which flagged starting a process with a shell as a high-severity security risk (CWE-78: OS Command Injection). - Replaced os.system call with a safer alternative to prevent shell injection vulnerabilities. Signed-off-by: ChengyuZhu6 --- binaries/build.py | 12 +- binaries/conda/build_packages.py | 40 ++- binaries/install.py | 23 +- binaries/upload.py | 17 +- .../near_real_time_video/create_mar_file.py | 6 +- examples/torchrec_dlrm/create_dlrm_mar.py | 5 +- .../test_distributed_inference_handler.py | 12 +- test/pytest/test_handler.py | 24 +- test/pytest/test_sm_mme_requirements.py | 33 ++- ts_scripts/api_utils.py | 232 +++++++++++++++--- ts_scripts/backend_utils.py | 16 +- ts_scripts/frontend_utils.py | 9 +- ts_scripts/install_dependencies.py | 199 +++++++++------ ts_scripts/install_from_src.py | 12 +- ts_scripts/modelarchiver_utils.py | 38 ++- ts_scripts/regression_utils.py | 57 +++-- ts_scripts/sanity_utils.py | 39 ++- ts_scripts/tsutils.py | 23 +- ts_scripts/utils.py | 28 ++- ts_scripts/workflow_archiver_utils.py | 21 +- 20 files changed, 588 insertions(+), 258 deletions(-) diff --git a/binaries/build.py b/binaries/build.py index da196da955..39f3e0234a 100644 --- a/binaries/build.py +++ b/binaries/build.py @@ -1,6 +1,8 @@ import argparse import glob import os +import shlex +import subprocess import sys # To help discover local modules @@ -49,10 +51,12 @@ def build_dist_whl(args): print(f"## In directory: {os.getcwd()} | Executing command: {cur_wheel_cmd}") if not args.dry_run: - build_exit_code = os.system(cur_wheel_cmd) - # If any one of the steps fail, exit with error - if build_exit_code != 0: - sys.exit(f"## {binary} build Failed !") + try: + cur_wheel_cmd_list = shlex.split(cur_wheel_cmd) + subprocess.run(cur_wheel_cmd_list, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except subprocess.CalledProcessError as e: + print(f"## {binary} build Failed! Error: {e.stderr.decode()}") + sys.exit(1) def build(args): diff --git a/binaries/conda/build_packages.py b/binaries/conda/build_packages.py index 00b9e9c13b..5f5e1e54c2 100644 --- a/binaries/conda/build_packages.py +++ b/binaries/conda/build_packages.py @@ -1,19 +1,16 @@ import argparse import os +import subprocess from datetime import date -from ts_scripts.utils import try_and_handle +from ts_scripts.utils import try_and_handle, find_conda_binary conda_build_dir = os.path.dirname(os.path.abspath(__file__)) REPO_ROOT = os.path.join(conda_build_dir, "..", "..") MINICONDA_DOWNLOAD_URL = ( "https://repo.anaconda.com/miniconda/Miniconda3-py39_4.9.2-Linux-x86_64.sh" ) -CONDA_BINARY = ( - os.popen("which conda").read().strip() - if os.system(f"conda --version") == 0 - else f"$HOME/miniconda/condabin/conda" -) +CONDA_BINARY = find_conda_binary() CONDA_PACKAGES_PATH = os.path.join(REPO_ROOT, "binaries", "conda", "output") CONDA_LINUX_PACKAGES_PATH = os.path.join( @@ -32,8 +29,7 @@ if os.name == "nt": # Assumes miniconda is installed in windows - CONDA_BINARY = "conda" - + CONDA_BINARY = "conda" def add_nightly_suffix_conda(binary_name: str) -> str: """ @@ -52,15 +48,15 @@ def install_conda_build(dry_run): """ # Check if conda binary already exists - exit_code = os.system(f"conda --version") - if exit_code == 0: - print( - f"'conda' already present on the system. Proceeding without a fresh conda installation." - ) + try: + subprocess.run(["conda", "--version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print("'conda' already present on the system. Proceeding without a fresh conda installation.") return - try_and_handle( - f"{CONDA_BINARY} install python=3.8 conda-build anaconda-client -y", dry_run - ) + except subprocess.CalledProcessError: + # Conda is not available, proceed with installation + try_and_handle( + f"{CONDA_BINARY} install python=3.8 conda-build anaconda-client -y", dry_run + ) def install_miniconda(dry_run): @@ -68,13 +64,13 @@ def install_miniconda(dry_run): Installs miniconda, a slimmer anaconda installation to build conda packages """ - # Check if conda binary already exists - exit_code = os.system(f"conda --version") - if exit_code == 0: - print( - f"'conda' already present on the system. Proceeding without a fresh minconda installation." - ) + try: + subprocess.run(["conda", "--version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print("'conda' already present on the system. Proceeding without a fresh conda installation.") return + except subprocess.CalledProcessError as e: + raise (e) + if os.name == "nt": print( "Identified as Windows system. Please install miniconda using this URL: https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" diff --git a/binaries/install.py b/binaries/install.py index 04b151eb7c..05fbaccb9d 100644 --- a/binaries/install.py +++ b/binaries/install.py @@ -1,4 +1,5 @@ import os +import subprocess import sys import glob @@ -13,19 +14,25 @@ def install(): if is_conda_env(): print("## Using conda to install torchserve and torch-model-archiver") channel_dir = os.path.abspath(os.path.join(REPO_ROOT, "binaries", "conda", "output")) - conda_cmd = f"conda install --channel {channel_dir} -y torchserve torch-model-archiver" - print(f"## In directory: {os.getcwd()} | Executing command: {conda_cmd}") - install_exit_code = os.system(conda_cmd) + conda_cmd = ["conda", "install", "--channel", channel_dir, "-y", "torchserve", "torch-model-archiver"] + print(f"## In directory: {os.getcwd()} | Executing command: {' '.join(conda_cmd)}") + + try: + subprocess.run(conda_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except subprocess.CalledProcessError as e: + sys.exit("## Torchserve/Model archiver Installation Failed!") + else: print("## Using pip to install torchserve and torch-model-archiver") ts_wheel = glob.glob(os.path.join(REPO_ROOT, "dist", "*.whl"))[0] ma_wheel = glob.glob(os.path.join(REPO_ROOT, "model-archiver", "dist", "*.whl"))[0] - pip_cmd = f"pip install {ts_wheel} {ma_wheel}" - print(f"## In directory: {os.getcwd()} | Executing command: {pip_cmd}") - install_exit_code = os.system(pip_cmd) + pip_cmd = ["pip", "install", ts_wheel, ma_wheel] + print(f"## In directory: {os.getcwd()} | Executing command: {' '.join(pip_cmd)}") - if install_exit_code != 0: - sys.exit("## Torchserve \ Model archiver Installation Failed !") + try: + subprocess.run(pip_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except subprocess.CalledProcessError as e: + sys.exit("## Torchserve/Model archiver Installation Failed!") if __name__ == "__main__": diff --git a/binaries/upload.py b/binaries/upload.py index 328f104b67..618da6a551 100755 --- a/binaries/upload.py +++ b/binaries/upload.py @@ -3,6 +3,7 @@ import glob import os import sys +import subprocess # To help discover local modules REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") @@ -44,11 +45,17 @@ def upload_conda_packages(args, PACKAGES, CONDA_PACKAGES_PATH): "tar.bz2" ): print(f"Uploading to anaconda package: {name}") - anaconda_upload_command = f"anaconda upload {file_path} --force" - exit_code = os.system(anaconda_upload_command) - if exit_code != 0: - print(f"Anaconda package upload failed for package {name}") - return exit_code + anaconda_upload_command = ["anaconda", "upload", file_path, "--force"] + + try: + subprocess.run( + anaconda_upload_command, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + except subprocess.CalledProcessError as e: + return e.returncode print(f"All packages uploaded to anaconda successfully") diff --git a/examples/image_classifier/near_real_time_video/create_mar_file.py b/examples/image_classifier/near_real_time_video/create_mar_file.py index 8c56642b05..c276811b21 100644 --- a/examples/image_classifier/near_real_time_video/create_mar_file.py +++ b/examples/image_classifier/near_real_time_video/create_mar_file.py @@ -5,6 +5,7 @@ import argparse import os import shutil +import subprocess MODEL_PTH_FILE = "resnet18-f37072fd.pth" MODEL_STORE = "model_store" @@ -15,7 +16,7 @@ def download_pth_file(output_file: str) -> None: if not os.path.exists(output_file): cmd = ["wget", " https://download.pytorch.org/models/resnet18-f37072fd.pth"] print("Downloading resnet-18 pth file") - os.system(" ".join(cmd)) + subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) def create_mar(): @@ -42,9 +43,8 @@ def create_mar(): "--handler image_classifier", "--force", ] - print(f"Archiving resnet-18 model into {MAR_FILE}") - os.system(" ".join(cmd)) + subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) def move_mar_file(): diff --git a/examples/torchrec_dlrm/create_dlrm_mar.py b/examples/torchrec_dlrm/create_dlrm_mar.py index 7d1722b0dc..dc5ed3752a 100644 --- a/examples/torchrec_dlrm/create_dlrm_mar.py +++ b/examples/torchrec_dlrm/create_dlrm_mar.py @@ -2,8 +2,7 @@ This script creates a DLRM model and packs it into a TorchServe mar file """ -import os - +import subprocess import torch from dlrm_factory import DLRMFactory @@ -32,7 +31,7 @@ def main(): ] print("Archiving model into dlrm.mar") - os.system(" ".join(cmd)) + subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) print("Done") diff --git a/test/pytest/test_distributed_inference_handler.py b/test/pytest/test_distributed_inference_handler.py index b5ab178e5d..e810e6d3d9 100644 --- a/test/pytest/test_distributed_inference_handler.py +++ b/test/pytest/test_distributed_inference_handler.py @@ -1,6 +1,6 @@ import os import sys - +import subprocess import pytest import test_utils @@ -37,10 +37,14 @@ def test_large_model_inference(): ) try: - command = f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_INFERENCE} -d {POSTMAN_LARGE_MODEL_INFERENCE_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE} --verbose" - result = os.system(command) + command = [ + "newman", "run", "-e", POSTMAN_ENV_FILE, POSTMAN_COLLECTION_INFERENCE, + "-d", POSTMAN_LARGE_MODEL_INFERENCE_DATA_FILE, "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE}", "--verbose" + ] + result = subprocess.run(command, check=True) assert ( - result == 0 + result.returncode == 0 ), "Error: Distributed inference failed, the exit code is not zero" finally: test_utils.stop_torchserve() diff --git a/test/pytest/test_handler.py b/test/pytest/test_handler.py index eea6461534..5014409f7d 100644 --- a/test/pytest/test_handler.py +++ b/test/pytest/test_handler.py @@ -2,7 +2,7 @@ import json import logging import os - +import subprocess import numpy as np import pytest import requests @@ -314,10 +314,14 @@ def test_huggingface_bert_batch_inference(): # Make 2 curl requests in parallel with & # curl --header \"X-Forwarded-For: 1.2.3.4\" won't work since you can't access local host anymore - response = os.popen( - f"curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text} & curl http://127.0.0.1:8080/predictions/BERTSeqClassification -T {input_text}" - ) - response = response.read() + curl_command = [ + "curl", "http://127.0.0.1:8080/predictions/BERTSeqClassification", "-T", input_text + ] + process1 = subprocess.Popen(curl_command, stdout=subprocess.PIPE) + process2 = subprocess.Popen(curl_command, stdout=subprocess.PIPE) + response1, _ = process1.communicate() + response2, _ = process2.communicate() + response = response1.decode() + response2.decode() ## Assert that 2 responses are returned from the same batch assert response == "Not AcceptedNot Accepted" @@ -360,7 +364,7 @@ def test_MMF_activity_recognition_model_register_and_inference_on_valid_model(): def test_huggingface_bert_model_parallel_inference(): number_of_gpus = torch.cuda.device_count() - check = os.popen(f"curl http://localhost:8081/models") + check = subprocess.run(["curl", "http://localhost:8081/models"], capture_output=True, text=True) print(check) if number_of_gpus > 1: batch_size = 1 @@ -385,10 +389,12 @@ def test_huggingface_bert_model_parallel_inference(): "sample_text_captum_input.txt", ) - response = os.popen( - f"curl http://127.0.0.1:8080/predictions/Textgeneration -T {input_text}" + response = subprocess.run( + ["curl", "http://127.0.0.1:8080/predictions/Textgeneration", "-T", input_text], + capture_output=True, + text=True ) - response = response.read() + response = response.stdout assert ( "Bloomberg has decided to publish a new report on the global economy" diff --git a/test/pytest/test_sm_mme_requirements.py b/test/pytest/test_sm_mme_requirements.py index 14d40fee24..d49e159123 100644 --- a/test/pytest/test_sm_mme_requirements.py +++ b/test/pytest/test_sm_mme_requirements.py @@ -1,6 +1,6 @@ import os import pathlib - +import subprocess import pytest import requests import test_utils @@ -103,25 +103,24 @@ def test_oom_on_invoke(): # Make 8 curl requests in parallel with & # Send multiple requests to make sure to hit OOM + processes = [] for i in range(10): - response = os.popen( - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} && " - f"curl http://127.0.0.1:8080/models/BERTSeqClassification/invoke -T {input_text} " - ) - response = response.read() + for _ in range(8): + process = subprocess.Popen( + ["curl", "http://127.0.0.1:8080/models/BERTSeqClassification/invoke", "-T", input_text], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + processes.append(process) + responses = [] + for process in processes: + stdout, stderr = process.communicate() + responses.append(stdout.decode()) # If OOM is hit, we expect code 507 to be present in the response string - lines = response.split("\n") output = "" - for line in lines: - if "code" in line: - line = line.strip() - output = line + for response in responses: + if '"code": 507,' in response: + output = '"code": 507,' break assert output == '"code": 507,', "OOM Error expected" diff --git a/ts_scripts/api_utils.py b/ts_scripts/api_utils.py index 02e1fa4bc3..b79b033845 100755 --- a/ts_scripts/api_utils.py +++ b/ts_scripts/api_utils.py @@ -2,6 +2,7 @@ import os import shutil import sys +import subprocess REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") sys.path.append(REPO_ROOT) @@ -127,9 +128,21 @@ def trigger_management_tests(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_MANAGEMENT} -d {POSTMAN_MANAGEMENT_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_MANAGEMENT_DIR}/{REPORT_FILE} --verbose" - ) + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_MANAGEMENT, + "-d", POSTMAN_MANAGEMENT_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_MANAGEMENT_DIR}/{REPORT_FILE}", + "--verbose" + ] + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_MANAGEMENT_DIR) cleanup_model_store() @@ -150,9 +163,21 @@ def trigger_inference_tests(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_INFERENCE} -d {POSTMAN_INFERENCE_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE} --verbose" - ) + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_INFERENCE, + "-d", POSTMAN_INFERENCE_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE}", + "--verbose" + ] + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_INFERENCE_DIR) cleanup_model_store() @@ -173,9 +198,22 @@ def trigger_workflow_tests(): workflow_store=MODEL_STORE_DIR, log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_WORKFLOW} -d {POSTMAN_WORKFLOW_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_WORKFLOW_MANAGEMENT_DIR}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_WORKFLOW, + "-d", POSTMAN_WORKFLOW_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_WORKFLOW_MANAGEMENT_DIR}/{REPORT_FILE}", + "--verbose" + ] + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_WORKFLOW_MANAGEMENT_DIR) cleanup_model_store() @@ -195,9 +233,21 @@ def trigger_workflow_inference_tests(): workflow_store=MODEL_STORE_DIR, log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_WORKFLOW_INFERENCE} -d {POSTMAN_WORKFLOW_INFERENCE_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_WORKFLOW_INFERENCE_DIR}/{REPORT_FILE} --verbose" - ) + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_WORKFLOW_INFERENCE, + "-d", POSTMAN_WORKFLOW_INFERENCE_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_WORKFLOW_INFERENCE_DIR}/{REPORT_FILE}", + "--verbose" + ] + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_WORKFLOW_INFERENCE_DIR) cleanup_model_store() @@ -218,9 +268,22 @@ def trigger_explanation_tests(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_EXPLANATION} -d {POSTMAN_EXPLANATION_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INFERENCE_DIR}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_EXPLANATION, + "-d", POSTMAN_EXPLANATION_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_EXPLANATION_DIR}/{REPORT_FILE}", + "--verbose" + ] + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_EXPLANATION_DIR) cleanup_model_store() @@ -245,9 +308,23 @@ def trigger_incr_timeout_inference_tests(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_INFERENCE} -d {POSTMAN_INCRSD_TIMEOUT_INFERENCE_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INCRSD_TIMEOUT_INFERENCE_DIR}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_INFERENCE, + "-d", POSTMAN_INCRSD_TIMEOUT_INFERENCE_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_INCRSD_TIMEOUT_INFERENCE_DIR}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_INCRSD_TIMEOUT_INFERENCE_DIR) cleanup_model_store() @@ -264,9 +341,23 @@ def trigger_https_tests(): config_file=TS_CONFIG_FILE_HTTPS, log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run --insecure -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_HTTPS} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_HTTPS_DIR}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "--insecure", # Option to allow insecure HTTPS requests + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_HTTPS, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_HTTPS_DIR}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_HTTPS_DIR) cleanup_model_store() @@ -289,9 +380,23 @@ def trigger_management_tests_kf(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_MANAGEMENT} -d {POSTMAN_MANAGEMENT_DATA_FILE} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_MANAGEMENT_DIR_KF}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_MANAGEMENT, + "-d", POSTMAN_MANAGEMENT_DATA_FILE, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_MANAGEMENT_DIR_KF}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_MANAGEMENT_DIR_KF) cleanup_model_store() @@ -315,9 +420,23 @@ def trigger_inference_tests_kf(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_INFERENCE_KF} -d {POSTMAN_INFERENCE_DATA_FILE_KF} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INFERENCE_DIR_KF}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_INFERENCE_KF, + "-d", POSTMAN_INFERENCE_DATA_FILE_KF, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_INFERENCE_DIR_KF}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_INFERENCE_DIR_KF) cleanup_model_store() @@ -333,9 +452,23 @@ def trigger_https_tests_kf(): config_file=TS_CONFIG_FILE_HTTPS_KF, log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run --insecure -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_HTTPS_KF} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_HTTPS_DIR_KF}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "--insecure", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_HTTPS_KF, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_HTTPS_DIR_KF}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_HTTPS_DIR_KF) cleanup_model_store() @@ -358,9 +491,23 @@ def trigger_inference_tests_kfv2(): config_file="config.properties", log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_INFERENCE_KFV2} -d {POSTMAN_INFERENCE_DATA_FILE_KFV2} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_INFERENCE_DIR_KFV2}/{REPORT_FILE} --verbose" - ) + + command = [ + "newman", "run", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_INFERENCE_KFV2, + "-d", POSTMAN_INFERENCE_DATA_FILE_KFV2, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_INFERENCE_DIR_KFV2}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_INFERENCE_DIR_KFV2) cleanup_model_store() @@ -376,9 +523,22 @@ def trigger_https_tests_kfv2(): config_file=TS_CONFIG_FILE_HTTPS_KFV2, log_file=TS_CONSOLE_LOG_FILE, ) - EXIT_CODE = os.system( - f"newman run --insecure -e {POSTMAN_ENV_FILE} {POSTMAN_COLLECTION_HTTPS_KFV2} -r cli,htmlextra --reporter-htmlextra-export {ARTIFACTS_HTTPS_DIR_KFV2}/{REPORT_FILE} --verbose" - ) + command = [ + "newman", "run", + "--insecure", + "-e", POSTMAN_ENV_FILE, + POSTMAN_COLLECTION_HTTPS_KFV2, + "-r", "cli,htmlextra", + "--reporter-htmlextra-export", f"{ARTIFACTS_HTTPS_DIR_KFV2}/{REPORT_FILE}", + "--verbose" + ] + + try: + result = subprocess.run(command, capture_output=True, text=True, check=True) + EXIT_CODE = result.returncode + except subprocess.CalledProcessError as e: + EXIT_CODE = e.returncode + ts.stop_torchserve() move_logs(TS_CONSOLE_LOG_FILE, ARTIFACTS_HTTPS_DIR_KFV2) cleanup_model_store() diff --git a/ts_scripts/backend_utils.py b/ts_scripts/backend_utils.py index e5d0ea6c22..2619297b5b 100755 --- a/ts_scripts/backend_utils.py +++ b/ts_scripts/backend_utils.py @@ -1,4 +1,5 @@ import os +import subprocess import sys @@ -14,9 +15,16 @@ def test_torchserve(): coverage_dir = os.path.join("ts") report_output_dir = os.path.join(test_dir, "coverage.xml") - ts_test_cmd = f"python -m pytest --cov-report xml:{report_output_dir} --cov={coverage_dir} {test_dir} {handler_test_dir}" + ts_test_cmd = [ + "python", "-m", "pytest", + "--cov-report", f"xml:{report_output_dir}", + "--cov", coverage_dir, + test_dir, + handler_test_dir + ] print(f"## In directory: {os.getcwd()} | Executing command: {ts_test_cmd}") - ts_test_error_code = os.system(ts_test_cmd) + try: + subprocess.run(ts_test_cmd, capture_output=True, text=True, check=True) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) - if ts_test_error_code != 0: - sys.exit("## TorchServe Pytests Failed !") diff --git a/ts_scripts/frontend_utils.py b/ts_scripts/frontend_utils.py index a8eddc6ca1..fc430b4725 100755 --- a/ts_scripts/frontend_utils.py +++ b/ts_scripts/frontend_utils.py @@ -1,13 +1,14 @@ import os import sys - +import subprocess def test_frontend(): print("## Started frontend build and tests") frontend_gradlew_path = os.path.join("frontend", "gradlew") - frontend_gradlew_cmd = f"{frontend_gradlew_path} -p frontend clean build" + frontend_gradlew_cmd = [frontend_gradlew_path, "-p", "frontend", "clean", "build"] print(f"## In directory: {os.getcwd()} | Executing command: {frontend_gradlew_cmd}") - frontend_gradlew_exit_code = os.system(frontend_gradlew_cmd) - if frontend_gradlew_exit_code != 0: + try: + subprocess.run(frontend_gradlew_cmd, capture_output=True, text=True, check=True) + except subprocess.CalledProcessError: sys.exit("## Frontend Gradle Tests Failed !") diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py index 20bd76599a..96d2cc4750 100644 --- a/ts_scripts/install_dependencies.py +++ b/ts_scripts/install_dependencies.py @@ -1,6 +1,8 @@ import argparse import os import platform +import shlex +import subprocess import sys from print_env_info import run_and_parse_first_match @@ -88,6 +90,15 @@ def __init__(self): self.torch_stable_url = "https://download.pytorch.org/whl/torch_stable.html" self.sudo_cmd = "sudo " + def check_command(self, command): + """Check if a command is available on the system.""" + try: + command = shlex.split(command) + subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + return True + except subprocess.CalledProcessError: + return False + def install_java(self): pass @@ -107,63 +118,103 @@ def install_torch_packages(self, cuda_version): ) sys.exit(1) else: - os.system( - f"{sys.executable} -m pip install -U -r requirements/torch_{cuda_version}_{platform.system().lower()}.txt" - ) + torch_cuda_requirements_file = f"requirements/torch_{cuda_version}_{platform.system().lower()}.txt" + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "-r", torch_cuda_requirements_file], + capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) elif args.neuronx: - torch_neuronx_requirements_file = os.path.join( - "requirements", "torch_neuronx_linux.txt" - ) - os.system( - f"{sys.executable} -m pip install -U -r {torch_neuronx_requirements_file}" - ) + torch_neuronx_requirements_file = "requirements/torch_neuronx_linux.txt" + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "-r", torch_neuronx_requirements_file], + capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) else: if platform.machine() == "aarch64": - os.system( - f"{sys.executable} -m pip install -U -r requirements/torch_{platform.system().lower()}_{platform.machine()}.txt" - ) + requirements_file = f"requirements/torch_{platform.system().lower()}_{platform.machine()}.txt" else: - os.system( - f"{sys.executable} -m pip install -U -r requirements/torch_{platform.system().lower()}.txt" + requirements_file = f"requirements/torch_{platform.system().lower()}.txt" + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "-r", requirements_file], + capture_output=True, text=True, check=True ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) def install_python_packages(self, cuda_version, requirements_file_path, nightly): check = "where" if platform.system() == "Windows" else "which" - if os.system(f"{check} conda") == 0: + if subprocess.run([check, "conda"], capture_output=True).returncode == 0: # conda install command should run before the pip install commands # as it may reinstall the packages with different versions - os.system("conda install -y conda-build") + print("Conda found. Installing conda-build...") + try: + result = subprocess.run( + ["conda", "install", "-y", "conda-build"], + capture_output=True, text=True, check=True + ) + print(result.stdout) + except subprocess.CalledProcessError as e: + print(f"Error installing conda-build: {e.stderr}") + sys.exit(e.returncode) # Install PyTorch packages if nightly: pt_nightly = "cpu" if not cuda_version else cuda_version - os.system( - f"pip3 install numpy --pre torch torchvision torchaudio torchtext --index-url https://download.pytorch.org/whl/nightly/{pt_nightly}" - ) + try: + subprocess.run( + [ + "pip3", "install", "numpy", "--pre", "torch", "torchvision", "torchaudio", "torchtext", + f"--index-url=https://download.pytorch.org/whl/nightly/{pt_nightly}" + ], + capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) elif args.skip_torch_install: print("Skipping Torch installation") else: self.install_torch_packages(cuda_version) # developer.txt also installs packages from common.txt - os.system(f"{sys.executable} -m pip install -U -r {requirements_file_path}") + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "-r", requirements_file_path], + capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) # Install dependencies for GPU if not isinstance(cuda_version, type(None)): gpu_requirements_file = os.path.join("requirements", "common_gpu.txt") - os.system(f"{sys.executable} -m pip install -U -r {gpu_requirements_file}") + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "-r", gpu_requirements_file], + capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) # Install dependencies for Inferentia2 if args.neuronx: neuronx_requirements_file = os.path.join("requirements", "neuronx.txt") - os.system( - f"{sys.executable} -m pip install -U -r {neuronx_requirements_file}" - ) + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", "-r", neuronx_requirements_file], + capture_output=True, text=True, check=True + ) + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) def install_node_packages(self): - os.system( - f"{self.sudo_cmd}npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check" - ) + subprocess.run([{self.sudo_cmd}, "npm", "install", "-g", "newman@5.3.2", "newman-reporter-htmlextra", "markdown-link-check"], check=True) def install_jmeter(self): pass @@ -190,55 +241,49 @@ def __init__(self): self.sudo_cmd = "" if os.geteuid() == 0 else self.sudo_cmd if args.force: - os.system(f"{self.sudo_cmd}apt-get update") + subprocess.run([self.sudo_cmd, "apt-get", "update"], check=True) def install_java(self): - if os.system("javac --version") != 0 or args.force: - os.system(f"{self.sudo_cmd}apt-get install -y openjdk-17-jdk") + if self.check_command("javac --version") or args.force: + subprocess.run([self.sudo_cmd, "apt-get", "install", "-y", "openjdk-17-jdk"], check=True) def install_nodejs(self): - if os.system("node -v") != 0 or args.force: - os.system( - f"{self.sudo_cmd}curl -sL https://deb.nodesource.com/setup_18.x | {self.sudo_cmd}bash -" - ) - os.system(f"{self.sudo_cmd}apt-get install -y nodejs") + if self.check_command("node -v") or args.force: + subprocess.run([self.sudo_cmd, "curl", "-sL", "https://deb.nodesource.com/setup_18.x", "|", "bash", "-"], check=True) + subprocess.run([self.sudo_cmd, "apt-get", "install", "-y", "nodejs"], check=True) def install_wget(self): - if os.system("wget --version") != 0 or args.force: - os.system(f"{self.sudo_cmd}apt-get install -y wget") + if self.check_command("wget --version") or args.force: + subprocess.run([self.sudo_cmd, "apt-get", "install", "-y", "wget"], check=True) def install_numactl(self): - if os.system("numactl --show") != 0 or args.force: - os.system(f"{self.sudo_cmd}apt-get install -y numactl") + if self.check_command("numactl --show") or args.force: + subprocess.run([self.sudo_cmd, "apt-get", "install", "-y", "numactl"], check=True) def install_cpp_dependencies(self): - os.system( - f"{self.sudo_cmd}apt-get install -y {' '.join(CPP_LINUX_DEPENDENCIES)}" - ) + subprocess.run([self.sudo_cmd, ["apt-get", "install", "-y"] + list(CPP_LINUX_DEPENDENCIES)], check=True) def install_neuronx_driver(self): # Configure Linux for Neuron repository updates - os.system( - ". /etc/os-release\n" - + f"{self.sudo_cmd}tee /etc/apt/sources.list.d/neuron.list > /dev/null < /dev/null <