Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/md5 usedforsecurity #17866

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python_modules/dagster/dagster/_core/definitions/assets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import json
import warnings
from typing import (
Expand Down Expand Up @@ -45,6 +44,7 @@
)
from dagster._utils import IHasInternalInit
from dagster._utils.merger import merge_dicts
from dagster._utils.security import non_secure_md5_hash_str
from dagster._utils.warnings import (
disable_dagster_warnings,
)
Expand Down Expand Up @@ -1348,7 +1348,7 @@ def __str__(self):
@property
def unique_id(self) -> str:
"""A unique identifier for the AssetsDefinition that's stable across processes."""
return hashlib.md5((json.dumps(sorted(self.keys))).encode("utf-8")).hexdigest()
return non_secure_md5_hash_str((json.dumps(sorted(self.keys))).encode("utf-8"))

def with_resources(self, resource_defs: Mapping[str, ResourceDefinition]) -> "AssetsDefinition":
attributes_dict = self.get_attributes_dict()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import os
import shutil
import sys
Expand All @@ -22,6 +21,7 @@
from dagster._serdes import ConfigurableClass, ConfigurableClassData
from dagster._seven import json
from dagster._utils import ensure_dir, ensure_file, touch_file
from dagster._utils.security import non_secure_md5_hash_str

from .captured_log_manager import (
CapturedLogContext,
Expand Down Expand Up @@ -225,7 +225,7 @@ def get_captured_local_path(self, log_key: Sequence[str], extension: str, partia
if partial:
filename = f"{filename}.partial"
if len(filename) > MAX_FILENAME_LENGTH:
filename = "{}.{}".format(hashlib.md5(filebase.encode("utf-8")).hexdigest(), extension)
filename = "{}.{}".format(non_secure_md5_hash_str(filebase.encode("utf-8")), extension)
location = os.path.join(self._base_dir, *namespace, filename)
location = os.path.abspath(location)
if not location.startswith(self._base_dir):
Expand Down
12 changes: 12 additions & 0 deletions python_modules/dagster/dagster/_utils/security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import hashlib
import sys
from typing import Union


def non_secure_md5_hash_str(s: Union[bytes, bytearray, memoryview]) -> str:
"""Drop in replacement md5 hash function marking it for a non-security purpose."""
# check python version, use usedforsecurity flag if possible.
if sys.version_info[0] <= 3 and sys.version_info[1] <= 8:
return hashlib.md5(s).hexdigest()
else:
return hashlib.md5(s, usedforsecurity=False).hexdigest() # type: ignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import hashlib
import json
import os
from pathlib import Path
Expand Down Expand Up @@ -44,6 +43,7 @@
from dagster._core.definitions.metadata import MetadataUserInput, RawMetadataValue
from dagster._core.errors import DagsterInvalidSubsetError
from dagster._utils.merger import deep_merge_dicts
from dagster._utils.security import non_secure_md5_hash_str
from dagster._utils.warnings import (
deprecation_warning,
normalize_renamed_param,
Expand Down Expand Up @@ -430,7 +430,7 @@ def _dbt_nodes_to_assets(
if not op_name:
op_name = f"run_dbt_{project_id}"
if select != "fqn:*" or exclude:
op_name += "_" + hashlib.md5(select.encode() + exclude.encode()).hexdigest()[-5:]
op_name += "_" + non_secure_md5_hash_str(select.encode() + exclude.encode())[-5:]

check_outs_by_output_name: Mapping[str, Out] = {}
if check_specs_by_output_name:
Expand Down
6 changes: 3 additions & 3 deletions python_modules/libraries/dagster-k8s/dagster_k8s/job.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import copy
import hashlib
import json
import random
import string
Expand All @@ -21,6 +20,7 @@
from dagster._core.utils import parse_env_var
from dagster._serdes import whitelist_for_serdes
from dagster._utils.merger import merge_dicts
from dagster._utils.security import non_secure_md5_hash_str

from .models import k8s_model_from_dict, k8s_snake_case_dict
from .utils import get_common_labels, sanitize_k8s_label
Expand Down Expand Up @@ -892,6 +892,6 @@ def get_k8s_job_name(input_1, input_2=None):
input_2 = "".join(random.choice(letters) for i in range(20))

# Creates 32-bit signed int, so could be negative
name_hash = hashlib.md5((input_1 + input_2).encode("utf-8"))
name_hash = non_secure_md5_hash_str((input_1 + input_2).encode("utf-8"))

return name_hash.hexdigest()
return name_hash