Skip to content

Commit

Permalink
Add missing project SSH key to on-prem instances (#1716)
Browse files Browse the repository at this point in the history
Fixes: #1640
  • Loading branch information
un-def authored Sep 24, 2024
1 parent 047669e commit fe272d3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 10 deletions.
21 changes: 11 additions & 10 deletions src/dstack/_internal/server/background/tasks/process_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,12 @@ async def _add_remote(instance: InstanceModel) -> None:
)
return

+ authorized_keys = [pk.public.strip() for pk in remote_details.ssh_keys]
+ authorized_keys.append(instance.project.ssh_public_key.strip())
+
try:
future = asyncio.get_running_loop().run_in_executor(
- None, _deploy_instance, remote_details, pkeys
+ None, _deploy_instance, remote_details, pkeys, authorized_keys
)
deploy_timeout = 20 * 60 # 20 minutes
result = await asyncio.wait_for(future, timeout=deploy_timeout)
Expand Down Expand Up @@ -317,7 +320,9 @@ async def _add_remote(instance: InstanceModel) -> None:


def _deploy_instance(
- remote_details: RemoteConnectionInfo, pkeys: List[PKey]
+ remote_details: RemoteConnectionInfo,
+ pkeys: List[PKey],
+ authorized_keys: List[str],
) -> Tuple[HealthStatus, Dict[str, Any]]:
with get_paramiko_connection(
remote_details.ssh_user, remote_details.host, remote_details.port, pkeys
Expand All @@ -328,17 +333,11 @@ def _deploy_instance(

# Execute pre start commands
shim_pre_start_commands = get_shim_pre_start_commands(runner_build)
- run_pre_start_commands(
- client,
- shim_pre_start_commands,
- authorized_keys=[pk.public.strip() for pk in remote_details.ssh_keys],
- )
+ run_pre_start_commands(client, shim_pre_start_commands, authorized_keys)
logger.debug("The script for installing dstack has been executed")

# Upload envs
- shim_envs = get_shim_env(
- runner_build, authorized_keys=[sk.public for sk in remote_details.ssh_keys]
- )
+ shim_envs = get_shim_env(runner_build, authorized_keys)
try:
fleet_configuration_envs = remote_details.env.as_dict()
except ValueError as e:
Expand Down Expand Up @@ -538,6 +537,8 @@ async def _check_instance(instance: InstanceModel) -> None:
return

ssh_private_key = instance.project.ssh_private_key
+ # TODO: Drop this logic and always use project key once it's safe to assume that most on-prem
+ # fleets are (re)created after this change: https://github.com/dstackai/dstack/pull/1716
if instance.remote_connection_info is not None:
remote_conn_info: RemoteConnectionInfo = RemoteConnectionInfo.__response__.parse_raw(
instance.remote_connection_info
Expand Down
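2 changes: 2 additions & 0 deletions src/dstack/_internal/server/background/tasks/process_running_jobs.py
Original file line number Diff line number Diff line change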
Expand Up @@ -134,6 +134,8 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
)

server_ssh_private_key = project.ssh_private_key
+ # TODO: Drop this logic and always use project key once it's safe to assume that most on-prem
+ # fleets are (re)created after this change: https://github.com/dstackai/dstack/pull/1716
if (
job_model.instance is not None
and job_model.instance.remote_connection_info is not None
Expand Down
5 changes: 5 additions & 0 deletions src/dstack/_internal/server/services/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ async def stop_runner(session: AsyncSession, job_model: JobModel):
)
instance: Optional[InstanceModel] = res.scalar()

+ # TODO: Drop this logic and always use project key once it's safe to assume that most on-prem
+ # fleets are (re)created after this change: https://github.com/dstackai/dstack/pull/1716
if instance and instance.remote_connection_info is not None:
remote_conn_info: RemoteConnectionInfo = RemoteConnectionInfo.__response__.parse_raw(
instance.remote_connection_info
Expand Down Expand Up @@ -210,6 +212,9 @@ async def process_terminating_job(session: AsyncSession, job_model: JobModel):
jpd = JobProvisioningData.__response__.parse_raw(job_model.job_provisioning_data)
logger.debug("%s: stopping container", fmt(job_model))
ssh_private_key = instance.project.ssh_private_key
+ # TODO: Drop this logic and always use project key once it's safe to assume that
+ # most on-prem fleets are (re)created after this change:
+ # https://github.com/dstackai/dstack/pull/1716
if instance and instance.remote_connection_info is not None:
remote_conn_info: RemoteConnectionInfo = (
RemoteConnectionInfo.__response__.parse_raw(
Expand Down

0 comments on commit fe272d3

Please sign in to comment.