Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Conditionally remove networkd online dependency on Ubuntu #5772

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 107 additions & 1 deletion cloudinit/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import traceback
import logging
import yaml
from typing import Tuple, Callable
from typing import Optional, Tuple, Callable, Union

from cloudinit import netinfo
from cloudinit import signal_handler
Expand All @@ -39,6 +39,7 @@
from cloudinit.config import cc_set_hostname
from cloudinit.config.modules import Modules
from cloudinit.config.schema import validate_cloudconfig_schema
from cloudinit.lifecycle import log_with_downgradable_level
from cloudinit.reporting import events
from cloudinit.settings import (
PER_INSTANCE,
Expand All @@ -47,6 +48,8 @@
CLOUD_CONFIG,
)

Reason = str

# Welcome message template
WELCOME_MSG_TPL = (
"Cloud-init v. {version} running '{action}' at "
Expand Down Expand Up @@ -334,6 +337,90 @@ def _should_bring_up_interfaces(init, args):
return not args.local


def _should_wait_via_cloud_config(
raw_config: Optional[Union[str, bytes]]
) -> Tuple[bool, Reason]:
if not raw_config:
return False, "no configuration found"

# If our header is anything other than #cloud-config, wait
possible_header: Union[bytes, str] = raw_config.strip()[:13]
if isinstance(possible_header, str):
decoded_header = possible_header
elif isinstance(possible_header, bytes):
try:
decoded_header = possible_header.decode("utf-8")
except UnicodeDecodeError:
return True, "Binary user data found"
if not decoded_header.startswith("#cloud-config"):
return True, "non-cloud-config user data found"

try:
parsed_yaml = yaml.safe_load(raw_config)
except Exception as e:
log_with_downgradable_level(
logger=LOG,
version="24.4",
requested_level=logging.WARNING,
msg="Unexpected failure parsing userdata: %s",
args=e,
)
return True, "failed to parse user data as yaml"

# These all have the potential to require network access, so we should wait
if "write_files" in parsed_yaml and any(
"source" in item for item in parsed_yaml["write_files"]
):
return True, "write_files with source found"
if parsed_yaml.get("bootcmd"):
return True, "bootcmd found"
if parsed_yaml.get("random_seed", {}).get("command"):
return True, "random_seed command found"
if parsed_yaml.get("mounts"):
return True, "mounts found"
return False, "cloud-config does not contain network requiring elements"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is good. thanks for the breakdown



def _should_wait_on_network(
datasource: Optional[sources.DataSource],
) -> Tuple[bool, Reason]:
"""Determine if we should wait on network connectivity for cloud-init.

We need to wait if:
- We have no datasource
- We have user data that is anything other than cloud-config
- This can likely be further optimized in the future to include
other user data types
- We have user data that requires network access
"""
if not datasource:
return True, "no datasource found"
user_should_wait, user_reason = _should_wait_via_cloud_config(
datasource.get_userdata_raw()
)
if user_should_wait:
return True, f"{user_reason} in user data"
vendor_should_wait, vendor_reason = _should_wait_via_cloud_config(
datasource.get_vendordata_raw()
)
if vendor_should_wait:
return True, f"{vendor_reason} in vendor data"
vendor2_should_wait, vendor2_reason = _should_wait_via_cloud_config(
datasource.get_vendordata2_raw()
)
if vendor2_should_wait:
return True, f"{vendor2_reason} in vendor data2"

return (
False,
(
f"user data: {user_reason}, "
f"vendor data: {vendor_reason}, "
f"vendor data2: {vendor2_reason}"
),
)


def main_init(name, args):
deps = [sources.DEP_FILESYSTEM, sources.DEP_NETWORK]
if args.local:
Expand Down Expand Up @@ -411,6 +498,9 @@ def main_init(name, args):
mode = sources.DSMODE_LOCAL if args.local else sources.DSMODE_NETWORK

if mode == sources.DSMODE_NETWORK:
if os.path.exists(init.paths.get_runpath(".wait-on-network")):
LOG.debug("Will wait for network connectivity before continuing")
init.distro.wait_for_network()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we now util.del_file("init.paths.get_runpath(".wait-on-network")) now that we've waited and succeeded on the wait_for_network() call?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to? /run gets cleaned on boot, and if a cloud-init collect-logs will contain the file as another piece of evidence. I don't think it would hurt any further invocations of cloud-init on the current boot.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only reason would be later calls on the command line to cloud-init init per use-cases like -#5726 not needing to actually shell out to run that wait again.

existing = "trust"
sys.stderr.write("%s\n" % (netinfo.debug_info()))
else:
Expand Down Expand Up @@ -478,9 +568,25 @@ def main_init(name, args):
# dhcp clients to advertize this hostname to any DDNS services
# LP: #1746455.
_maybe_set_hostname(init, stage="local", retry_stage="network")

init.apply_network_config(bring_up=bring_up_interfaces)

if mode == sources.DSMODE_LOCAL:
should_wait, reason = _should_wait_on_network(init.datasource)
if should_wait:
LOG.debug(
"Network connectivity determined necessary for "
"cloud-init's network stage. Reason: %s",
reason,
)
util.write_file(init.paths.get_runpath(".wait-on-network"), "")
else:
LOG.debug(
"Network connectivity determined unnecessary for "
"cloud-init's network stage. %s",
reason,
)

if init.datasource.dsmode != mode:
LOG.debug(
"[%s] Exiting. datasource %s not in local mode.",
Expand Down
31 changes: 23 additions & 8 deletions cloudinit/distros/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,15 +349,16 @@ def dhcp_client(self) -> dhcp.DhcpClient:
raise dhcp.NoDHCPLeaseMissingDhclientError()

@property
def network_activator(self) -> Optional[Type[activators.NetworkActivator]]:
"""Return the configured network activator for this environment."""
def network_activator(self) -> Type[activators.NetworkActivator]:
"""Return the configured network activator for this environment.

:returns: The network activator class to use
"raises": NoActivatorException if no activator is found
"""
priority = util.get_cfg_by_path(
self._cfg, ("network", "activators"), None
)
try:
return activators.select_activator(priority=priority)
except activators.NoActivatorException:
return None
return activators.select_activator(priority=priority)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this change I think you've also missed handling the exception at the call-site in cloudinit/cmd/devel/hotplug_hook.py... Although I think that call-site in hotplug hook is also broken anyway before this PR if cloud-init can't find an activator as activator return value would be None, and we'd try to reference None.bring_up_interface

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, lack of exception handling there was intentional.


@property
def network_renderer(self) -> Renderer:
Expand Down Expand Up @@ -460,8 +461,9 @@ def apply_network_config(self, netconfig, bring_up=False) -> bool:
# Now try to bring them up
if bring_up:
LOG.debug("Bringing up newly configured network interfaces")
network_activator = self.network_activator
if not network_activator:
try:
network_activator = self.network_activator
except activators.NoActivatorException:
LOG.warning(
"No network activator found, not bringing up "
"network interfaces"
Expand Down Expand Up @@ -1574,6 +1576,19 @@ def device_part_info(devpath: str) -> tuple:
# name in /dev/
return diskdevpath, ptnum

def wait_for_network(self):
"""Ensure that cloud-init has network connectivity.

For most distros, this is a no-op as cloud-init's network service is
ordered in boot to start after network connectivity has been achieved.
As an optimization, distros may opt to order cloud-init's network
service immediately after cloud-init's local service, and only
require network connectivity if it has been deemed necessary.
This method is a hook for distros to implement this optimization.
It is called during cloud-init's network stage if it was determined
that network connectivity is necessary in cloud-init's network stage.
"""


def _apply_hostname_transformations_to_url(url: str, transformations: list):
"""
Expand Down
13 changes: 13 additions & 0 deletions cloudinit/distros/ubuntu.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@
# This file is part of cloud-init. See LICENSE file for license information.

import copy
import logging

from cloudinit.distros import PREFERRED_NTP_CLIENTS, debian
from cloudinit.distros.package_management.snap import Snap
from cloudinit.net import activators
from cloudinit.net.netplan import CLOUDINIT_NETPLAN_FILE

LOG = logging.getLogger(__name__)


class Distro(debian.Distro):
def __init__(self, name, cfg, paths):
Expand Down Expand Up @@ -49,3 +53,12 @@ def preferred_ntp_clients(self):
if not self._preferred_ntp_clients:
self._preferred_ntp_clients = copy.deepcopy(PREFERRED_NTP_CLIENTS)
return self._preferred_ntp_clients

def wait_for_network(self) -> None:
"""Ensure that cloud-init's network service has network connectivity"""
try:
self.network_activator.wait_for_network()
except activators.NoActivatorException:
LOG.error("Failed to wait for network. No network activator found")
except Exception as e:
LOG.error("Failed to wait for network: %s", e)
1 change: 1 addition & 0 deletions cloudinit/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def __init__(self, path_cfgs: dict, ds=None):
"vendor_scripts": "scripts/vendor",
"warnings": "warnings",
"hotplug.enabled": "hotplug.enabled",
".wait-on-network": ".wait-on-network",
}
# Set when a datasource becomes active
self.datasource = ds
Expand Down
Loading