Skip to content

Commit

Permalink
[gantry] add the service to the failure list when inspecting image fails.
Browse files Browse the repository at this point in the history
  • Loading branch information
shizunge committed Jan 24, 2024
1 parent bf0af2d commit d053f05
Showing 1 changed file with 57 additions and 59 deletions.
116 changes: 57 additions & 59 deletions src/lib-gantry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,13 @@ _remove_images() {
return 0
fi
local IMAGE_NUM=
IMAGE_NUM=$(get_number_of_elements "${STATIC_VAR_IMAGES_TO_REMOVE}")
IMAGE_NUM=$(_get_number_of_elements "${STATIC_VAR_IMAGES_TO_REMOVE}")
log INFO "Removing ${IMAGE_NUM} image(s):"
for I in $(echo "${STATIC_VAR_IMAGES_TO_REMOVE}" | tr '\n' ' '); do
log INFO "- ${I}"
done
local IMAGE_OF_THIS_CONTAINER=
IMAGE_OF_THIS_CONTAINER=$(get_service_image "$(current_service_name)")
IMAGE_OF_THIS_CONTAINER=$(_get_service_image "$(_current_service_name)")
[ -z "${IMAGE_OF_THIS_CONTAINER}" ] && IMAGE_OF_THIS_CONTAINER="shizunge/gantry:image-remover"
local IMAGES_TO_REMOVE=
IMAGES_TO_REMOVE=$(echo "${STATIC_VAR_IMAGES_TO_REMOVE}" | tr '\n' ' ')
Expand Down Expand Up @@ -200,7 +200,7 @@ _report_services_updated() {
return 0
fi
local UPDATED_NUM=
UPDATED_NUM=$(get_number_of_elements "${STATIC_VAR_SERVICES_UPDATED}")
UPDATED_NUM=$(_get_number_of_elements "${STATIC_VAR_SERVICES_UPDATED}")
echo "${UPDATED_NUM} service(s) updated:"
for S in ${STATIC_VAR_SERVICES_UPDATED}; do
echo "- ${S}"
Expand All @@ -216,19 +216,19 @@ _add_service_update_failed() {
STATIC_VAR_SERVICES_UPDATE_FAILED=$(add_uniq_to_list "${STATIC_VAR_SERVICES_UPDATE_FAILED}" "${SERVICE_NAME}")
}

report_services_update_failed() {
_report_services_update_failed() {
if [ -z "${STATIC_VAR_SERVICES_UPDATE_FAILED}" ]; then
return 0
fi
local FAILED_NUM=
FAILED_NUM=$(get_number_of_elements "${STATIC_VAR_SERVICES_UPDATE_FAILED}")
FAILED_NUM=$(_get_number_of_elements "${STATIC_VAR_SERVICES_UPDATE_FAILED}")
echo "${FAILED_NUM} service(s) update failed:"
for S in ${STATIC_VAR_SERVICES_UPDATE_FAILED}; do
echo "- ${S}"
done
}

get_number_of_elements() {
_get_number_of_elements() {
local LIST="${*}"
[ -z "${LIST}" ] && echo 0 && return 0
# SC2086: Double quote to prevent globbing and word splitting.
Expand All @@ -238,25 +238,25 @@ get_number_of_elements() {
echo "${NUM}"
}

report_services() {
_report_services() {
local UPDATED_MSG=
local FAILED_MSG=
UPDATED_MSG=$(_report_services_updated)
echo "${UPDATED_MSG}" | log_lines INFO
FAILED_MSG=$(report_services_update_failed)
FAILED_MSG=$(_report_services_update_failed)
echo "${FAILED_MSG}" | log_lines INFO
# Send notification
local UPDATED_NUM FAILED_NUM TITLE BODY
UPDATED_NUM=$(get_number_of_elements "${STATIC_VAR_SERVICES_UPDATED}")
FAILED_NUM=$(get_number_of_elements "${STATIC_VAR_SERVICES_UPDATE_FAILED}")
UPDATED_NUM=$(_get_number_of_elements "${STATIC_VAR_SERVICES_UPDATED}")
FAILED_NUM=$(_get_number_of_elements "${STATIC_VAR_SERVICES_UPDATE_FAILED}")
local TYPE="success"
[ "${FAILED_NUM}" -ne "0" ] && TYPE="failure"
TITLE="[gantry] ${UPDATED_NUM} services updated ${FAILED_NUM} failed"
BODY=$(echo -e "${UPDATED_MSG}\n${FAILED_MSG}")
_send_notification "${TYPE}" "${TITLE}" "${BODY}"
}

in_list() {
_in_list() {
local LIST="${1}"
local SEARCHED_ITEM="${2}"
for ITEM in ${LIST}; do
Expand All @@ -267,7 +267,7 @@ in_list() {
return 1
}

current_container_name() {
_current_container_name() {
[ -n "${STATIC_VAR_CURRENT_CONTAINER_NAME}" ] && echo "${STATIC_VAR_CURRENT_CONTAINER_NAME}" && return 0
local ALL_NETWORKS GWBRIDGE_NETWORK IPS;
ALL_NETWORKS=$(docker network ls --format '{{.ID}}') || return 1;
Expand Down Expand Up @@ -295,20 +295,20 @@ current_container_name() {
return 0;
}

current_service_name() {
_current_service_name() {
[ -n "${STATIC_VAR_CURRENT_SERVICE_NAME}" ] && echo "${STATIC_VAR_CURRENT_SERVICE_NAME}" && return 0
local CNAME=
CNAME=$(current_container_name) || return 1
CNAME=$(_current_container_name) || return 1
[ -z "${CNAME}" ] && return 0
local SNAME=
SNAME=$(docker container inspect "${CNAME}" --format '{{range $key,$value := .Config.Labels}}{{$key}}={{println $value}}{{end}}' | grep "com.docker.swarm.service.name" | sed "s/com.docker.swarm.service.name=\(.*\)$/\1/") || return 1
STATIC_VAR_CURRENT_SERVICE_NAME=${SNAME}
echo "${SNAME}"
}

service_is_self() {
_service_is_self() {
if [ -z "${GANTRY_SERVICES_SELF}" ]; then
GANTRY_SERVICES_SELF=$(current_service_name)
GANTRY_SERVICES_SELF=$(_current_service_name)
export GANTRY_SERVICES_SELF
[ -n "${GANTRY_SERVICES_SELF}" ] && log INFO "Set GANTRY_SERVICES_SELF to ${GANTRY_SERVICES_SELF}."
fi
Expand All @@ -317,19 +317,19 @@ service_is_self() {
[ "${SERVICE_NAME}" = "${SELF}" ]
}

get_service_image() {
_get_service_image() {
local SERVICE_NAME="${1}"
[ -z "${SERVICE_NAME}" ] && return 1
docker service inspect -f '{{.Spec.TaskTemplate.ContainerSpec.Image}}' "${SERVICE_NAME}"
}

get_service_previous_image() {
_get_service_previous_image() {
local SERVICE_NAME="${1}"
[ -z "${SERVICE_NAME}" ] && return 1
docker service inspect -f '{{.PreviousSpec.TaskTemplate.ContainerSpec.Image}}' "${SERVICE_NAME}"
}

get_service_mode() {
_get_service_mode() {
local SERVICE_NAME="${1}"
local MODE=
if ! MODE=$(docker service ls --filter "name=${SERVICE_NAME}" --format '{{.Mode}}' 2>&1); then
Expand All @@ -341,20 +341,20 @@ get_service_mode() {

# Echo the mode when the service is a replicated job or a global job.
# Return 0 when the service is a replicated job or a global job, non-zero otherwise.
service_is_job() {
_service_is_job() {
local SERVICE_NAME="${1}"
local MODE=
if ! MODE=$(get_service_mode "${SERVICE_NAME}"); then
if ! MODE=$(_get_service_mode "${SERVICE_NAME}"); then
return 1
fi
# Looking for replicated-job or global-job
echo "${MODE}" | grep "job"
}

service_is_replicated() {
_service_is_replicated() {
local SERVICE_NAME="${1}"
local MODE=
if ! MODE=$(get_service_mode "${SERVICE_NAME}"); then
if ! MODE=$(_get_service_mode "${SERVICE_NAME}"); then
return 1
fi
# Looking for replicated, not replicated-job
Expand All @@ -364,7 +364,7 @@ service_is_replicated() {
echo "${MODE}"
}

get_config_from_service() {
_get_config_from_service() {
local SERVICE_NAME="${1}"
local AUTH_CONFIG_LABEL="gantry.auth.config"
local AUTH_CONFIG=
Expand All @@ -376,7 +376,7 @@ get_config_from_service() {
echo "--config ${AUTH_CONFIG}"
}

get_image_info() {
_get_image_info() {
local MANIFEST_OPTIONS="${GANTRY_MANIFEST_OPTIONS:-""}"
local MANIFEST_CMD="${1}"
local IMAGE="${2}"
Expand All @@ -396,12 +396,12 @@ get_image_info() {
# echo nothing if we found no new images.
# echo the image if we found a new image.
# return the number of errors.
inspect_image() {
_inspect_image() {
local MANIFEST_CMD="${GANTRY_MANIFEST_CMD:-"buildx"}"
local SERVICE_NAME="${1}"
local DOCKER_CONFIG="${2}"
local IMAGE_WITH_DIGEST=
if ! IMAGE_WITH_DIGEST=$(get_service_image "${SERVICE_NAME}" 2>&1); then
if ! IMAGE_WITH_DIGEST=$(_get_service_image "${SERVICE_NAME}" 2>&1); then
log ERROR "Failed to obtain image from service ${SERVICE_NAME}. ${IMAGE_WITH_DIGEST}"
return 1
fi
Expand All @@ -410,19 +410,19 @@ inspect_image() {
IMAGE=$(echo "${IMAGE_WITH_DIGEST}" | cut -d@ -f1)
DIGEST=$(echo "${IMAGE_WITH_DIGEST}" | cut -d@ -f2)
# Never skip inspecting self
if echo "${MANIFEST_CMD}" | grep -q -i "none" && ! service_is_self "${SERVICE_NAME}"; then
if echo "${MANIFEST_CMD}" | grep -q -i "none" && ! _service_is_self "${SERVICE_NAME}"; then
echo "${IMAGE}"
return 0
fi
if in_list "${STATIC_VAR_NO_NEW_IMAGES}" "${DIGEST}"; then
if _in_list "${STATIC_VAR_NO_NEW_IMAGES}" "${DIGEST}"; then
return 0
fi
if in_list "${STATIC_VAR_NEW_IMAGES}" "${DIGEST}"; then
if _in_list "${STATIC_VAR_NEW_IMAGES}" "${DIGEST}"; then
echo "${IMAGE}"
return 0
fi
local IMAGE_INFO=
if ! IMAGE_INFO=$(get_image_info "${MANIFEST_CMD}" "${IMAGE}" "${DOCKER_CONFIG}" 2>&1); then
if ! IMAGE_INFO=$(_get_image_info "${MANIFEST_CMD}" "${IMAGE}" "${DOCKER_CONFIG}" 2>&1); then
log ERROR "Image ${IMAGE} does not exist or it is not available. ${IMAGE_INFO}"
return 1
fi
Expand All @@ -435,7 +435,7 @@ inspect_image() {
return 0
}

get_number_of_running_tasks() {
_get_number_of_running_tasks() {
local SERVICE_NAME="${1}"
local REPLICAS=
if ! REPLICAS=$(docker service ls --filter "name=${SERVICE_NAME}" --format '{{.Replicas}}' 2>&1); then
Expand All @@ -450,10 +450,10 @@ get_number_of_running_tasks() {
echo "${NUM_RUNS}"
}

get_service_update_additional_options() {
_get_service_update_additional_options() {
local SERVICE_NAME="${1}"
local NUM_RUNS=
NUM_RUNS=$(get_number_of_running_tasks "${SERVICE_NAME}")
NUM_RUNS=$(_get_number_of_running_tasks "${SERVICE_NAME}")
if ! is_number "${NUM_RUNS}"; then
return 1
fi
Expand All @@ -464,14 +464,14 @@ get_service_update_additional_options() {
OPTIONS="${OPTIONS} --detach=true"
local MODE=
# Do not start a new task. Only works for replicated, not global.
if MODE=$(service_is_replicated "${SERVICE_NAME}"); then
if MODE=$(_service_is_replicated "${SERVICE_NAME}"); then
OPTIONS="${OPTIONS} --replicas=0"
fi
fi
echo "${OPTIONS}"
}

rollback_service() {
_rollback_service() {
local ROLLBACK_ON_FAILURE="${GANTRY_ROLLBACK_ON_FAILURE:-"true"}"
local ROLLBACK_OPTIONS="${GANTRY_ROLLBACK_OPTIONS:-""}"
local SERVICE_NAME="${1}"
Expand All @@ -487,17 +487,14 @@ rollback_service() {
# Add "--quiet" to suppress progress output.
# SC2086: Double quote to prevent globbing and word splitting.
# shellcheck disable=SC2086
ROLLBACK_MSG=$(docker ${DOCKER_CONFIG} service update --quiet ${ADDITIONAL_OPTIONS} ${ROLLBACK_OPTIONS} --rollback "${SERVICE_NAME}" 2>&1)
local RETURN_VALUE=$?
if [ ${RETURN_VALUE} -ne 0 ]; then
if ! ROLLBACK_MSG=$(docker ${DOCKER_CONFIG} service update --quiet ${ADDITIONAL_OPTIONS} ${ROLLBACK_OPTIONS} --rollback "${SERVICE_NAME}" 2>&1); then
log ERROR "Failed to roll back ${SERVICE_NAME}. ${ROLLBACK_MSG}"
else
log INFO "Rolled back ${SERVICE_NAME}."
return 1
fi
return ${RETURN_VALUE}
log INFO "Rolled back ${SERVICE_NAME}."
}

update_single_service() {
_update_single_service() {
local UPDATE_JOBS="${GANTRY_UPDATE_JOBS:-"false"}"
local UPDATE_TIMEOUT_SECONDS="${GANTRY_UPDATE_TIMEOUT_SECONDS:-300}"
local UPDATE_OPTIONS="${GANTRY_UPDATE_OPTIONS:-""}"
Expand All @@ -507,21 +504,22 @@ update_single_service() {
fi
local SERVICE_NAME="${1}"
local MODE=
if ! is_true "${UPDATE_JOBS}" && MODE=$(service_is_job "${SERVICE_NAME}"); then
if ! is_true "${UPDATE_JOBS}" && MODE=$(_service_is_job "${SERVICE_NAME}"); then
log DEBUG "Skip updating service in ${MODE} mode: ${SERVICE_NAME}."
return 0;
fi
local DOCKER_CONFIG=
DOCKER_CONFIG=$(get_config_from_service "${SERVICE_NAME}")
DOCKER_CONFIG=$(_get_config_from_service "${SERVICE_NAME}")
[ -n "${DOCKER_CONFIG}" ] && log DEBUG "Add option \"${DOCKER_CONFIG}\" to docker commands."
local IMAGE=
IMAGE=$(inspect_image "${SERVICE_NAME}" "${DOCKER_CONFIG}")
local RETURN_VALUE=$?
[ ${RETURN_VALUE} -ne 0 ] && return ${RETURN_VALUE}
if ! IMAGE=$(_inspect_image "${SERVICE_NAME}" "${DOCKER_CONFIG}"); then
_add_service_update_failed "${SERVICE_NAME}"
return 1
fi
[ -z "${IMAGE}" ] && log INFO "No new images." && return 0
log INFO "Updating with image ${IMAGE}"
local ADDITIONAL_OPTIONS=
ADDITIONAL_OPTIONS=$(get_service_update_additional_options "${SERVICE_NAME}")
ADDITIONAL_OPTIONS=$(_get_service_update_additional_options "${SERVICE_NAME}")
[ -n "${ADDITIONAL_OPTIONS}" ] && log DEBUG "Adding options \"${ADDITIONAL_OPTIONS}\" to the \"docker service update\" command."
[ -n "${UPDATE_OPTIONS}" ] && log DEBUG "Adding options \"${UPDATE_OPTIONS}\" to the \"docker service update\" command."
local UPDATE_MSG=
Expand All @@ -533,14 +531,14 @@ update_single_service() {
# "service update --rollback" needs to take different options from "service update".
# Currently no options are added based on the service's labels or status; this is just a placeholder.
local ROLLBACK_ADDITIONAL_OPTIONS=
rollback_service "${SERVICE_NAME}" "${DOCKER_CONFIG}" "${ROLLBACK_ADDITIONAL_OPTIONS}"
_rollback_service "${SERVICE_NAME}" "${DOCKER_CONFIG}" "${ROLLBACK_ADDITIONAL_OPTIONS}"
_add_service_update_failed "${SERVICE_NAME}"
return 1
fi
local PREVIOUS_IMAGE=
local CURRENT_IMAGE=
PREVIOUS_IMAGE=$(get_service_previous_image "${SERVICE_NAME}")
CURRENT_IMAGE=$(get_service_image "${SERVICE_NAME}")
PREVIOUS_IMAGE=$(_get_service_previous_image "${SERVICE_NAME}")
CURRENT_IMAGE=$(_get_service_image "${SERVICE_NAME}")
if [ "${PREVIOUS_IMAGE}" = "${CURRENT_IMAGE}" ]; then
log INFO "No updates."
return 0
Expand All @@ -551,7 +549,7 @@ update_single_service() {
return 0
}

get_services_filted() {
_get_services_filted() {
local SERVICES_FILTERS="${1}"
local SERVICES=
local FILTERS=
Expand Down Expand Up @@ -583,24 +581,24 @@ gantry_get_services_list() {
local SERVICES_EXCLUDED_FILTERS="${GANTRY_SERVICES_EXCLUDED_FILTERS:-""}"
local SERVICES_FILTERS="${GANTRY_SERVICES_FILTERS:-""}"
local SERVICES=
if ! SERVICES=$(get_services_filted "${SERVICES_FILTERS}"); then
if ! SERVICES=$(_get_services_filted "${SERVICES_FILTERS}"); then
return 1
fi
if [ -n "${SERVICES_EXCLUDED_FILTERS}" ]; then
local SERVICES_FROM_EXCLUDED_FILTERS=
if ! SERVICES_FROM_EXCLUDED_FILTERS=$(get_services_filted "${SERVICES_EXCLUDED_FILTERS}"); then
if ! SERVICES_FROM_EXCLUDED_FILTERS=$(_get_services_filted "${SERVICES_EXCLUDED_FILTERS}"); then
return 1
fi
SERVICES_EXCLUDED="${SERVICES_EXCLUDED} ${SERVICES_FROM_EXCLUDED_FILTERS}"
fi
local LIST=
local HAS_SELF=
for S in ${SERVICES} ; do
if in_list "${SERVICES_EXCLUDED}" "${S}" ; then
if _in_list "${SERVICES_EXCLUDED}" "${S}" ; then
continue
fi
# Add self to the front of the list.
if service_is_self "${S}"; then
if _service_is_self "${S}"; then
HAS_SELF=${S}
continue
fi
Expand All @@ -619,7 +617,7 @@ gantry_update_services_list() {
local LOG_SCOPE_SAVED="${LOG_SCOPE}"
for SERVICE in ${LIST}; do
LOG_SCOPE="Updating service ${SERVICE}"
update_single_service "${SERVICE}"
_update_single_service "${SERVICE}"
ACCUMULATED_ERRORS=$((ACCUMULATED_ERRORS + $?))
done
LOG_SCOPE=${LOG_SCOPE_SAVED}
Expand All @@ -629,5 +627,5 @@ gantry_update_services_list() {
gantry_finalize() {
local STACK="${1:-gantry}"
_remove_images "${STACK}_image-remover"
report_services;
_report_services;
}

0 comments on commit d053f05

Please sign in to comment.