Skip to content

Commit

Permalink
[gantry] fix the problem of incorrectly obtaining the current container name when running outside the container.
Browse files Browse the repository at this point in the history

This blocks running tests in parallel.
  • Loading branch information
shizunge committed Feb 16, 2024
1 parent 80b46bf commit 87ae432
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 58 deletions.
18 changes: 1 addition & 17 deletions .github/workflows/on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,22 +133,6 @@ jobs:
runs-on: ubuntu-latest
needs:
- build_and_push
strategy:
fail-fast: false
matrix:
test_suit:
- gantry_cleanup_images_spec.sh
- gantry_entrypoint_spec.sh
- gantry_filters_spec.sh
- gantry_job_spec.sh
- gantry_login_spec.sh
- gantry_manifest_spec.sh
- gantry_multiple_services_spec.sh
- gantry_no_running_tasks_spec.sh
- gantry_notify_spec.sh
- gantry_options_spec.sh
- gantry_rollback_spec.sh
- gantry_simple_spec.sh
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3.0.0
Expand All @@ -172,4 +156,4 @@ jobs:
export DOCKERHUB_USERNAME=${{ secrets.DOCKERHUB_USERNAME }}
export GANTRY_TEST_CONTAINER_REPO_TAG=$(cat tag.txt)
echo "GANTRY_TEST_CONTAINER_REPO_TAG=${GANTRY_TEST_CONTAINER_REPO_TAG}"
bash shellspec --pattern tests/${{ matrix.test_suit }} --tag "container_test:true"
bash shellspec --jobs 50 --tag "container_test:true"
25 changes: 15 additions & 10 deletions src/lib-gantry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,8 @@ _remove_images() {
--mount type=bind,source=/var/run/docker.sock,destination=/var/run/docker.sock \
--env "GANTRY_IMAGES_TO_REMOVE=${IMAGES_TO_REMOVE_LIST}" \
${CLEANUP_IMAGES_OPTIONS} \
"${IMAGES_REMOVER}" 2>&1); then
log ERROR "Failed to remove images: ${RMI_MSG}"
"${IMAGES_REMOVER}"); then
log ERROR "Failed to remove images."
fi
wait_service_state "${SERVICE_NAME}"
docker_service_logs "${SERVICE_NAME}"
Expand Down Expand Up @@ -347,19 +347,24 @@ _current_container_name() {
GWBRIDGE_NETWORK=$(docker network ls --format '{{.ID}}' --filter 'name=docker_gwbridge') || return 1;
IPS=$(ip route | grep src | sed -n "s/.* src \(\S*\).*$/\1/p");
[ -z "${IPS}" ] && return 0;
local NID;
local NID=;
for NID in ${ALL_NETWORKS}; do
[ "${NID}" = "${GWBRIDGE_NETWORK}" ] && continue;
local ALL_LOCAL_NAME_AND_IP;
ALL_LOCAL_NAME_AND_IP=$(docker network inspect "${NID}" --format "{{range .Containers}}{{.Name}}={{println .IPv4Address}}{{end}}") || return 1;
local ALL_LOCAL_NAME_AND_IP=;
ALL_LOCAL_NAME_AND_IP=$(docker network inspect "${NID}" --format "{{range .Containers}}{{.Name}}/{{println .IPv4Address}}{{end}}") || return 1;
for NAME_AND_IP in ${ALL_LOCAL_NAME_AND_IP}; do
[ -z "${NAME_AND_IP}" ] && continue;
# '<container name>/<ip>/<mask>'
# '<container name>/' (when network mode is host)
local CNAME CIP
CNAME=$(echo "${NAME_AND_IP}" | cut -d/ -f1);
CIP=$(echo "${NAME_AND_IP}" | cut -d/ -f2);
# Unable to find the container IP when network mode is host.
[ -z "${CIP}" ] && continue;
for IP in ${IPS}; do
echo "${NAME_AND_IP}" | grep -q "${IP}" || continue;
local NAME;
NAME=$(echo "${NAME_AND_IP}" | sed "s/\(.*\)=${IP}.*$/\1/");
_static_variable_add_unique_to_list STATIC_VAR_CURRENT_CONTAINER_NAME "${NAME}"
echo "${NAME}";
[ "${IP}" != "${CIP}" ] && continue;
_static_variable_add_unique_to_list STATIC_VAR_CURRENT_CONTAINER_NAME "${CNAME}"
echo "${CNAME}";
return 0;
done
done
Expand Down
7 changes: 6 additions & 1 deletion tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@ bash shellspec --pattern tests/<file_name>
bash shellspec --pattern tests/<file_name> --example <example_within_the_file>
```

To generate coverage (need [kcov](https://github.com/SimonKagstrom/kcov) installed):
To run multiple tests in parallel:
```
bash shellspec --jobs 50
```

To generate coverage (requires [kcov](https://github.com/SimonKagstrom/kcov) to be installed):
```
bash shellspec --kcov
```
Expand Down
81 changes: 51 additions & 30 deletions tests/spec_gantry_test_helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -168,41 +168,55 @@ load_test_registry() {
_start_registry() {
local SUITE_NAME="${1:?}"
SUITE_NAME=$(echo "${SUITE_NAME}" | tr ' ' '-')
local SUITE_NAME_LENGTH="${#SUITE_NAME}"
local REGISTRY_SERVICE_NAME="gantry-test-registry-${SUITE_NAME}"
local REGISTRY_BASE="127.0.0.1"
local REGISTRY_PORT="5000"
local REGISTRY_PORT=$((55000+SUITE_NAME_LENGTH*2))
local TEST_REGISTRY="${REGISTRY_BASE}:${REGISTRY_PORT}"
export TEST_USERNAME="gantry"
export TEST_PASSWORD="gantry"
local REGISTRY_IMAGE="docker.io/registry"
local TRIES=0
local MAX_RETRIES=50
local PORT_LIMIT=500
REGISTRY_PORT=$(_next_available_port "${REGISTRY_PORT}" "${PORT_LIMIT}") || return 1
[ -z "${REGISTRY_PORT}" ] && return 1
TEST_REGISTRY="${REGISTRY_BASE}:${REGISTRY_PORT}"
echo -n "${SUITE_NAME} starting registry ${TEST_REGISTRY} "
# SC2046 (warning): Quote this to prevent word splitting.
# shellcheck disable=SC2046
while ! docker service create --quiet \
--name "${REGISTRY_SERVICE_NAME}" \
--restart-condition "on-failure" \
--restart-max-attempts 5 \
$(_location_constraints) \
--mode=replicated \
-p "${REGISTRY_PORT}:5000" \
"${REGISTRY_IMAGE}" 2>&1; do
while true; do
if ! REGISTRY_PORT=$(_next_available_port "${REGISTRY_PORT}" "${PORT_LIMIT}" 2>&1); then
echo "_start_registry _next_available_port error: ${REGISTRY_PORT}" >&2
return 1
fi
if [ -z "${REGISTRY_PORT}" ]; then
echo "_start_registry _next_available_port error: REGISTRY_PORT is empty." >&2
return 1
fi
stop_service "${REGISTRY_SERVICE_NAME}" 1>/dev/null 2>&1
[ "${TRIES}" -ge "${MAX_RETRIES}" ] && echo "_start_registry Reach MAX_RETRIES ${MAX_RETRIES}" && return 1
TEST_REGISTRY="${REGISTRY_BASE}:${REGISTRY_PORT}"
echo "${SUITE_NAME} starting registry ${TEST_REGISTRY} "
# SC2046 (warning): Quote this to prevent word splitting.
# shellcheck disable=SC2046
if docker service create --quiet \
--name "${REGISTRY_SERVICE_NAME}" \
--restart-condition "on-failure" \
--restart-max-attempts 5 \
$(_location_constraints) \
--mode=replicated \
-p "${REGISTRY_PORT}:5000" \
"${REGISTRY_IMAGE}" 2>&1; then
break;
fi
if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then
echo "_start_registry Reach MAX_RETRIES ${MAX_RETRIES}" >&2
return 1
fi
TRIES=$((TRIES+1))
REGISTRY_PORT=$((REGISTRY_PORT+1))
sleep 1
REGISTRY_PORT=$(_next_available_port "${REGISTRY_PORT}" "${PORT_LIMIT}") || return 1
[ -z "${REGISTRY_PORT}" ] && return 1
TEST_REGISTRY="${REGISTRY_BASE}:${REGISTRY_PORT}"
echo -n "${SUITE_NAME} starting registry ${TEST_REGISTRY} again "
done
local REGISTRY_FILE=
REGISTRY_FILE=$(_get_test_registry_file "${SUITE_NAME}") || return 1
if ! REGISTRY_FILE=$(_get_test_registry_file "${SUITE_NAME}" 2>&1); then
echo "_start_registry _get_test_registry_file error: ${REGISTRY_FILE}" >&2
return 1
fi
echo "${SUITE_NAME} uses registry ${TEST_REGISTRY}."
echo "${TEST_REGISTRY}" > "${REGISTRY_FILE}"
}

Expand All @@ -212,7 +226,7 @@ _stop_registry() {
local REGISTRY_SERVICE_NAME="gantry-test-registry-${SUITE_NAME}"
local REGISTRY=
REGISTRY=$(load_test_registry "${SUITE_NAME}") || return 1
echo -n "Removing registry ${REGISTRY} "
echo "Removing registry ${REGISTRY} "
stop_service "${REGISTRY_SERVICE_NAME}"
local REGISTRY_FILE=
REGISTRY_FILE=$(_get_test_registry_file "${SUITE_NAME}") || return 1
Expand Down Expand Up @@ -380,7 +394,7 @@ build_test_image() {
FILE=$(mktemp)
echo "FROM alpinelinux/docker-cli:latest" > "${FILE}"
echo "ENTRYPOINT [\"sh\", \"-c\", \"echo $(unique_id); trap \\\"${EXIT_CMD}\\\" HUP INT TERM; ${TASK_CMD}\"]" >> "${FILE}"
echo -n "Building ${IMAGE_WITH_TAG} "
echo "Building ${IMAGE_WITH_TAG} "
timeout 120 docker build --quiet --tag "${IMAGE_WITH_TAG}" --file "${FILE}" .
rm "${FILE}"
}
Expand All @@ -390,7 +404,7 @@ build_and_push_test_image() {
local TASK_SECONDS="${2}"
local EXIT_SECONDS="${3}"
build_test_image "${IMAGE_WITH_TAG}" "${TASK_SECONDS}" "${EXIT_SECONDS}"
echo -n "Pushing image "
echo "Pushing image "
docker push --quiet "${IMAGE_WITH_TAG}"
}

Expand Down Expand Up @@ -418,7 +432,10 @@ wait_zero_running_tasks() {
_handle_failure "Failed to obtain task states of service ${SERVICE_NAME}: ${REPLICAS}"
return 1
fi
[ "${TRIES}" -ge "${MAX_RETRIES}" ] && echo "wait_zero_running_tasks Reach MAX_RETRIES ${MAX_RETRIES}" && return 1
if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then
echo "wait_zero_running_tasks Reach MAX_RETRIES ${MAX_RETRIES}" >&2
return 1
fi
TRIES=$((TRIES+1))
# https://docs.docker.com/engine/reference/commandline/service_ls/#examples
# The REPLICAS is like "5/5" or "1/1 (3/5 completed)"
Expand Down Expand Up @@ -454,7 +471,10 @@ _wait_service_state() {
local TRIES=0
local MAX_RETRIES=120
while ! docker service ps --format "{{.CurrentState}}" "${SERVICE_NAME}" | grep -q "${STATE}"; do
[ "${TRIES}" -ge "${MAX_RETRIES}" ] && echo "_wait_service_state Reach MAX_RETRIES ${MAX_RETRIES}" && return 1
if [ "${TRIES}" -ge "${MAX_RETRIES}" ]; then
echo "_wait_service_state Reach MAX_RETRIES ${MAX_RETRIES}" >&2
return 1
fi
TRIES=$((TRIES+1))
sleep 1
done
Expand All @@ -463,7 +483,7 @@ _wait_service_state() {
start_replicated_service() {
local SERVICE_NAME="${1}"
local IMAGE_WITH_TAG="${2}"
echo -n "Creating service ${SERVICE_NAME} in replicated mode "
echo "Creating service ${SERVICE_NAME} in replicated mode "
# SC2046 (warning): Quote this to prevent word splitting.
# shellcheck disable=SC2046
timeout 120 docker service create --quiet \
Expand All @@ -478,7 +498,7 @@ start_replicated_service() {
start_global_service() {
local SERVICE_NAME="${1}"
local IMAGE_WITH_TAG="${2}"
echo -n "Creating service ${SERVICE_NAME} in global mode "
echo "Creating service ${SERVICE_NAME} in global mode "
# SC2046 (warning): Quote this to prevent word splitting.
# shellcheck disable=SC2046
timeout 120 docker service create --quiet \
Expand All @@ -493,7 +513,7 @@ start_global_service() {
_start_replicated_job() {
local SERVICE_NAME="${1}"
local IMAGE_WITH_TAG="${2}"
echo -n "Creating service ${SERVICE_NAME} in replicated job mode "
echo "Creating service ${SERVICE_NAME} in replicated job mode "
# SC2046 (warning): Quote this to prevent word splitting.
# shellcheck disable=SC2046
timeout 120 docker service create --quiet \
Expand All @@ -509,7 +529,7 @@ _start_replicated_job() {

stop_service() {
local SERVICE_NAME="${1}"
echo -n "Removing service "
echo "Removing service "
docker service rm "${SERVICE_NAME}"
}

Expand Down Expand Up @@ -605,6 +625,7 @@ _run_gantry_container() {
--env "GANTRY_UPDATE_TIMEOUT_SECONDS=${GANTRY_UPDATE_TIMEOUT_SECONDS}" \
--env "GANTRY_CLEANUP_IMAGES=${GANTRY_CLEANUP_IMAGES}" \
--env "GANTRY_CLEANUP_IMAGES_OPTIONS=${GANTRY_CLEANUP_IMAGES_OPTIONS}" \
--env "GANTRY_CLEANUP_IMAGES_REMOVER=${GANTRY_CLEANUP_IMAGES_REMOVER}" \
--env "GANTRY_IMAGES_TO_REMOVE=${GANTRY_IMAGES_TO_REMOVE}" \
--env "GANTRY_NOTIFICATION_APPRISE_URL=${GANTRY_NOTIFICATION_APPRISE_URL}" \
--env "GANTRY_NOTIFICATION_TITLE=${GANTRY_NOTIFICATION_TITLE}" \
Expand Down

0 comments on commit 87ae432

Please sign in to comment.