jjb: Gently stop instances before publication
[lttng-ci.git] / pipelines / images / imagebuild.sh
CommitLineData
d329b32d
KS
#!/usr/bin/bash
#
# Launch an instance from a base image, provision it with ansible,
# publish it as a new image and point an alias at the result.

# Shell options are set here rather than on the shebang line so that they
# also apply when the script is started as `bash imagebuild.sh`.
set -eux

# Command strings to run when the script ends. They are executed in the
# reverse order of registration.
CLEANUP=()

# Run every registered cleanup command, most recently registered first,
# then empty the list so that a second invocation does nothing.
#
# Globals:
#   CLEANUP (read, then reset) - array of command strings
#
# A failing cleanup command is ignored so that the remaining ones still run.
function cleanup {
    local index
    local restore_errexit=0

    # Remember whether errexit was active so that it is restored afterwards
    # instead of being switched on unconditionally.
    if [[ $- == *e* ]] ; then
        restore_errexit=1
    fi
    set +e

    for (( index=${#CLEANUP[@]}-1 ; index >= 0 ; index-- )) ; do
        # Entries are plain command strings; word splitting is intentional.
        # shellcheck disable=SC2086
        ${CLEANUP[$index]}
    done
    CLEANUP=()

    if [[ "${restore_errexit}" == "1" ]] ; then
        set -e
    fi
}

# Abort the script: run the cleanup commands, print a message on stderr
# and exit.
#
# Arguments:
#   $1 - exit code (default: 1)
#   $2 - message printed on stderr (default: "Unknown reason")
function fail {
    local code="${1:-1}"
    local reason="${2:-Unknown reason}"

    cleanup
    echo "${reason}" >&2
    exit "${code}"
}

# Run the cleanup commands on every exit path. The signal handlers must
# exit explicitly: a handler that only runs cleanup would let the script
# resume after the interrupted command. Exiting fires the EXIT trap, which
# performs the cleanup.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Print the environment the script runs with
env

# Variables that must be set in the environment. Every missing one is
# reported before the script aborts, so a misconfigured job shows the whole
# list at once.
REQUIRED_VARIABLES=(
    OS                    # OS name
    RELEASE               # OS release
    ARCH                  # The image architecture
    IMAGE_TYPE            # The image type to create
    VARIANT               # The variant of the base image to use
    PROFILE               # The ansible group to apply to the new image
    GIT_BRANCH            # The git branch of the automation repo to checkout
    GIT_URL               # The git URL of the automation repo to checkout
    LXD_HOST              # Address passed to 'lxc remote add'
    LXD_INSTANCE_PROFILE  # LXD profile passed to 'lxc launch'
    LXD_CLIENT_CERT       # Path to LXD client certificate
    LXD_CLIENT_KEY        # Path to LXD client certificate key
    SSH_PRIVATE_KEY       # Path to SSH private key
    TEST                  # 'true' to test launching published image
)
MISSING_VARS=0
for var in "${REQUIRED_VARIABLES[@]}" ; do
    if [[ ! -v "${var}" ]] ; then
        MISSING_VARS=1
        echo "Missing required variable: '${var}'" >&2
    fi
done
if [[ "${MISSING_VARS}" != "0" ]] ; then
    fail 1 "Missing required variables"
fi

# Default optional variables
# Seconds to wait for the instance to answer 'lxc exec'
INSTANCE_START_TIMEOUT="${INSTANCE_START_TIMEOUT:-120}"
# Seconds to sleep before looking up the instance IP address
NETWORK_SLEEP="${NETWORK_SLEEP:-15}"

# Dependencies
apt-get -y install lxd-client ansible jq

# Configuration
# The removal is registered before the credentials are copied so that they
# are deleted even when one of the copies fails ('rm -f' tolerates a file
# that was never created).
CLEANUP+=(
    "rm -f ${HOME}/.config/lxc/client.crt"
    "rm -f ${HOME}/.config/lxc/client.key"
)
mkdir -p ~/.config/lxc
cp "${LXD_CLIENT_CERT}" ~/.config/lxc/client.crt
cp "${LXD_CLIENT_KEY}" ~/.config/lxc/client.key
lxc remote add ci --accept-certificate --auth-type tls "${LXD_HOST}"
lxc remote switch ci

# Clone lttng-ci
git clone -b "${GIT_BRANCH}" "${GIT_URL}" ci
cd ci/automation/ansible || exit 1

# Image to start from
SOURCE_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}"
# Include IMAGE_TYPE since an alias may only be defined once even if the
# type of the image differs
TARGET_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}/${PROFILE}/${IMAGE_TYPE}"
INSTANCE_NAME=''

# Extra 'lxc launch' argument, only needed for virtual machines
VM_ARG=()
if [[ "${IMAGE_TYPE}" == "vm" ]] ; then
    VM_ARG=("--vm")
fi

# Launch the instance that will be provisioned.
#
# It's possible that concurrent image creation when running parallel jobs
# causes an error during the launch:
#   Error: Failed instance creation: UNIQUE constraint failed: images.project_id, images.fingerprint
# C.f. https://github.com/canonical/lxd/issues/11636
# The launch is therefore retried, with a short random pause between attempts.
set +e
TRIES_MAX=3
TRIES=0
while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do
    # Try from local cache
    if INSTANCE_NAME=$(lxc -q launch "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" "${SOURCE_IMAGE_NAME}/${IMAGE_TYPE}") ; then
        break
    fi
    # Try from images
    if INSTANCE_NAME=$(lxc -q launch "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" images:"${SOURCE_IMAGE_NAME}") ; then
        break
    fi
    TRIES=$((TRIES + 1))
    echo "Failed to deployed ephemereal instance attempt ${TRIES}/${TRIES_MAX}"
    if [[ "${TRIES}" -ge "${TRIES_MAX}" ]] ; then
        fail 1 "Failed to deploy ephemereal instance"
    fi
    sleep $((1 + RANDOM % 10))
done
# Keep what follows the ':' in the launch output, without spaces
INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d ':' -f 2 | tr -d ' ')"
set -e

# Every later step addresses the instance by name
if [[ -z "${INSTANCE_NAME}" ]] ; then
    fail 1 "Failed to determine the name of the launched instance"
fi

# Cleanup commands run in reverse order of registration: the instance is
# first asked to stop, then deleted forcefully.
CLEANUP+=(
    "lxc delete -f ${INSTANCE_NAME}"
    "lxc stop ${INSTANCE_NAME}"
)

# VMs may take more time to start, wait until instance is running.
# The instance counts as running once 'hostname', run through 'lxc exec',
# prints the instance name.
TIME_REMAINING="${INSTANCE_START_TIMEOUT}"
while true ; do
    # A failing 'lxc exec' leaves the status empty and the loop tries again
    INSTANCE_STATUS=$(lxc exec "${INSTANCE_NAME}" hostname) || true
    if [[ "${INSTANCE_STATUS}" == "${INSTANCE_NAME}" ]] ; then
        break
    fi
    sleep 1
    TIME_REMAINING=$((TIME_REMAINING - 1))
    if [[ "${TIME_REMAINING}" -lt 0 ]] ; then
        fail 1 "Timed out waiting for instance to become available via 'lxc exec'"
    fi
done

# Wait for cloud-init to finish
if [[ "${VARIANT}" == "cloud" ]] ; then
    # It's possible for cloud-init to fail, but to still be able to continue.
    # Eg., a profile asks for netplan.io on a system that doesn't have that
    # package available.
    lxc exec "${INSTANCE_NAME}" -- cloud-init status -w || true
fi

# Wait for instance to have an ip address (@TODO: is there a better approach?)
sleep "${NETWORK_SLEEP}"

# @TODO: Handle case when iproute2 is not installed
INSTANCE_IP=''
# Interface names to probe, in order
POTENTIAL_INTERFACES=(eth0 enp5s0)
# Print the full address list for reference
lxc exec "${INSTANCE_NAME}" -- ip a
set +e
for interface in "${POTENTIAL_INTERFACES[@]}" ; do
    if ! DEV_INFO="$(lxc exec "${INSTANCE_NAME}" -- ip a show dev "${interface}")" ; then
        continue
    fi
    # Keep only the first IPv4 address: an interface with several 'inet'
    # rows would otherwise yield a multi-line value.
    INSTANCE_IP="$(echo "${DEV_INFO}" | grep -Eo 'inet [^ ]* ' | head -n 1 | cut -d' ' -f2 | cut -d'/' -f1)"
    if [[ "${INSTANCE_IP}" != "" ]] ; then
        break
    fi
done
set -e
if [[ "${INSTANCE_IP}" == "" ]] ; then
    fail 1 "Failed to determine instance IP address"
fi

# The files below are written to ~/.ssh, which may not exist yet
mkdir -p -m 700 ~/.ssh

# Each removal is registered before its file is created so that nothing is
# left behind when a later command fails.
CLEANUP+=(
    "rm -f ${HOME}/.ssh/known_hosts2"
)
ssh-keyscan "${INSTANCE_IP}" >> ~/.ssh/known_hosts2
#lxc exec "${INSTANCE_NAME}" -- bash -c 'for i in /etc/ssh/ssh_host_*_key ; do ssh-keygen -l -f "$i" ; done' >> "${HOME}/.ssh/known_hosts"

CLEANUP+=(
    "rm -f ${HOME}/.ssh/id_rsa.pub"
    "rm -f ${HOME}/.ssh/id_rsa"
)
cp "${SSH_PRIVATE_KEY}" ~/.ssh/id_rsa
# ssh refuses private keys that other users can read
chmod 600 ~/.ssh/id_rsa
# Derive the public key and install it for root in the instance
ssh-keygen -f ~/.ssh/id_rsa -y > ~/.ssh/id_rsa.pub
lxc file push ~/.ssh/id_rsa.pub "ci:${INSTANCE_NAME}/root/.ssh/authorized_keys2"

# Confirm working SSH connection
if ! ssh "${INSTANCE_IP}" hostname ; then
    fail 1 "Unable to reach ephemereal instance over SSH"
fi

# Run playbook
# The inventory holds a single group, named after PROFILE with every '-'
# replaced by '_', whose only member is the instance.
CLEANUP+=(
    "rm -f $(pwd)/fake-inventory"
)
cat > fake-inventory <<EOF
[${PROFILE//-/_}]
${INSTANCE_IP}
EOF

LANG=C ANSIBLE_STRATEGY=linear ansible-playbook site.yml \
    -e '{"compilers_legacy_install": false, "jenkins_user": false, "lttng_modules_checkout_repo": false}' \
    -l "${INSTANCE_IP}" -i fake-inventory

# Cleanup instance side
LANG=C ANSIBLE_STRATEGY=linear ansible-playbook \
    playbooks/post-imagebuild-clean.yml \
    -l "${INSTANCE_IP}" -i fake-inventory

# Graceful shutdown
lxc stop "${INSTANCE_NAME}"

# Publish
# The fingerprint is the 64 hexadecimal digit string found in the output of
# 'lxc publish'; without one there is no image to test or alias.
if ! FINGERPRINT=$(lxc publish "${INSTANCE_NAME}" 2>&1 | grep -E -o '[A-Fa-f0-9]{64}') ; then
    fail 1 "No fingerprint for published instance"
fi
echo "Published instance with fingerprint '${FINGERPRINT}'"

# Optionally check that an instance can be launched from the new image.
# The launch is retried with a short random pause between attempts.
TRIES=0
TRIES_MAX="${TRIES_MAX:-3}"

if [[ "${TEST}" == "true" ]] ; then
    set +e
    while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do
        if INSTANCE_NAME=$(lxc -q launch -e "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" "${FINGERPRINT}") ; then
            # Keep what follows the ':' in the launch output, without spaces
            INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d':' -f2 | tr -d ' ')"
            CLEANUP+=(
                "lxc stop -f ${INSTANCE_NAME}"
            )
            break
        fi
        TRIES=$((TRIES + 1))
        echo "Failed to launch instance try ${TRIES}/${TRIES_MAX}"
        if [[ "${TRIES}" -ge "${TRIES_MAX}" ]] ; then
            fail 1 "Failed to launch an instance using newly published image '${FINGERPRINT}'"
        fi
        sleep $((1 + RANDOM % 10))
    done
    set -e
fi

# Point the alias at the newly published image. The delete may fail, e.g.
# when the alias does not exist yet; that failure is ignored.
lxc image alias delete "${TARGET_IMAGE_NAME}" || true
lxc image alias create "${TARGET_IMAGE_NAME}" "${FINGERPRINT}"
This page took 0.031018 seconds and 4 git commands to generate.