ecc192dae06bd0d3c166b2d20c2b8a91c51cabaf
[lttng-ci.git] / pipelines / images / imagebuild.sh
1 #!/usr/bin/bash -eux
2
3 CLEANUP=()
4
5 function cleanup {
6 set +e
7 for (( index=${#CLEANUP[@]}-1 ; index >= 0 ; index-- )) ;do
8 ${CLEANUP[$index]}
9 done
10 CLEANUP=()
11 set -e
12 }
13
14 function fail {
15 CODE="${1:-1}"
16 REASON="${2:-Unknown reason}"
17 cleanup
18 echo "${REASON}" >&2
19 exit "${CODE}"
20 }
21
22 trap cleanup EXIT TERM INT
23
24 env
25
26 REQUIRED_VARIABLES=(
27 OS # OS name
28 RELEASE # OS release
29 ARCH # The image architecture
30 IMAGE_TYPE # The image type to create
31 VARIANT # The variant of the base image to use
32 PROFILE # The ansible group to apply to the new image
33 GIT_BRANCH # The git branch of the automation repo to checkout
34 GIT_URL # The git URL of the automation repo to checkout
35 LXD_CLIENT_CERT # Path to LXD client certificate
36 LXD_CLIENT_KEY # Path to LXD client certificate key
37 SSH_PRIVATE_KEY # Path to SSH private key
38 TEST # 'true' to test launching published image
39 )
40 MISSING_VARS=0
41 for var in "${REQUIRED_VARIABLES[@]}" ; do
42 if [ ! -v "$var" ] ; then
43 MISSING_VARS=1
44 echo "Missing required variable: '${var}'" >&2
45 fi
46 done
47 if [[ ! "${MISSING_VARS}" == "0" ]] ; then
48 fail 1 "Missing required variables"
49 fi
50
51 # Default optional variables
52 INSTANCE_START_TIMEOUT="${INSTANCE_START_TIMEOUT:-30}"
53 NETWORK_SLEEP="${NETWORK_SLEEP:-15}"
54
55 # Dependencies
56 apt-get -y install lxd-client ansible jq
57
58 # Configuration
59 mkdir -p ~/.config/lxc
60 cp "${LXD_CLIENT_CERT}" ~/.config/lxc/client.crt
61 cp "${LXD_CLIENT_KEY}" ~/.config/lxc/client.key
62 CLEANUP+=(
63 "rm -f ${HOME}/.config/lxc/client.crt"
64 "rm -f ${HOME}/.config/lxc/client.key"
65 )
66 lxc remote add ci --accept-certificate --auth-type tls "${LXD_HOST}"
67 lxc remote switch ci
68
69 # Clone lttng-ci
70 git clone -b "${GIT_BRANCH}" "${GIT_URL}" ci
71 cd ci/automation/ansible || exit 1
72
73 SOURCE_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}"
74 # Include IMAGE_TYPE since an alias may only be defined once even if the
75 # type of the image differs
76 TARGET_IMAGE_NAME="${OS}/${RELEASE}/${VARIANT}/${ARCH}/${PROFILE}/${IMAGE_TYPE}"
77 INSTANCE_NAME=''
78 # Try from local cache
79 VM_ARG=()
80 if [ "${IMAGE_TYPE}" == "vm" ] ; then
81 VM_ARG=("--vm")
82 fi
83
84 set +e
85 # Test
86 # It's possible that concurrent image creation when running parallel jobs causes
87 # an error during the launch:
88 # Error: Failed instance creation: UNIQUE constraint failed: images.project_id, images.fingerprint
89 # C.f. https://github.com/canonical/lxd/issues/11636
90 #
91 TRIES_MAX=3
92 TRIES=0
93 while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do
94 if ! INSTANCE_NAME=$(lxc -q launch -e "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" "${SOURCE_IMAGE_NAME}/${IMAGE_TYPE}") ; then
95 # Try from images
96 if ! INSTANCE_NAME=$(lxc -q launch -e "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" images:"${SOURCE_IMAGE_NAME}") ; then
97 TRIES=$((TRIES + 1))
98 echo "Failed to deployed ephemereal instance attempt ${TRIES}/${TRIES_MAX}"
99 if [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; then
100 continue
101 fi
102 fail 1 "Failed to deploy ephemereal instance"
103 else
104 break
105 fi
106 else
107 break
108 fi
109 done
110 INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d ':' -f 2 | tr -d ' ')"
111 set -e
112
113 CLEANUP+=(
114 "lxc stop ${INSTANCE_NAME}"
115 )
116
117 # VMs may take more time to start, wait until instance is running
118 TIME_REMAINING="${INSTANCE_START_TIMEOUT}"
119 while true ; do
120 set +e
121 INSTANCE_STATUS=$(lxc exec "${INSTANCE_NAME}" hostname)
122 set -e
123 if [[ "${INSTANCE_STATUS}" == "${INSTANCE_NAME}" ]] ; then
124 break
125 fi
126 sleep 1
127 TIME_REMAINING=$((TIME_REMAINING - 1))
128 if [ "${TIME_REMAINING}" -lt "0" ] ; then
129 fail 1 "Timed out waiting for instance to become available via 'lxc exec'"
130 fi
131 done
132
133 # Wait for cloud-init to finish
134 if [[ "${VARIANT}" == "cloud" ]] ; then
135 lxc exec "${INSTANCE_NAME}" -- cloud-init status -w
136 fi
137
138 # Wait for instance to have an ip address (@TODO: is there a better approach?)
139 sleep "${NETWORK_SLEEP}"
140
141 # @TODO: Handle case when iputils2 is not installed
142 INSTANCE_IP=''
143 POTENTIAL_INTERFACES=(eth0 enp5s0)
144 lxc exec "${INSTANCE_NAME}" -- ip a
145 set +e
146 for interface in "${POTENTIAL_INTERFACES[@]}" ; do
147 if ! DEV_INFO="$(lxc exec "${INSTANCE_NAME}" -- ip a show dev "${interface}")" ; then
148 continue
149 fi
150 INSTANCE_IP="$(echo "${DEV_INFO}" | grep -Eo 'inet [^ ]* ' | cut -d' ' -f2 | cut -d'/' -f1)"
151 if [[ "${INSTANCE_IP}" != "" ]] ; then
152 break
153 fi
154 done
155 set -e
156 if [[ "${INSTANCE_IP}" == "" ]] ; then
157 fail 1 "Failed to determine instance IP address"
158 fi
159
160 ssh-keyscan "${INSTANCE_IP}" >> ~/.ssh/known_hosts2
161 #lxc exec "${INSTANCE_NAME}" -- bash -c 'for i in /etc/ssh/ssh_host_*_key ; do ssh-keygen -l -f "$i" ; done' >> "${HOME}/.ssh/known_hosts"
162 CLEANUP+=(
163 "rm -f ${HOME}/.ssh/known_hosts2"
164 )
165 cp "${SSH_PRIVATE_KEY}" ~/.ssh/id_rsa
166 ssh-keygen -f ~/.ssh/id_rsa -y > ~/.ssh/id_rsa.pub
167 CLEANUP+=(
168 "rm -f ${HOME}/.ssh/id_rsa.pub"
169 "rm -f ${HOME}/.ssh/id_rsa"
170 )
171 lxc file push ~/.ssh/id_rsa.pub "ci:${INSTANCE_NAME}/root/.ssh/authorized_keys2"
172
173 # Confirm working SSH connection
174 if ! ssh "${INSTANCE_IP}" hostname ; then
175 fail 1 "Unable to reach ephemereal instance over SSH"
176 fi
177
178 # Run playbook
179 cat > fake-inventory <<EOF
180 [${PROFILE/-/_}]
181 ${INSTANCE_IP}
182 EOF
183 CLEANUP+=(
184 "rm -f $(pwd)/fake-inventory"
185 )
186
187 LANG=C ANSIBLE_STRATEGY=linear ansible-playbook site.yml \
188 -e '{"compilers_legacy_install": false, "jenkins_user": false, "lttng_modules_checkout_repo": false}' \
189 -l "${INSTANCE_IP}" -i fake-inventory
190
191 # Cleanup instance side
192 # @TODO: Distro switch for apt/dnf/yum/etc.
193 lxc exec "${INSTANCE_NAME}" -- apt-get clean
194 lxc exec "${INSTANCE_NAME}" -- rm -rf /root/.ssh/authorized_keys2
195 lxc exec "${INSTANCE_NAME}" -- cloud-init clean
196 lxc exec "${INSTANCE_NAME}" -- bash -c 'history -cw'
197
198 # Publish
199 lxc publish "${INSTANCE_NAME}" --alias "${TARGET_IMAGE_NAME}" -f
200
201 TRIES=0
202
203 if [[ "${TEST}" == "true" ]] ; then
204 set +e
205 while [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; do
206 if ! INSTANCE_NAME=$(lxc -q launch -e "${VM_ARG[@]}" -p default -p "${LXD_INSTANCE_PROFILE}" "${TARGET_IMAGE_NAME}") ; then
207 TRIES=$((TRIES + 1))
208 echo "Failed to launch instance try ${TRIES}/${TRIES_MAX}"
209 if [[ "${TRIES}" -lt "${TRIES_MAX}" ]] ; then
210 sleep $((1 + RANDOM % 10))
211 continue
212 fi
213 fail 1 "Failed to launch an instance using newly published image '${TARGET_IMAGE_NAME}'"
214 else
215 INSTANCE_NAME="$(echo "${INSTANCE_NAME}" | cut -d':' -f2 | tr -d ' ')"
216 CLEANUP+=(
217 "lxc stop -f ${INSTANCE_NAME}"
218 )
219 break
220 fi
221 done
222 set -e
223 fi
This page took 0.033044 seconds and 3 git commands to generate.