#!/bin/bash

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script performs disaster recovery of etcd from the backup data.
# Assumptions:
# - backup was done using etcdctl command:
#   a) in case of etcd2
#      $ etcdctl backup --data-dir=<dir>
#      produced .snap and .wal files
#   b) in case of etcd3
#      $ etcdctl --endpoints=<address> snapshot save
#      produced .db file
# - version.txt file is in the current directory (if it isn't it will be
#   defaulted to "2.2.1/etcd2"). Based on this file, the script will
#   decide to which version we are restoring (procedures are different
#   for etcd2 and etcd3).
# - in case of etcd2 - *.snap and *.wal files are in current directory
# - in case of etcd3 - *.db file is in the current directory
# - the script is run as root
# - for event etcd, we only support clearing it - to do it, you need to
#   set RESET_EVENT_ETCD=true env var.

set -o errexit
set -o nounset
set -o pipefail

# Version file contains information about current version in the format:
# <etcd binary version>/<etcd api mode> (e.g. "3.0.12/etcd3").
#
# If the file doesn't exist we assume "2.2.1/etcd2" configuration is
# the current one and create a file with such configuration.
# The restore procedure is chosen based on this information.
VERSION_FILE="version.txt"

# Make it possible to overwrite version file (or default version)
# with VERSION_CONTENTS env var.
if [[ -n "${VERSION_CONTENTS:-}" ]]; then
  echo "${VERSION_CONTENTS}" > "${VERSION_FILE}"
fi
if [[ ! -f "${VERSION_FILE}" ]]; then
  echo "2.2.1/etcd2" > "${VERSION_FILE}"
fi
VERSION_CONTENTS="$(cat "${VERSION_FILE}")"
ETCD_VERSION="$(echo "${VERSION_CONTENTS}" | cut -d '/' -f 1)"
ETCD_API="$(echo "${VERSION_CONTENTS}" | cut -d '/' -f 2)"

# Refuse to continue with an API mode we have no restore procedure for.
# Checking here, before anything is stopped or moved, keeps an invalid
# version.txt from taking the control plane down or from replacing
# /var/etcd/data with a backup directory that was never populated.
case "${ETCD_API}" in
  etcd2 | etcd3) ;;
  *)
    echo "Unsupported etcd API mode '${ETCD_API}' in ${VERSION_FILE} - expected etcd2 or etcd3" >&2
    exit 1
    ;;
esac

# Name is used only in case of etcd3 mode, to appropriately set the metadata
# for the etcd data.
# NOTE: NAME HAS TO BE EQUAL TO WHAT WE USE IN --name flag when starting etcd.
NAME="${NAME:-etcd-$(hostname)}"

# Port on which etcd is exposed.
etcd_port=2379
event_etcd_port=4002

# Wait until the etcd instance listening on the given port is up.
# Arguments: $1 - port of the etcd instance on 127.0.0.1
# Returns:   0 as soon as /health reports healthy, 1 after 120 attempts
#            (one second apart) without a healthy answer.
wait_for_etcd_up() {
  local port=$1
  # TODO: As of 3.0.x etcd versions, all 2.* and 3.* versions return
  # {"health": "true"} on /health endpoint in healthy case.
  # However, we should come with a regex for it to avoid future break.
  local health_ok='{"health": "true"}'
  local health attempt
  for ((attempt = 0; attempt < 120; attempt++)); do
    # TODO: Is it enough to look into /health endpoint?
    # A refused connection is expected while etcd is still starting, so a
    # failing curl must not abort the script.
    health=$(curl --silent "http://127.0.0.1:${port}/health") || true
    if [[ "${health}" == "${health_ok}" ]]; then
      return 0
    fi
    sleep 1
  done
  return 1
}

# Wait until apiserver is up and every component status is healthy.
# Returns: 0 once all reported component statuses are Healthy:True,
#          1 after 120 attempts (one second apart).
wait_for_cluster_healthy() {
  local cs_status componentstatuses healthy attempt
  for ((attempt = 0; attempt < 120; attempt++)); do
    # kubectl fails while apiserver is down, and grep -c exits non-zero when
    # it counts 0 matches; neither may abort the loop.
    cs_status=$(kubectl get componentstatuses -o template --template='{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}}{{end}}{{"\n"}}{{end}}') || true
    componentstatuses=$(echo "${cs_status}" | grep -c 'Healthy:') || true
    healthy=$(echo "${cs_status}" | grep -c 'Healthy:True') || true
    # Require at least one status line: when kubectl returned nothing both
    # counters are 0, and "0 of 0 healthy" must not count as a healthy cluster.
    if [[ "${componentstatuses}" -gt 0 && "${componentstatuses}" -eq "${healthy}" ]]; then
      return 0
    fi
    sleep 1
  done
  return 1
}

# Wait until etcd and apiserver pods are down.
# Returns: 0 once "docker ps" lists no etcd and no apiserver container,
#          1 after 120 attempts (one second apart).
wait_for_etcd_and_apiserver_down() {
  local etcd apiserver attempt
  for ((attempt = 0; attempt < 120; attempt++)); do
    # With pipefail a grep that matches nothing fails the pipeline, although
    # wc -l still prints the count we want; hence "|| true".
    etcd=$(docker ps | grep etcd | grep -v etcd-empty-dir | grep -v etcd-monitor | wc -l) || true
    apiserver=$(docker ps | grep apiserver | wc -l) || true
    # TODO: Theoretically it is possible, that apiserver and or etcd
    # are currently down, but Kubelet is now restarting them and they
    # will reappear again. We should avoid it.
    if [[ "${etcd}" -eq 0 && "${apiserver}" -eq 0 ]]; then
      return 0
    fi
    sleep 1
  done
  return 1
}

# Move the manifest files to stop etcd and kube-apiserver
# while we swap the data out from under them.
MANIFEST_DIR="/etc/kubernetes/manifests"
MANIFEST_BACKUP_DIR="/etc/kubernetes/manifests-backups"
mkdir -p "${MANIFEST_BACKUP_DIR}"
echo "Moving etcd(s) & apiserver manifest files to ${MANIFEST_BACKUP_DIR}"
# If those files were already moved (e.g. during previous
# try of backup) don't fail on it.
mv "${MANIFEST_DIR}/kube-apiserver.manifest" "${MANIFEST_BACKUP_DIR}" || true
mv "${MANIFEST_DIR}/etcd.manifest" "${MANIFEST_BACKUP_DIR}" || true
mv "${MANIFEST_DIR}/etcd-events.manifest" "${MANIFEST_BACKUP_DIR}" || true

# Wait for the pods to be stopped
echo "Waiting for etcd and kube-apiserver to be down"
if ! wait_for_etcd_and_apiserver_down; then
  # Couldn't kill etcd and apiserver.
  echo "Downing etcd and apiserver failed"
  exit 1
fi

# Create the sort of directory structure that etcd expects.
# If this directory already exists, remove it.
BACKUP_DIR="/var/tmp/backup"
rm -rf "${BACKUP_DIR}"
if [[ "${ETCD_API}" == "etcd2" ]]; then
  echo "Preparing etcd backup data for restore"
  # In v2 mode, we simply copy both snap and wal files to a newly created
  # directory. After that, we start etcd with --force-new-cluster option
  # that (according to the etcd documentation) is required to recover from
  # a backup.
  echo "Copying data to ${BACKUP_DIR} and restoring there"
  mkdir -p "${BACKUP_DIR}/member/snap"
  mkdir -p "${BACKUP_DIR}/member/wal"
  # If the cluster is relatively new, there can be no .snap file.
  mv *.snap "${BACKUP_DIR}/member/snap/" || true
  mv *.wal "${BACKUP_DIR}/member/wal/"

  # TODO(jsz): This won't work with HA setups (e.g. do we need to set --name flag)?
  echo "Starting etcd ${ETCD_VERSION} to restore data"
  # The assignment is tested directly: under errexit a separate "$?" check
  # after it would never be reached when docker fails.
  if ! image=$(docker run -d -v "${BACKUP_DIR}:/var/etcd/data" \
    --net=host -p "${etcd_port}:${etcd_port}" \
    "gcr.io/google_containers/etcd:${ETCD_VERSION}" /bin/sh -c \
    "/usr/local/bin/etcd --data-dir /var/etcd/data --force-new-cluster"); then
    echo "Docker container didn't started correctly"
    exit 1
  fi
  echo "Container ${image} created, waiting for etcd to report as healthy"

  if ! wait_for_etcd_up "${etcd_port}"; then
    echo "Etcd didn't come back correctly"
    exit 1
  fi

  # Kill that etcd instance.
  echo "Etcd healthy - killing ${image} container"
  docker kill "${image}"
elif [[ "${ETCD_API}" == "etcd3" ]]; then
  echo "Preparing etcd snapshot for restore"
  mkdir -p "${BACKUP_DIR}"
  echo "Copying data to ${BACKUP_DIR} and restoring there"
  number_files=$(find . -maxdepth 1 -type f -name "*.db" | wc -l)
  if [[ "${number_files}" -ne 1 ]]; then
    echo "Incorrect number of *.db files - expected 1"
    exit 1
  fi
  mv *.db "${BACKUP_DIR}/"
  # BACKUP_DIR was created empty just above, so the moved snapshot is the
  # only entry in it.
  snapshot="$(ls "${BACKUP_DIR}")"

  # Run etcdctl snapshot restore command and wait until it is finished.
  # --name has to match the --name set in the etcd manifest file
  # (see NAME above).
  # TODO(jsz): This command may not work in case of HA.
  if ! image=$(docker run -d -v "${BACKUP_DIR}:/var/tmp/backup" --env ETCDCTL_API=3 \
    "gcr.io/google_containers/etcd:${ETCD_VERSION}" /bin/sh -c \
    "/usr/local/bin/etcdctl snapshot restore ${BACKUP_DIR}/${snapshot} --name ${NAME} --initial-cluster ${NAME}=http://localhost:2380; mv /${NAME}.etcd/member /var/tmp/backup/"); then
    echo "Docker container didn't started correctly"
    exit 1
  fi
  prepare_ret_code=$(docker wait "${image}")
  echo "Prepare container exit code: ${prepare_ret_code}"
  # A non-zero exit means the restored "member" directory was not produced;
  # continuing would replace the live data dir with an empty backup dir.
  if [[ "${prepare_ret_code}" != "0" ]]; then
    echo "Restoring etcd snapshot failed" >&2
    exit 1
  fi

  rm -f "${BACKUP_DIR}/${snapshot}"
fi
# Also copy version.txt file.
cp "${VERSION_FILE}" "${BACKUP_DIR}"

# Find out if we are running GCI vs CVM.
# CVM holds the exit status of "grep -q gci" on the instance attributes:
# "0" when gci is mentioned, "1" when it is not.
CVM=$(curl "http://metadata/computeMetadata/v1/instance/attributes/" -H "Metadata-Flavor: Google" |& grep -q gci; echo $?)
export CVM
if [[ "$CVM" == "1" ]]; then
  export MNT_DISK="/mnt/master-pd"
else
  export MNT_DISK="/mnt/disks/master-pd"
fi

# Save the corrupted data (clean directory if it is already non-empty).
rm -rf "${MNT_DISK}/var/etcd-corrupted"
mkdir -p "${MNT_DISK}/var/etcd-corrupted"
echo "Saving corrupted data to ${MNT_DISK}/var/etcd-corrupted"
mv /var/etcd/data "${MNT_DISK}/var/etcd-corrupted"

# Replace the corrupted data dir with the restored data.
echo "Copying restored data to /var/etcd/data"
mv "${BACKUP_DIR}" /var/etcd/data

if [[ "${RESET_EVENT_ETCD:-}" == "true" ]]; then
  echo "Removing event-etcd corrupted data"
  EVENTS_CORRUPTED_DIR="${MNT_DISK}/var/etcd-events-corrupted"
  # Save the corrupted data (clean directory if it is already non-empty).
  rm -rf "${EVENTS_CORRUPTED_DIR}"
  mkdir -p "${EVENTS_CORRUPTED_DIR}"
  mv /var/etcd/data-events "${EVENTS_CORRUPTED_DIR}"
fi

# Start etcd and kube-apiserver again.
echo "Restarting etcd and apiserver from restored snapshot"
mv "${MANIFEST_BACKUP_DIR}"/* "${MANIFEST_DIR}/"
rm -rf "${MANIFEST_BACKUP_DIR}"

# Verify that etcd is back.
echo "Waiting for etcd to come back"
if ! wait_for_etcd_up "${etcd_port}"; then
  echo "Etcd didn't come back correctly"
  exit 1
fi

# Verify that event etcd is back.
echo "Waiting for event etcd to come back"
if ! wait_for_etcd_up "${event_etcd_port}"; then
  echo "Event etcd didn't come back correctly"
  exit 1
fi

# Verify that kube-apiserver is back and cluster is healthy.
echo "Waiting for apiserver to come back"
if ! wait_for_cluster_healthy; then
  echo "Apiserver didn't come back correctly"
  exit 1
fi

echo "Cluster successfully restored!"

# Everything from here to the end of the file is a parser AST dump of this
# script. It is reference data, not shell. The script exits before reaching
# it, and the dump is additionally handed to the no-op ":" through a quoted
# here-document (which runs to end of file) so that "bash -n" and linters do
# not try to parse it.
exit 0
: <<'SCRIPT_AST_DUMP'
(CommandList children: [ (C {(set)} {(-o)} {(errexit)}) (C {(set)} {(-o)} {(nounset)}) (C {(set)} {(-o)} {(pipefail)}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:VERSION_FILE) op: Equal rhs: {(DQ (version.txt))} spids: [136] ) ] spids: [136] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(-n)} { (DQ (BracedVarSub token: suffix_op: (StringUnary op_id:VTest_ColonHyphen arg_word:{(SQ )}) spids: [155 158] ) ) } {(Lit_Other "]")} ) terminator: ) ] action: [ (SimpleCommand words: [{(echo)} {(DQ (${ VSub_Name VERSION_CONTENTS))}] redirects: [ (Redir op_id: Redir_Great fd: -1 arg_word: {(DQ (${ VSub_Name VERSION_FILE))} spids: [175] ) ] ) ] spids: [-1 164] ) ] spids: [-1 183] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(KW_Bang "!")} {(-f)} {(DQ (${ VSub_Name VERSION_FILE))} {(Lit_Other "]")} ) terminator: ) ] action: [ (SimpleCommand words: [{(echo)} {(DQ (2.2.1/etcd2))}] redirects: [ (Redir op_id: Redir_Great fd: -1 arg_word: {(DQ (${ VSub_Name VERSION_FILE))} spids: [211] ) ] ) ] spids: [-1 202] ) ] spids: [-1 219] ) (Assignment keyword: Assign_None pairs: [
(assign_pair lhs: (LhsName name:VERSION_CONTENTS) op: Equal rhs: { (DQ (CommandSubPart command_list: (CommandList children:[(C {(cat)} {(${ VSub_Name VERSION_FILE)})]) left_token: spids: [223 229] ) ) } spids: [221] ) ] spids: [221] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:ETCD_VERSION) op: Equal rhs: { (DQ (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(echo)} {($ VSub_Name "$VERSION_CONTENTS")}) (C {(cut)} {(-d)} {(SQ )} {(-f)} {(1)}) ] negated: False ) ] ) left_token: spids: [234 252] ) ) } spids: [232] ) ] spids: [232] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:ETCD_API) op: Equal rhs: { (DQ (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(echo)} {($ VSub_Name "$VERSION_CONTENTS")}) (C {(cut)} {(-d)} {(SQ )} {(-f)} {(2)}) ] negated: False ) ] ) left_token: spids: [257 275] ) ) } spids: [255] ) ] spids: [255] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:NAME) op: Equal rhs: { (DQ (BracedVarSub token: suffix_op: (StringUnary op_id: VTest_ColonHyphen arg_word: {(etcd-) (CommandSubPart command_list: (CommandList children:[(C {(hostname)})]) left_token: spids: [294 296] ) } ) spids: [290 297] ) ) } spids: [288] ) ] spids: [288] ) (Assignment keyword: Assign_None pairs: [(assign_pair lhs:(LhsName name:etcd_port) op:Equal rhs:{(2379)} spids:[304])] spids: [304] ) (Assignment keyword: Assign_None pairs: [(assign_pair lhs:(LhsName name:event_etcd_port) op:Equal rhs:{(4002)} spids:[307])] spids: [307] ) (FuncDef name: wait_for_etcd_up body: (BraceGroup children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:port) op: Equal rhs: {($ VSub_Number "$1")} spids: [321] ) ] spids: [321] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:health_ok) op: Equal rhs: { (DQ ("{") (EscapedLiteralPart token:) (health) (EscapedLiteralPart token:) (": ") 
(EscapedLiteralPart token:) (true) (EscapedLiteralPart token:) ("}") ) } spids: [337] ) ] spids: [337] ) (ForEach iter_name: i iter_words: [ { (CommandSubPart command_list: (CommandList children:[(C {(seq)} {(120)})]) left_token: spids: [357 361] ) } ] do_arg_iter: False body: (DoGroup children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:health) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(curl)} {(--silent)} {(http) (Lit_Other ":") (//127.0.0.1) (Lit_Other ":") (${ VSub_Name port) (/health) } ) ] ) left_token: spids: [372 385] ) } spids: [371] ) ] spids: [371] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ (${ VSub_Name health))} {(Lit_Other "=") (Lit_Other "=")} {(DQ (${ VSub_Name health_ok))} {(Lit_Other "]")} ) terminator: ) ] action: [(ControlFlow token: arg_word:{(0)})] spids: [-1 410] ) ] spids: [-1 418] ) (C {(sleep)} {(1)}) ] spids: [364 426] ) spids: [356 362] ) (ControlFlow token: arg_word:{(1)}) ] spids: [318] ) spids: [314 317] ) (FuncDef name: wait_for_cluster_healthy body: (BraceGroup children: [ (ForEach iter_name: i iter_words: [ { (CommandSubPart command_list: (CommandList children:[(C {(seq)} {(120)})]) left_token: spids: [452 456] ) } ] do_arg_iter: False body: (DoGroup children: [ (AndOr children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:cs_status) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(kubectl)} {(get)} {(componentstatuses)} {(-o)} {(template)} {(--template) (Lit_Other "=") (SQ < "{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}}{{end}}{{\"\\n\"}}{{end}}" > ) } ) ] ) left_token: spids: [463 479] ) } spids: [462] ) ] spids: [462] ) (C {(true)}) ] op_id: Op_DPipe ) (AndOr children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:componentstatuses) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ 
(C {(echo)} {(DQ (${ VSub_Name cs_status))}) (C {(grep)} {(-c)} {(SQ <"Healthy:">)}) ] negated: False ) ] ) left_token: spids: [487 505] ) } spids: [486] ) ] spids: [486] ) (C {(true)}) ] op_id: Op_DPipe ) (AndOr children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:healthy) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(echo)} {(DQ (${ VSub_Name cs_status))}) (C {(grep)} {(-c)} {(SQ <"Healthy:True">)}) ] negated: False ) ] ) left_token: spids: [513 531] ) } spids: [512] ) ] spids: [512] ) (C {(true)}) ] op_id: Op_DPipe ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ (${ VSub_Name componentstatuses))} {(-eq)} {(DQ (${ VSub_Name healthy))} {(Lit_Other "]")} ) terminator: ) ] action: [(ControlFlow token: arg_word:{(0)})] spids: [-1 559] ) ] spids: [-1 567] ) (C {(sleep)} {(1)}) ] spids: [459 575] ) spids: [451 457] ) (ControlFlow token: arg_word:{(1)}) ] spids: [443] ) spids: [439 442] ) (FuncDef name: wait_for_etcd_and_apiserver_down body: (BraceGroup children: [ (ForEach iter_name: i iter_words: [ { (CommandSubPart command_list: (CommandList children:[(C {(seq)} {(120)})]) left_token: spids: [601 605] ) } ] do_arg_iter: False body: (DoGroup children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:etcd) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(docker)} {(ps)}) (C {(grep)} {(etcd)}) (C {(grep)} {(-v)} {(etcd-empty-dir)}) (C {(grep)} {(-v)} {(etcd-monitor)}) (C {(wc)} {(-l)}) ] negated: False ) ] ) left_token: spids: [612 644] ) } spids: [611] ) ] spids: [611] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:apiserver) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(docker)} {(ps)}) (C {(grep)} {(apiserver)}) (C {(wc)} {(-l)}) ] negated: False ) ] ) left_token: spids: [648 664] ) } spids: [647] ) ] 
spids: [647] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ (${ VSub_Name etcd))} {(-eq)} {(DQ (0))} {(-a)} {(DQ (${ VSub_Name apiserver))} {(-eq)} {(DQ (0))} {(Lit_Other "]")} ) terminator: ) ] action: [(ControlFlow token: arg_word:{(0)})] spids: [-1 712] ) ] spids: [-1 720] ) (C {(sleep)} {(1)}) ] spids: [608 728] ) spids: [600 606] ) (ControlFlow token: arg_word:{(1)}) ] spids: [592] ) spids: [588 591] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:MANIFEST_DIR) op: Equal rhs: {(DQ (/etc/kubernetes/manifests))} spids: [744] ) ] spids: [744] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:MANIFEST_BACKUP_DIR) op: Equal rhs: {(DQ (/etc/kubernetes/manifests-backups))} spids: [749] ) ] spids: [749] ) (C {(mkdir)} {(-p)} {(DQ (${ VSub_Name MANIFEST_BACKUP_DIR))}) (C {(echo)} {(DQ ("Moving etcd(s) & apiserver manifest files to ") (${ VSub_Name MANIFEST_BACKUP_DIR))} ) (AndOr children: [ (C {(mv)} {(DQ (${ VSub_Name MANIFEST_DIR) (/kube-apiserver.manifest))} {(DQ (${ VSub_Name MANIFEST_BACKUP_DIR))} ) (C {(true)}) ] op_id: Op_DPipe ) (AndOr children: [ (C {(mv)} {(DQ (${ VSub_Name MANIFEST_DIR) (/etcd.manifest))} {(DQ (${ VSub_Name MANIFEST_BACKUP_DIR))} ) (C {(true)}) ] op_id: Op_DPipe ) (AndOr children: [ (C {(mv)} {(DQ (${ VSub_Name MANIFEST_DIR) (/etcd-events.manifest))} {(DQ (${ VSub_Name MANIFEST_BACKUP_DIR))} ) (C {(true)}) ] op_id: Op_DPipe ) (C {(echo)} {(DQ ("Waiting for etcd and kube-apiserver to be down"))}) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children:[(C {(wait_for_etcd_and_apiserver_down)})] negated:True) terminator: ) ] action: [(C {(echo)} {(DQ ("Downing etcd and apiserver failed"))}) (C {(exit)} {(1)})] spids: [-1 853] ) ] spids: [-1 871] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:BACKUP_DIR) op: Equal rhs: {(DQ (/var/tmp/backup))} spids: [880] ) ] spids: [880] ) (C {(rm)} {(-rf)} {(DQ (${ VSub_Name BACKUP_DIR))}) (If 
arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ (${ VSub_Name ETCD_API))} {(Lit_Other "=") (Lit_Other "=")} {(DQ (etcd2))} {(Lit_Other "]")} ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Preparing etcd backup data for restore"))}) (C {(echo)} {(DQ ("Copying data to ") (${ VSub_Name BACKUP_DIR) (" and restoring there"))}) (C {(mkdir)} {(-p)} {(DQ (${ VSub_Name BACKUP_DIR) (/member/snap))}) (C {(mkdir)} {(-p)} {(DQ (${ VSub_Name BACKUP_DIR) (/member/wal))}) (AndOr children: [ (C {(mv)} {(Lit_Other "*") (.snap)} {(DQ (${ VSub_Name BACKUP_DIR) (/member/snap/))}) (C {(true)}) ] op_id: Op_DPipe ) (C {(mv)} {(Lit_Other "*") (.wal)} {(DQ (${ VSub_Name BACKUP_DIR) (/member/wal/))}) (C {(echo)} {(DQ ("Starting etcd ") (${ VSub_Name ETCD_VERSION) (" to restore data"))}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:image) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(docker)} {(run)} {(-d)} {(-v)} {(${ VSub_Name BACKUP_DIR) (Lit_Other ":") (/var/etcd/data)} {(--net) (Lit_Other "=") (host)} {(-p)} {(${ VSub_Name etcd_port) (Lit_Other ":") (${ VSub_Name etcd_port)} {(DQ ("gcr.io/google_containers/etcd:") (${ VSub_Name ETCD_VERSION))} {(/bin/sh)} {(-c)} { (DQ ( "/usr/local/bin/etcd --data-dir /var/etcd/data --force-new-cluster" ) ) } ) ] ) left_token: spids: [1027 1076] ) } spids: [1026] ) ] spids: [1026] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ ($ VSub_QMark "$?"))} {(-ne)} {(DQ (0))} {(Lit_Other "]")} ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Docker container didn't started correctly"))}) (C {(exit)} {(1)}) ] spids: [-1 1096] ) ] spids: [-1 1111] ) (C {(echo)} { (DQ ("Container ") (${ VSub_Name image) (" created, waiting for etcd to report as healthy") ) } ) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [(C {(wait_for_etcd_up)} {(DQ (${ VSub_Name etcd_port))})] negated: True ) terminator: ) ] action: [(C {(echo)} {(DQ ("Etcd didn't come back 
correctly"))}) (C {(exit)} {(1)})] spids: [-1 1139] ) ] spids: [-1 1154] ) (C {(echo)} {(DQ ("Etcd healthy - killing ") (${ VSub_Name image) (" container"))}) (C {(docker)} {(kill)} {(DQ (${ VSub_Name image))}) ] spids: [-1 915] ) (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ (${ VSub_Name ETCD_API))} {(Lit_Other "=") (Lit_Other "=")} {(DQ (etcd3))} {(Lit_Other "]")} ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Preparing etcd snapshot for restore"))}) (C {(mkdir)} {(-p)} {(DQ (${ VSub_Name BACKUP_DIR))}) (C {(echo)} {(DQ ("Copying data to ") (${ VSub_Name BACKUP_DIR) (" and restoring there"))}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:number_files) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(find)} {(.)} {(-maxdepth)} {(1)} {(-type)} {(f)} {(-name)} {(DQ ("*.db"))} ) (C {(wc)} {(-l)}) ] negated: False ) ] ) left_token: spids: [1236 1260] ) } spids: [1235] ) ] spids: [1235] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ (${ VSub_Name number_files))} {(-ne)} {(DQ (1))} {(Lit_Other "]")} ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Incorrect number of *.db files - expected 1"))}) (C {(exit)} {(1)}) ] spids: [-1 1282] ) ] spids: [-1 1297] ) (C {(mv)} {(Lit_Other "*") (.db)} {(DQ (${ VSub_Name BACKUP_DIR) (/))}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:snapshot) op: Equal rhs: { (DQ (CommandSubPart command_list: (CommandList children: [(C {(ls)} {(${ VSub_Name BACKUP_DIR)})] ) left_token: spids: [1315 1321] ) ) } spids: [1313] ) ] spids: [1313] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:image) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(docker)} {(run)} {(-d)} {(-v)} {(${ VSub_Name BACKUP_DIR) (Lit_Other ":") (/var/tmp/backup)} {(--env)} {(Lit_VarLike "ETCDCTL_API=") (3)} {(DQ ("gcr.io/google_containers/etcd:") (${ VSub_Name ETCD_VERSION))} 
{(/bin/sh)} {(-c)} { (DQ ("/usr/local/bin/etcdctl snapshot restore ") (${ VSub_Name BACKUP_DIR) (/) (${ VSub_Name snapshot) (" --name ") (${ VSub_Name NAME) (" --initial-cluster ") (${ VSub_Name NAME) ("=http://localhost:2380; mv /") (${ VSub_Name NAME) (".etcd/member /var/tmp/backup/") ) } ) ] ) left_token: spids: [1339 1397] ) } spids: [1338] ) ] spids: [1338] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ ($ VSub_QMark "$?"))} {(-ne)} {(DQ (0))} {(Lit_Other "]")} ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Docker container didn't started correctly"))}) (C {(exit)} {(1)}) ] spids: [-1 1417] ) ] spids: [-1 1432] ) (C {(echo)} { (DQ ("Prepare container exit code: ") (CommandSubPart command_list: (CommandList children: [(C {(docker)} {(wait)} {(${ VSub_Name image)})] ) left_token: spids: [1439 1447] ) ) } ) (C {(rm)} {(-f)} {(DQ (${ VSub_Name BACKUP_DIR) (/) (${ VSub_Name snapshot))}) ] spids: [1183 1203] ) ] spids: [-1 1466] ) (C {(cp)} {(DQ (${ VSub_Name VERSION_FILE))} {(DQ (${ VSub_Name BACKUP_DIR))}) (C {(export)} {(Lit_VarLike "CVM=") (CommandSubPart command_list: (CommandList children: [ (Sentence child: (Pipeline children: [ (C {(curl)} {(DQ ("http://metadata/computeMetadata/v1/instance/attributes/"))} {(-H)} {(DQ ("Metadata-Flavor: Google"))} ) (C {(grep)} {(-q)} {(gci)}) ] negated: False stderr_indices: [0] ) terminator: ) (C {(echo)} {($ VSub_QMark "$?")}) ] ) left_token: spids: [1492 1517] ) } ) (If arms: [ (if_arm cond: [ (Sentence child: (DBracket expr: (BoolBinary op_id: BoolBinary_GlobDEqual left: {(DQ ($ VSub_Name "$CVM"))} right: {(DQ (1))} ) ) terminator: ) ] action: [(C {(export)} {(Lit_VarLike "MNT_DISK=") (DQ (/mnt/master-pd))})] spids: [-1 1536] ) ] else_action: [(C {(export)} {(Lit_VarLike "MNT_DISK=") (DQ (/mnt/disks/master-pd))})] spids: [1546 1556] ) (C {(rm)} {(-rf)} {(DQ (${ VSub_Name MNT_DISK) (/var/etcd-corrupted))}) (C {(mkdir)} {(-p)} {(DQ (${ VSub_Name MNT_DISK) (/var/etcd-corrupted))}) (C {(echo)} {(DQ 
("Saving corrupted data to ") (${ VSub_Name MNT_DISK) (/var/etcd-corrupted))}) (C {(mv)} {(/var/etcd/data)} {(DQ (${ VSub_Name MNT_DISK) (/var/etcd-corrupted))}) (C {(echo)} {(DQ ("Copying restored data to /var/etcd/data"))}) (C {(mv)} {(DQ (${ VSub_Name BACKUP_DIR))} {(/var/etcd/data)}) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} { (DQ (BracedVarSub token: suffix_op: (StringUnary op_id:VTest_ColonHyphen arg_word:{(SQ )}) spids: [1631 1634] ) ) } {(Lit_Other "=") (Lit_Other "=")} {(DQ (true))} {(Lit_Other "]")} ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Removing event-etcd corrupted data"))}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:EVENTS_CORRUPTED_DIR) op: Equal rhs: {(DQ (${ VSub_Name MNT_DISK) (/var/etcd-events-corrupted))} spids: [1657] ) ] spids: [1657] ) (C {(rm)} {(-rf)} {(DQ (${ VSub_Name EVENTS_CORRUPTED_DIR))}) (C {(mkdir)} {(-p)} {(DQ (${ VSub_Name EVENTS_CORRUPTED_DIR))}) (C {(mv)} {(/var/etcd/data-events)} {(DQ (${ VSub_Name EVENTS_CORRUPTED_DIR))}) ] spids: [-1 1647] ) ] spids: [-1 1702] ) (C {(echo)} {(DQ ("Restarting etcd and apiserver from restored snapshot"))}) (C {(mv)} {(DQ (${ VSub_Name MANIFEST_BACKUP_DIR)) (/) (Lit_Other "*")} {(DQ (${ VSub_Name MANIFEST_DIR) (/))} ) (C {(rm)} {(-rf)} {(DQ (${ VSub_Name MANIFEST_BACKUP_DIR))}) (C {(echo)} {(DQ ("Waiting for etcd to come back"))}) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [(C {(wait_for_etcd_up)} {(DQ (${ VSub_Name etcd_port))})] negated: True ) terminator: ) ] action: [(C {(echo)} {(DQ ("Etcd didn't come back correctly"))}) (C {(exit)} {(1)})] spids: [-1 1764] ) ] spids: [-1 1778] ) (C {(echo)} {(DQ ("Waiting for event etcd to come back"))}) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [(C {(wait_for_etcd_up)} {(DQ (${ VSub_Name event_etcd_port))})] negated: True ) terminator: ) ] action: [(C {(echo)} {(DQ ("Event etcd didn't come back correctly"))}) (C {(exit)} {(1)})] spids: [-1 1803] ) ] 
spids: [-1 1817] ) (C {(echo)} {(DQ ("Waiting for apiserver to come back"))}) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children:[(C {(wait_for_cluster_healthy)})] negated:True) terminator: ) ] action: [(C {(echo)} {(DQ ("Apiserver didn't come back correctly"))}) (C {(exit)} {(1)})] spids: [-1 1836] ) ] spids: [-1 1850] ) (C {(echo)} {(DQ ("Cluster successfully restored!"))}) ] )