#!/bin/bash

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is for master and node instance health monitoring, which is
# packed in the kube-manifest tarball. It is executed through a systemd
# service defined in a .yaml file under cluster/gce/gci/. The env variables
# come from an env file provided by the systemd service.
#
# Usage: health-monitor.sh <docker/kubelet>

set -o nounset
set -o pipefail

# We simply kill the process when there is a failure. Another systemd service
# will automatically restart the process.

#######################################
# Probes the Docker daemon in an endless loop and kills it when a probe fails.
# Globals:   SLEEP_SECONDS (read) - pause after a successful probe
# Arguments: none
# Outputs:   "Docker daemon failed!" on stdout for every failed probe
# Returns:   does not return (infinite loop)
#######################################
docker_monitoring() {
  while true; do
    # 'timeout' bounds the probe so a hung daemon also counts as a failure.
    if ! timeout 60 docker ps > /dev/null; then
      echo "Docker daemon failed!"
      pkill docker
      # Wait for a while, as we don't want to kill it again before it is
      # really up.
      sleep 30
    else
      sleep "${SLEEP_SECONDS}"
    fi
  done
}

#######################################
# Probes the kubelet /healthz endpoint in an endless loop and kills the
# kubelet when a probe fails.
# Globals:   SLEEP_SECONDS (read) - pause after a successful probe
#            KUBELET_PORT  (read) - healthz port, 10250 when unset or empty
# Arguments: none
# Outputs:   the curl response/errors and "Kubelet is unhealthy!" on stdout
#            for every failed probe
# Returns:   does not return (infinite loop)
#######################################
kubelet_monitoring() {
  echo "Wait for 2 minutes for kubelet to be functional"
  # TODO(andyzheng0831): replace it with a more reliable method if possible.
  sleep 120
  local -r max_seconds=10
  local output=""
  while true; do
    # stderr is folded into the capture so curl's error text can be logged.
    if ! output=$(curl --insecure -m "${max_seconds}" -f -s -S \
      "https://127.0.0.1:${KUBELET_PORT:-10250}/healthz" 2>&1); then
      # Print the response and/or errors. Quoted so the text is not
      # word-split or glob-expanded.
      printf '%s\n' "${output}"
      echo "Kubelet is unhealthy!"
      pkill kubelet
      # Wait for a while, as we don't want to kill it again before it is
      # really up.
      sleep 60
    else
      sleep "${SLEEP_SECONDS}"
    fi
  done
}

############## Main Function ################
if [[ "$#" -ne 1 ]]; then
  echo "Usage: health-monitor.sh <docker/kubelet>" >&2
  exit 1
fi

KUBE_ENV="/home/kubernetes/kube-env"
if [[ ! -e "${KUBE_ENV}" ]]; then
  echo "The ${KUBE_ENV} file does not exist!! Terminate health monitoring" >&2
  exit 1
fi

# Assigned before sourcing the env file, so the env file may override it.
SLEEP_SECONDS=10
component=$1
echo "Start kubernetes health monitoring for ${component}"
source "${KUBE_ENV}"
if [[ "${component}" == "docker" ]]; then
  docker_monitoring
elif [[ "${component}" == "kubelet" ]]; then
  kubelet_monitoring
else
  echo "Health monitoring for component ${component} is not supported!" >&2
  # Fail like the other argument errors instead of exiting successfully.
  exit 1
fi