#!/bin/bash # Copyright 2016 The Kubernetes Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # This script is for master and node instance health monitoring, which is # packed in kube-manifest tarball. It is executed through a systemd service # in cluster/gce/gci/.yaml. The env variables come from an env # file provided by the systemd service. set -o nounset set -o pipefail # We simply kill the process when there is a failure. Another systemd service will # automatically restart the process. proc docker_monitoring { while [ 1 ] { if ! timeout 60 docker ps > /dev/null { echo "Docker daemon failed!" pkill docker # Wait for a while, as we don't want to kill it again before it is really up. sleep 30 } else { sleep $(SLEEP_SECONDS) } } } proc kubelet_monitoring { echo "Wait for 2 minutes for kubelet to be functional" # TODO(andyzheng0831): replace it with a more reliable method if possible. sleep 120 var -r max_seconds = '10' var output = ''"" while [ 1 ] { if ! output := $[curl -m $(max_seconds) -f -s -S http://127.0.0.1:10255/healthz !2 > !1] { # Print the response and/or errors. echo $output echo "Kubelet is unhealthy!" pkill kubelet # Wait for a while, as we don't want to kill it again before it is really up. sleep 60 } else { sleep $(SLEEP_SECONDS) } } } ############## Main Function ################ if [[ "$#" -ne 1 ]] { echo "Usage: health-monitor.sh " exit 1 } global KUBE_ENV := '"/home/kubernetes/kube-env'" if [[ ! -e "${KUBE_ENV}" ]] { echo "The $(KUBE_ENV) file does not exist!! Terminate health monitoring" exit 1 } global SLEEP_SECONDS := '10' global component := $1 echo "Start kubernetes health monitoring for $(component)" source $(KUBE_ENV) if [[ "${component}" == "docker" ]] { docker_monitoring } elif [[ "${component}" == "kubelet" ]] { kubelet_monitoring } else { echo "Health monitoring for component "$(component)" is not supported!" } (CommandList children: [ (C {(set)} {(-o)} {(nounset)}) (C {(set)} {(-o)} {(pipefail)}) (FuncDef name: docker_monitoring body: (BraceGroup children: [ (While cond: [ (Sentence child: (C {(Lit_Other "[")} {(1)} {(Lit_Other "]")}) terminator: ) ] body: (DoGroup children: [ (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [ (SimpleCommand words: [{(timeout)} {(60)} {(docker)} {(ps)}] redirects: [ (Redir op_id: Redir_Great fd: -1 arg_word: {(/dev/null)} spids: [107] ) ] ) ] negated: True ) terminator: ) ] action: [ (C {(echo)} {(DQ ("Docker daemon failed!"))}) (C {(pkill)} {(docker)}) (C {(sleep)} {(30)}) ] spids: [-1 112] ) ] else_action: [(C {(sleep)} {(DQ (${ VSub_Name SLEEP_SECONDS))})] spids: [136 148] ) ] spids: [92 151] ) ) ] spids: [80] ) spids: [76 79] ) (FuncDef name: kubelet_monitoring body: (BraceGroup children: [ (C {(echo)} {(DQ ("Wait for 2 minutes for kubelet to be functional"))}) (C {(sleep)} {(120)}) (Assignment keyword: Assign_Local flags: ["'-r'"] pairs: [(assign_pair lhs:(LhsName name:max_seconds) op:Equal rhs:{(10)} spids:[183])] spids: [179] ) (Assignment keyword: Assign_Local pairs: [(assign_pair lhs:(LhsName name:output) op:Equal rhs:{(DQ )} spids:[189])] spids: [187] ) (While cond: [ (Sentence child: (C {(Lit_Other "[")} {(1)} {(Lit_Other "]")}) terminator: ) ] body: (DoGroup children: [ (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:output) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (SimpleCommand words: [ {(curl)} {(-m)} {(DQ (${ VSub_Name max_seconds))} {(-f)} {(-s)} {(-S)} {(http) (Lit_Other ":") (//127.0.0.1) (Lit_Other ":") (10255/healthz) } ] redirects: [ (Redir op_id: Redir_GreatAnd fd: 2 arg_word: {(1)} spids: [234] ) ] ) ] ) left_token: spids: [211 236] ) } spids: [210] ) ] spids: [210] ) ] negated: True ) terminator: ) ] action: [ (C {(echo)} {($ VSub_Name "$output")}) (C {(echo)} {(DQ ("Kubelet is unhealthy!"))}) (C {(pkill)} {(kubelet)}) (C {(sleep)} {(60)}) ] spids: [-1 239] ) ] else_action: [(C {(sleep)} {(DQ (${ VSub_Name SLEEP_SECONDS))})] spids: [272 284] ) ] spids: [203 287] ) ) ] spids: [160] ) spids: [156 159] ) (If arms: [ (if_arm cond: [ (Sentence child: (DBracket expr: (BoolBinary op_id:BoolBinary_ne left:{(DQ ($ VSub_Pound "$#"))} right:{(1)}) ) terminator: ) ] action: [(C {(echo)} {(DQ ("Usage: health-monitor.sh "))}) (C {(exit)} {(1)})] spids: [-1 311] ) ] spids: [-1 325] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:KUBE_ENV) op: Equal rhs: {(DQ (/home/kubernetes/kube-env))} spids: [328] ) ] spids: [328] ) (If arms: [ (if_arm cond: [ (Sentence child: (DBracket expr: (LogicalNot child: (BoolUnary op_id:BoolUnary_e child:{(DQ (${ VSub_Name KUBE_ENV))}) ) ) terminator: ) ] action: [ (C {(echo)} { (DQ ("The ") (${ VSub_Name KUBE_ENV) (" file does not exist!! Terminate health monitoring") ) } ) (C {(exit)} {(1)}) ] spids: [-1 350] ) ] spids: [-1 368] ) (Assignment keyword: Assign_None pairs: [(assign_pair lhs:(LhsName name:SLEEP_SECONDS) op:Equal rhs:{(10)} spids:[371])] spids: [371] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:component) op: Equal rhs: {($ VSub_Number "$1")} spids: [374] ) ] spids: [374] ) (C {(echo)} {(DQ ("Start kubernetes health monitoring for ") (${ VSub_Name component))}) (C {(source)} {(DQ (${ VSub_Name KUBE_ENV))}) (If arms: [ (if_arm cond: [ (Sentence child: (DBracket expr: (BoolBinary op_id: BoolBinary_GlobDEqual left: {(DQ (${ VSub_Name component))} right: {(DQ (docker))} ) ) terminator: ) ] action: [(C {(docker_monitoring)})] spids: [-1 413] ) (if_arm cond: [ (Sentence child: (DBracket expr: (BoolBinary op_id: BoolBinary_GlobDEqual left: {(DQ (${ VSub_Name component))} right: {(DQ (kubelet))} ) ) terminator: ) ] action: [(C {(kubelet_monitoring)})] spids: [419 438] ) ] else_action: [ (C {(echo)} {(DQ ("Health monitoring for component ")) (${ VSub_Name component) (DQ (" is not supported!")) } ) ] spids: [443 458] ) ] )