#!/bin/bash
# ==============================================================================
# Script Name: reboot-nodes.sh
# Author: a-real-agent
#
# Drains, reboots (over SSH) and uncordons Kubernetes nodes one at a time.
# Requires `kubectl` (pointed at the intended context) and `ssh` access to the
# nodes with permission to run `sudo reboot`.
#
# Environment:
#   FQDN_SUFFIX  Suffix appended to short node names when --fqdn-suffix is
#                not given.
# ==============================================================================

set -e
set -u
set -o pipefail

# Timeouts in seconds
DRAIN_TIMEOUT_SEC=120
NODE_NOTREADY_TIMEOUT_SEC=180
NODE_READY_TIMEOUT_SEC=600
# A cordoned+Ready node whose uptime is below this is treated as already rebooted.
UPTIME_THRESHOLD_SEC=1800
PAUSE_AFTER_SUCCESSFUL_REBOOT=15
DRAIN_MAX_RETRIES=3
DRAIN_RETRY_DELAY_SEC=30
# Applied to every SSH invocation so an unreachable host cannot stall the run.
SSH_CONNECT_TIMEOUT_SEC=10

DRY_RUN=false
FQDN_SUFFIX_FILE=""
FQDN_SUFFIX_OVERRIDE=""
NODE_PATTERN=""
NODE_NAMES=()

#######################################
# Print a timestamped log line.
# Arguments: $1 - message
# Outputs:   "[<ISO-8601 timestamp>] - <message>" on stdout
#######################################
log() {
  printf '%s\n' "[$(date +'%Y-%m-%dT%H:%M:%S%z')] - $1"
}

#######################################
# Print the help text to stdout.
#######################################
usage() {
  cat <<'EOF'
Usage: reboot-nodes.sh [OPTIONS] NODE1 [NODE2 ...]
   or: reboot-nodes.sh [OPTIONS] PATTERN

Safely drain, reboot (SSH), and uncordon Kubernetes worker nodes, one at a time.

Options:
  --dry-run                 Print actions without changing the cluster or SSHing.
  --fqdn-suffix SUFFIX      Append SUFFIX to short node names for SSH (e.g. .prod.example.com).
  --fqdn-suffix-file FILE   Map kubectl context globs to suffixes (see fqdn-suffix.example.conf).
  --help                    Show this help.

Environment:
  FQDN_SUFFIX               Same as --fqdn-suffix when the flag is not set.

Arguments:
  NODE1 [NODE2 ...]         Explicit node names (must exist in the current context).
  PATTERN                   Single selector: 'all' or a prefix match against node names.
                            A single argument is always treated as PATTERN.

SSH target resolution:
  1. If the node name already contains '.', use it as the SSH target.
  2. Else append the resolved FQDN suffix (flag, env, or suffix file).
  3. If no suffix is resolved, SSH uses the short node name (may fail if SSH needs FQDN).
EOF
}

#######################################
# Poll a node's Ready condition until it reaches the desired state.
# Arguments:
#   $1 - node name
#   $2 - desired Ready status: "True" waits for Ready; "False" waits for
#        NotReady (a reported status of "False" or "Unknown" both count)
#   $3 - timeout in seconds
# Outputs:   progress dots and log lines on stdout
# Returns:   0 when the state is reached, 1 on timeout
#######################################
wait_for_node_status() {
  local node_name=$1
  local desired_status=$2
  local timeout_seconds=$3
  local condition_text="Ready"
  local current_status

  if [[ "${desired_status}" == "False" ]]; then
    condition_text="NotReady"
  fi

  log "Waiting for ${node_name} to become ${condition_text} (timeout: ${timeout_seconds}s)..."
  local end_time=$((SECONDS + timeout_seconds))

  while [[ ${SECONDS} -lt ${end_time} ]]; do
    # A failing kubectl call (e.g. API briefly unreachable) is reported as "Unknown".
    current_status=$(kubectl get node "${node_name}" \
      -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "Unknown")

    if [[ "${desired_status}" == "False" \
      && ("${current_status}" == "False" || "${current_status}" == "Unknown") ]]; then
      echo
      log "[ok] Node ${node_name} is ${condition_text} (status: ${current_status})."
      return 0
    fi

    if [[ "${desired_status}" == "True" && "${current_status}" == "True" ]]; then
      echo
      log "[ok] Node ${node_name} is ${condition_text}."
      return 0
    fi

    printf "."
    sleep 2
  done

  echo
  log "[fail] Timed out after ${timeout_seconds}s waiting for node ${node_name} to become ${condition_text}."
  return 1
}

#######################################
# Print the node's .status.nodeInfo.bootID.
# Arguments: $1 - node name
# Outputs:   the bootID on stdout; nothing if kubectl fails
# Returns:   always 0 (kubectl failures are deliberately swallowed)
#######################################
get_node_boot_id() {
  local node_name=$1
  kubectl get node "${node_name}" -o jsonpath='{.status.nodeInfo.bootID}' 2>/dev/null || true
}

#######################################
# Poll until the node reports a bootID different from the given one.
# Arguments:
#   $1 - node name
#   $2 - bootID recorded before the reboot was requested
#   $3 - timeout in seconds
# Outputs:   progress dots and log lines on stdout
# Returns:   0 when the bootID changed; 1 on timeout or if $2 is empty
#######################################
wait_for_boot_id_change() {
  local node_name=$1
  local old_boot_id=$2
  local timeout_seconds=$3
  local end_time=$((SECONDS + timeout_seconds))
  local current_boot_id

  if [[ -z "${old_boot_id}" ]]; then
    log "WARN: Old bootID is empty; cannot detect reboot by bootID."
    return 1
  fi

  log "Waiting for ${node_name} bootID to change (timeout: ${timeout_seconds}s)..."
  while [[ ${SECONDS} -lt ${end_time} ]]; do
    current_boot_id=$(get_node_boot_id "${node_name}")
    # An empty value means the lookup failed; keep polling rather than
    # mistaking it for a change.
    if [[ -n "${current_boot_id}" && "${current_boot_id}" != "${old_boot_id}" ]]; then
      echo
      log "[ok] bootID changed for ${node_name}."
      return 0
    fi
    printf "."
    sleep 2
  done

  echo
  log "[fail] Timed out after ${timeout_seconds}s waiting for bootID change on ${node_name}."
  return 1
}

#######################################
# Match kubectl context against glob patterns from a suffix file.
# File format: one mapping per line — PATTERN|SUFFIX (# comments allowed).
# The first matching line wins.
# Arguments:
#   $1 - kubectl context name
#   $2 - path to the suffix file
# Outputs:   the matching suffix on stdout; diagnostics on stderr, so that
#            callers capturing stdout with $(...) receive only the suffix
# Returns:   0 on a match, 1 when no line matches.
#            Exits 1 if the file does not exist (when invoked inside $(...),
#            that only ends the subshell).
#######################################
resolve_fqdn_suffix_from_file() {
  local context=$1
  local file=$2
  local line pattern suffix

  if [[ ! -f "${file}" ]]; then
    log "ERROR: FQDN suffix file not found: ${file}" >&2
    exit 1
  fi

  while IFS= read -r line || [[ -n "${line}" ]]; do
    # Drop trailing comment, then trim whitespace on both ends.
    line="${line%%#*}"
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    if [[ -z "${line}" ]]; then
      continue
    fi

    # Without a separator both expansions below would return the whole line,
    # so reject such lines explicitly.
    if [[ "${line}" != *"|"* ]]; then
      log "WARN: Skipping invalid suffix mapping line: ${line}" >&2
      continue
    fi

    pattern="${line%%|*}"
    suffix="${line#*|}"
    # Trim whitespace around the separator (outer ends were trimmed above).
    pattern="${pattern%"${pattern##*[![:space:]]}"}"
    suffix="${suffix#"${suffix%%[![:space:]]*}"}"

    if [[ -z "${pattern}" || -z "${suffix}" ]]; then
      log "WARN: Skipping invalid suffix mapping line: ${line}" >&2
      continue
    fi

    # shellcheck disable=SC2254 — pattern is intentionally unquoted: it is a glob.
    case "${context}" in
      ${pattern})
        printf '%s\n' "${suffix}"
        return 0
        ;;
    esac
  done < "${file}"

  return 1
}

#######################################
# Compute the SSH host name for a node.
# Arguments:
#   $1 - node name
#   $2 - FQDN suffix (may be empty)
# Outputs:   the node name unchanged if it contains a '.', otherwise the node
#            name with the suffix appended (unchanged when the suffix is empty)
#######################################
resolve_ssh_target() {
  local node=$1
  local suffix=$2

  if [[ "${node}" == *.* ]]; then
    printf '%s\n' "${node}"
    return 0
  fi
  printf '%s\n' "${node}${suffix}"
}

#######################################
# Ask a node to reboot over SSH. A non-zero SSH exit status is only logged,
# because the connection is often torn down by the reboot itself.
# Globals:   SSH_CONNECT_TIMEOUT_SEC (read)
# Arguments: $1 - SSH target host
# Returns:   always 0
#######################################
reboot_node_via_ssh() {
  local ssh_target=$1

  log "Rebooting ${ssh_target} via SSH..."
  if ! ssh -o ConnectTimeout="${SSH_CONNECT_TIMEOUT_SEC}" "${ssh_target}" "sudo reboot"; then
    log "WARN: SSH command exited non-zero (often expected during reboot). Continuing..."
  fi
}

# ------------------------------------------------------------------------------
# Argument parsing
# ------------------------------------------------------------------------------
while [[ $# -gt 0 ]]; do
  key="$1"
  case "${key}" in
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --fqdn-suffix)
      if [[ $# -lt 2 ]]; then
        log "ERROR: --fqdn-suffix requires a value."
        usage
        exit 1
      fi
      FQDN_SUFFIX_OVERRIDE="$2"
      shift 2
      ;;
    --fqdn-suffix-file)
      if [[ $# -lt 2 ]]; then
        log "ERROR: --fqdn-suffix-file requires a path."
        usage
        exit 1
      fi
      FQDN_SUFFIX_FILE="$2"
      shift 2
      ;;
    --help)
      usage
      exit 0
      ;;
    -*)
      log "ERROR: Unknown option: $1"
      usage
      exit 1
      ;;
    *)
      NODE_NAMES+=("$1")
      shift
      ;;
  esac
done

if [[ ${#NODE_NAMES[@]} -eq 0 ]]; then
  log "ERROR: No node(s) or pattern specified."
  usage
  exit 1
fi

if [[ "${DRY_RUN}" == true ]]; then
  log "DRY RUN MODE ENABLED. No changes will be made to the cluster."
fi

# ------------------------------------------------------------------------------
# Cluster confirmation
# ------------------------------------------------------------------------------
CURRENT_CONTEXT=$(kubectl config current-context)
log "Operating on Kubernetes context: ${CURRENT_CONTEXT}"

if [[ "${DRY_RUN}" == false ]]; then
  read -p "Is this the correct cluster? (y/N) " -n 1 -r
  echo
  if [[ ! ${REPLY} =~ ^[Yy]$ ]]; then
    log "Aborting. Use 'kubectx' or 'kubectl config' to switch contexts."
    exit 1
  fi
fi

# ------------------------------------------------------------------------------
# FQDN suffix resolution: flag, then environment, then suffix file
# ------------------------------------------------------------------------------
FQDN_SUFFIX="${FQDN_SUFFIX_OVERRIDE:-${FQDN_SUFFIX:-}}"
if [[ -z "${FQDN_SUFFIX}" && -n "${FQDN_SUFFIX_FILE}" ]]; then
  # The resolver runs inside a command substitution (a subshell), so an
  # `exit` in there cannot abort this script. Validate the file here.
  if [[ ! -f "${FQDN_SUFFIX_FILE}" ]]; then
    log "ERROR: FQDN suffix file not found: ${FQDN_SUFFIX_FILE}"
    exit 1
  fi
  if resolved=$(resolve_fqdn_suffix_from_file "${CURRENT_CONTEXT}" "${FQDN_SUFFIX_FILE}"); then
    FQDN_SUFFIX="${resolved}"
    log "Matched FQDN suffix '${FQDN_SUFFIX}' from ${FQDN_SUFFIX_FILE}."
  else
    log "WARN: No suffix mapping matched context '${CURRENT_CONTEXT}' in ${FQDN_SUFFIX_FILE}."
  fi
fi

if [[ -n "${FQDN_SUFFIX}" ]]; then
  log "Using FQDN suffix '${FQDN_SUFFIX}' for short node names."
else
  log "No FQDN suffix configured. Nodes without '.' in the name will be SSH'd by short name."
fi

# ------------------------------------------------------------------------------
# Node selection
# ------------------------------------------------------------------------------
# `kubectl get nodes -o name` prints one "node/<name>" reference per node.
ALL_NODES=$(kubectl get nodes -o name)
NODES_TO_REBOOT=()

# ALL_NODES is deliberately expanded unquoted in the loops below so it splits
# into one word per node reference.
if [[ ${#NODE_NAMES[@]} -gt 1 ]]; then
  # Two or more arguments: each must match an existing node name exactly.
  log "Resolving nodes from explicit list: ${NODE_NAMES[*]}"
  for requested_node in "${NODE_NAMES[@]}"; do
    found=false
    # shellcheck disable=SC2086
    for node_ref in ${ALL_NODES}; do
      node_name=${node_ref#node/}
      if [[ "${node_name}" == "${requested_node}" ]]; then
        found=true
        NODES_TO_REBOOT+=("${node_name}")
        break
      fi
    done
    if [[ "${found}" == false ]]; then
      log "ERROR: Node '${requested_node}' not found in context '${CURRENT_CONTEXT}'. Aborting."
      exit 1
    fi
  done
else
  # Exactly one argument: 'all', or a prefix pattern.
  NODE_PATTERN="${NODE_NAMES[0]}"
  log "Resolving nodes for pattern: ${NODE_PATTERN}"
  # shellcheck disable=SC2086
  for node_ref in ${ALL_NODES}; do
    node_name=${node_ref#node/}
    # NODE_PATTERN is left unquoted on the right-hand side, so any glob
    # characters in it are honoured.
    # shellcheck disable=SC2053
    if [[ "${NODE_PATTERN}" == "all" || ${node_name} == ${NODE_PATTERN}* ]]; then
      NODES_TO_REBOOT+=("${node_name}")
    fi
  done
fi

if [[ ${#NODES_TO_REBOOT[@]} -eq 0 ]]; then
  if [[ -n "${NODE_PATTERN}" ]]; then
    log "ERROR: No nodes found matching the pattern '${NODE_PATTERN}*'. Aborting."
  else
    log "ERROR: No nodes resolved from provided input. Aborting."
  fi
  exit 1
fi

log "The following ${#NODES_TO_REBOOT[@]} nodes will be processed sequentially:"
printf " - %s\n" "${NODES_TO_REBOOT[@]}"

if [[ "${DRY_RUN}" == false ]]; then
  read -p "Do you want to proceed? (y/N) " -n 1 -r
  echo
  if [[ ! ${REPLY} =~ ^[Yy]$ ]]; then
    log "Aborting by user request."
    exit 1
  fi
fi

# ------------------------------------------------------------------------------
# Per-node processing: drain -> reboot -> wait -> uncordon
# ------------------------------------------------------------------------------
node_count=${#NODES_TO_REBOOT[@]}
current_node_index=0

for node in "${NODES_TO_REBOOT[@]}"; do
  current_node_index=$((current_node_index + 1))
  node_fqdn=$(resolve_ssh_target "${node}" "${FQDN_SUFFIX}")

  log "--- Processing node: ${node} (${current_node_index} of ${node_count}) ---"
  log "SSH target: '${node_fqdn}'"

  is_unschedulable=$(kubectl get node "${node}" -o jsonpath='{.spec.unschedulable}' 2>/dev/null || echo "")
  # Recorded before any reboot so a later change proves the node restarted.
  old_boot_id=$(get_node_boot_id "${node}")

  # --- Drain (skipped when the node is already cordoned) ----------------------
  if [[ "${is_unschedulable}" != "true" ]]; then
    log "Node is schedulable. Starting drain..."
    if [[ "${DRY_RUN}" == true ]]; then
      log "[DRY RUN] Would execute: kubectl drain \"${node}\" with retries"
    else
      drain_success=false
      for ((attempt = 1; attempt <= DRAIN_MAX_RETRIES; attempt++)); do
        if kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data \
          --timeout="${DRAIN_TIMEOUT_SEC}s"; then
          log "${node} drained successfully."
          drain_success=true
          break
        fi
        log "Drain attempt ${attempt} of ${DRAIN_MAX_RETRIES} failed."
        if ((attempt < DRAIN_MAX_RETRIES)); then
          log "Retrying in ${DRAIN_RETRY_DELAY_SEC} seconds..."
          sleep "${DRAIN_RETRY_DELAY_SEC}"
        fi
      done
      if [[ "${drain_success}" == false ]]; then
        log "ERROR: Failed to drain ${node} after ${DRAIN_MAX_RETRIES} attempts. Aborting."
        exit 1
      fi
    fi
  else
    log "Node is already unschedulable (cordoned). Skipping drain."
  fi

  # Re-read node state: a successful drain leaves the node cordoned.
  is_unschedulable=$(kubectl get node "${node}" -o jsonpath='{.spec.unschedulable}' 2>/dev/null || echo "")
  is_ready=$(kubectl get node "${node}" \
    -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "Unknown")

  # --- Reboot decision --------------------------------------------------------
  reboot_requested=false
  reboot_in_progress=false

  if [[ "${is_ready}" != "True" ]]; then
    log "Node is NotReady (status: ${is_ready}). Assuming reboot is in progress. Skipping reboot command."
    reboot_in_progress=true
  elif [[ "${is_unschedulable}" == "true" ]]; then
    log "Node is cordoned but Ready. Checking uptime to see if reboot is needed..."
    if [[ "${DRY_RUN}" == true ]]; then
      log "[DRY RUN] Would SSH to ${node_fqdn} to read /proc/uptime."
    else
      uptime_sec=$(ssh -o ConnectTimeout="${SSH_CONNECT_TIMEOUT_SEC}" "${node_fqdn}" \
        "awk '{print int(\$1)}' /proc/uptime" 2>/dev/null || echo "")
      if [[ "${uptime_sec}" =~ ^[0-9]+$ ]]; then
        if [ "${uptime_sec}" -lt "${UPTIME_THRESHOLD_SEC}" ]; then
          log "Node uptime is ${uptime_sec}s (less than threshold of ${UPTIME_THRESHOLD_SEC}s). Skipping reboot."
        else
          log "Node uptime is ${uptime_sec}s. Proceeding with reboot."
          reboot_node_via_ssh "${node_fqdn}"
          reboot_requested=true
        fi
      else
        log "WARN: Could not retrieve uptime from node. Proceeding with reboot as a safeguard."
        reboot_node_via_ssh "${node_fqdn}"
        reboot_requested=true
      fi
    fi
  fi

  # --- Wait for the node to come back ----------------------------------------
  if [[ "${DRY_RUN}" == true ]]; then
    if [[ "${reboot_in_progress}" == true ]]; then
      log "[DRY RUN] Would wait for node to become Ready."
    elif [[ "${reboot_requested}" == true ]]; then
      log "[DRY RUN] Would wait for bootID change and then Ready."
    else
      log "[DRY RUN] No reboot performed; skipping wait steps."
    fi
  elif [[ "${reboot_in_progress}" == true || "${reboot_requested}" == true ]]; then
    if [[ "${reboot_requested}" == true ]]; then
      if ! wait_for_boot_id_change "${node}" "${old_boot_id}" "${NODE_READY_TIMEOUT_SEC}"; then
        log "WARN: bootID did not change within timeout; falling back to status checks."
        if ! wait_for_node_status "${node}" "False" "${NODE_NOTREADY_TIMEOUT_SEC}"; then
          log "WARN: Node ${node} did not report NotReady; proceeding to wait for Ready anyway."
        fi
      fi
    fi
    if ! wait_for_node_status "${node}" "True" "${NODE_READY_TIMEOUT_SEC}"; then
      log "ERROR: Node ${node} did not become Ready within the timeout. Aborting."
      exit 1
    fi
  else
    log "No reboot required; skipping wait steps."
  fi

  # --- Uncordon ---------------------------------------------------------------
  log "Uncordoning ${node}..."
  if [[ "${DRY_RUN}" == true ]]; then
    log "[DRY RUN] Would execute: kubectl uncordon ${node}"
  elif ! kubectl uncordon "${node}"; then
    log "ERROR: Failed to uncordon ${node}. Aborting."
    exit 1
  fi

  log "Successfully processed node ${node}."

  # Give the cluster a moment to settle before taking down the next node.
  if [[ "${DRY_RUN}" == false && ${current_node_index} -lt ${node_count} ]]; then
    log "Pausing for ${PAUSE_AFTER_SUCCESSFUL_REBOOT} seconds before processing the next node..."
    sleep "${PAUSE_AFTER_SUCCESSFUL_REBOOT}"
  fi
done

log "--- All specified nodes have been processed. ---"