Publish k8s-safe-node-reboot for reuse on catu.dev.

Drain, SSH reboot, and uncordon worker nodes sequentially with configurable FQDN suffix handling for short Kubernetes node names.
2026-06-11 15:18:02 +03:00 · 2026-06-11 15:18:02 +03:00 · 9bc1ed2eb9
commit 9bc1ed2eb9
5 changed files with 554 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 fqdn-suffix.conf
--- a/LICENSE
+++ b/LICENSE
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2026 a-real-agent
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,85 @@
 # k8s-safe-node-reboot
 Safely drain, reboot, and uncordon Kubernetes worker nodes, one node at a time.
 ## Problem
 Rolling reboots of worker nodes are easy to get wrong: pods left running, SSH targeting the wrong hostname, reboots fired on the wrong cluster, or nodes left cordoned after a failed wait. This script sequences the operations and aborts on failure.
 ## Requirements
 - `kubectl` configured for the target cluster
 - SSH access to nodes as the current user (`sudo reboot` must work)
 - Bash 4+
 ## Usage
 ```bash
 chmod +x reboot-nodes.sh
 # Dry run against one node (a single argument is a prefix match, so this also selects e.g. worker-010)
 ./reboot-nodes.sh --dry-run worker-01
 # Explicit suffix for short Kubernetes node names
 ./reboot-nodes.sh --fqdn-suffix .prod.example.com worker-01 worker-02
 # Suffix from context mapping file
 ./reboot-nodes.sh --fqdn-suffix-file ./fqdn-suffix.conf k8s-worker
 # Prefix match (all nodes whose name starts with k8s-worker)
 ./reboot-nodes.sh --fqdn-suffix .prod.example.com k8s-worker
 # All nodes in the cluster
 ./reboot-nodes.sh --fqdn-suffix .prod.example.com all
 ```
 ## FQDN resolution
 Kubernetes node names are often short (`worker-01`). SSH usually needs a FQDN.
 Resolution order for the SSH target:
 1. If the node name already contains `.`, use it as-is.
 2. Else append the first suffix available from `--fqdn-suffix`, the `FQDN_SUFFIX` environment variable, or a match in `--fqdn-suffix-file` (in that order).
 3. If no suffix is set, SSH uses the short node name.
 ### Suffix mapping file
 Copy `fqdn-suffix.example.conf` and edit for your environments:
 ```
 production*|.prod.example.com
 development*|.dev.example.com
 ```
 Patterns are bash `case` globs matched against `kubectl config current-context`.
 ## Workflow per node
 1. Confirm the kubectl context and the resolved node list (once per run; interactive unless `--dry-run`)
 2. Drain (with retries) if the node is schedulable
 3. If cordoned and Ready: check remote uptime via SSH; reboot if uptime is at least 30 minutes or cannot be read
 4. Wait for the bootID to change (falling back to a NotReady transition if it does not), then for Ready status
 5. Uncordon
 6. Pause 15 seconds before the next node
 ## Options
 | Flag | Description |
 |------|-------------|
 | `--dry-run` | Print planned actions; no drain, SSH, or uncordon |
 | `--fqdn-suffix SUFFIX` | Append suffix to short node names for SSH |
 | `--fqdn-suffix-file FILE` | Context glob → suffix mappings |
 | `--help` | Show usage |
 ## Limits
 - Processes nodes **sequentially**, not in parallel
 - Assumes `kubectl drain` with `--ignore-daemonsets --delete-emptydir-data` is acceptable
 - Uptime skip threshold is fixed at 30 minutes (edit `UPTIME_THRESHOLD_SEC` in the script)
 - No built-in SSH options (keys, `StrictHostKeyChecking`, jump hosts) — configure `~/.ssh/config`
 - Aborts the entire run if any node fails drain, ready wait, or uncordon
 ## License
 MIT — see [LICENSE](LICENSE).
--- a/fqdn-suffix.example.conf
+++ b/fqdn-suffix.example.conf
@ -0,0 +1,6 @@
 # Map kubectl context names to SSH FQDN suffixes.
 # Format: CONTEXT_GLOB|SUFFIX
 # Lines starting with # are comments. Globs use bash case patterns.
 development*|.dev.example.com
 production*|.prod.example.com
--- a/reboot-nodes.sh
+++ b/reboot-nodes.sh
@ -0,0 +1,441 @@
 #!/bin/bash
 # ==============================================================================
 # Script Name: reboot-nodes.sh
 # Author: a-real-agent
 # ==============================================================================
 # Strict mode: abort on command failure (-e), on use of an unset variable (-u),
 # and when any stage of a pipeline fails (pipefail).
 set -euo pipefail
 # --- Drain tuning ---
 # Attempts per node, pause between attempts (seconds), and the value passed to
 # `kubectl drain --timeout` (seconds).
 DRAIN_MAX_RETRIES=3
 DRAIN_RETRY_DELAY_SEC=30
 DRAIN_TIMEOUT_SEC=120
 # --- Reboot wait limits (seconds) ---
 NODE_NOTREADY_TIMEOUT_SEC=180
 NODE_READY_TIMEOUT_SEC=600
 # A cordoned, Ready node whose uptime is below this many seconds is not rebooted.
 UPTIME_THRESHOLD_SEC=1800
 # Pause (seconds) between one processed node and the next.
 PAUSE_AFTER_SUCCESSFUL_REBOOT=15
 # --- Run state, populated later in the script ---
 DRY_RUN=false
 FQDN_SUFFIX_OVERRIDE=""
 FQDN_SUFFIX_FILE=""
 NODE_NAMES=()
 NODE_PATTERN=""
 # Write one timestamped message line to stdout.
 # Arguments: $1 - message text
 # Output format: [YYYY-MM-DDTHH:MM:SS+zzzz] - <message>
 log() {
  printf '[%s] - %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$1"
 }
 # Print the command-line help text to stdout.
 # The heredoc delimiter is quoted ('EOF'), so the text is emitted literally
 # with no variable or command expansion. The function does not exit; callers
 # choose the exit status themselves.
 usage() {
  cat <<'EOF'
 Usage: reboot-nodes.sh [OPTIONS] <node1> [node2 ...]
   or: reboot-nodes.sh [OPTIONS] <pattern>
 Safely drain, reboot (SSH), and uncordon Kubernetes worker nodes, one at a time.
 Options:
  --dry-run                 Print actions without changing the cluster or SSHing.
  --fqdn-suffix SUFFIX      Append SUFFIX to short node names for SSH (e.g. .prod.example.com).
  --fqdn-suffix-file FILE   Map kubectl context globs to suffixes (see fqdn-suffix.example.conf).
  --help                    Show this help.
 Environment:
  FQDN_SUFFIX               Same as --fqdn-suffix when the flag is not set.
 Arguments:
  <node...>                 Explicit node names (must exist in the current context).
  <pattern>                 Single selector: 'all' or a prefix match against node names.
 SSH target resolution:
  1. If the node name already contains '.', use it as the SSH target.
  2. Else append the resolved FQDN suffix (flag, env, or suffix file).
  3. If no suffix is resolved, SSH uses the short node name (may fail if SSH needs FQDN).
 EOF
 }
 # Poll a node's Ready condition every 2 seconds until it reaches the desired
 # state or the timeout elapses.
 # Arguments:
 #   $1 - node name
 #   $2 - desired Ready status: "True" waits for Ready; "False" waits for
 #        NotReady, where a status of "Unknown" (including a failed kubectl
 #        query) also counts as NotReady
 #   $3 - timeout in seconds
 # Outputs: progress dots and log lines on stdout
 # Returns: 0 once the desired state is observed, 1 on timeout
 wait_for_node_status() {
  local node_name=$1
  local desired_status=$2
  local timeout_seconds=$3
  local condition_text="Ready"
  # Kept local so polling cannot clobber a same-named variable in the caller.
  local current_status=""
  if [[ "${desired_status}" == "False" ]]; then
    condition_text="NotReady"
  fi
  log "Waiting for ${node_name} to become ${condition_text} (timeout: ${timeout_seconds}s)..."
  local end_time=$((SECONDS + timeout_seconds))
  while (( SECONDS < end_time )); do
    # A failed query is reported as "Unknown" instead of aborting under set -e.
    current_status=$(kubectl get node "${node_name}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "Unknown")
    if [[ "${desired_status}" == "False" && ("${current_status}" == "False" || "${current_status}" == "Unknown") ]]; then
      echo
      log "[ok] Node ${node_name} is ${condition_text} (status: ${current_status})."
      return 0
    fi
    if [[ "${desired_status}" == "True" && "${current_status}" == "True" ]]; then
      echo
      log "[ok] Node ${node_name} is ${condition_text}."
      return 0
    fi
    printf "."
    sleep 2
  done
  echo
  log "[fail] Timed out after ${timeout_seconds}s waiting for node ${node_name} to become ${condition_text}."
  return 1
 }
 # Print the bootID that Kubernetes reports for a node.
 # Arguments: $1 - node name
 # Outputs:   the bootID on stdout; kubectl's stderr is discarded
 # Returns:   always 0, so a failed lookup never aborts the script under set -e
 get_node_boot_id() {
  local target=$1
  local boot_id_query='{.status.nodeInfo.bootID}'
  if ! kubectl get node "${target}" -o jsonpath="${boot_id_query}" 2>/dev/null; then
    return 0
  fi
 }
 # Poll a node every 2 seconds until its bootID differs from a previously
 # recorded value.
 # Arguments:
 #   $1 - node name
 #   $2 - bootID recorded before the reboot
 #   $3 - timeout in seconds
 # Outputs: progress dots and log lines on stdout
 # Returns: 0 when a different, non-empty bootID is seen; 1 on timeout or when
 #          the recorded bootID is empty (nothing to compare against)
 wait_for_boot_id_change() {
  local node_name=$1
  local old_boot_id=$2
  local timeout_seconds=$3
  local deadline=$((SECONDS + timeout_seconds))
  local observed_id
  if [[ -z "${old_boot_id}" ]]; then
    log "WARN: Old bootID is empty; cannot detect reboot by bootID."
    return 1
  fi
  log "Waiting for ${node_name} bootID to change (timeout: ${timeout_seconds}s)..."
  until (( SECONDS >= deadline )); do
    observed_id=$(get_node_boot_id "${node_name}")
    # An empty reading means the lookup failed; keep polling.
    if [[ -n "${observed_id}" && "${observed_id}" != "${old_boot_id}" ]]; then
      echo
      log "[ok] bootID changed for ${node_name}."
      return 0
    fi
    printf "."
    sleep 2
  done
  echo
  log "[fail] Timed out after ${timeout_seconds}s waiting for bootID change on ${node_name}."
  return 1
 }
 # Look up the SSH FQDN suffix for a kubectl context in a mapping file.
 # File format: one mapping per line, PATTERN|SUFFIX. PATTERN is a bash `case`
 # glob matched against the context; text after '#' is a comment. Whitespace
 # around PATTERN and SUFFIX (including a trailing CR from CRLF files) is
 # ignored. The first matching line wins.
 # Arguments:
 #   $1 - kubectl context name
 #   $2 - path to the mapping file
 # Outputs: the matched suffix on stdout; diagnostics go to stderr so they are
 #          never captured as part of the suffix by a $(...) caller
 # Returns: 0 on a match, 1 when nothing matches; exits the current (sub)shell
 #          with status 1 when the file does not exist
 resolve_fqdn_suffix_from_file() {
  local context=$1
  local file=$2
  local line pattern suffix
  if [[ ! -f "${file}" ]]; then
    log "ERROR: FQDN suffix file not found: ${file}" >&2
    exit 1
  fi
  # `|| [[ -n ... ]]` also processes a last line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    line="${line%%#*}"
    # Trim leading, then trailing whitespace of the whole line.
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    [[ -z "${line}" ]] && continue
    # Without a separator both expansions below would yield the whole line,
    # turning a malformed entry into a bogus pattern/suffix pair.
    if [[ "${line}" != *"|"* ]]; then
      log "WARN: Skipping invalid suffix mapping line: ${line}" >&2
      continue
    fi
    pattern="${line%%|*}"
    suffix="${line#*|}"
    pattern="${pattern%"${pattern##*[![:space:]]}"}"
    suffix="${suffix#"${suffix%%[![:space:]]*}"}"
    if [[ -z "${pattern}" || -z "${suffix}" ]]; then
      log "WARN: Skipping invalid suffix mapping line: ${line}" >&2
      continue
    fi
    # ${pattern} is intentionally unquoted so it is matched as a glob.
    case "${context}" in
      ${pattern})
        echo "${suffix}"
        return 0
        ;;
    esac
  done < "${file}"
  return 1
 }
 # Print the host name to use for SSH to a node.
 # Arguments:
 #   $1 - Kubernetes node name
 #   $2 - FQDN suffix (may be empty)
 # Outputs: the node name unchanged when it already contains a '.', otherwise
 #          the node name with the suffix appended (an empty suffix leaves the
 #          short name as-is)
 resolve_ssh_target() {
  local host=$1
  local domain=$2
  case "${host}" in
    *.*) ;;
    *) host="${host}${domain}" ;;
  esac
  echo "${host}"
 }
 # Parse the command line: flags update DRY_RUN, FQDN_SUFFIX_OVERRIDE and
 # FQDN_SUFFIX_FILE; every non-option word is collected in NODE_NAMES.
 # Flags and node names may be interleaved. --help prints usage and exits 0;
 # an unknown option or a flag missing its value prints usage and exits 1.
 while (( $# > 0 )); do
  case "$1" in
    --help)
      usage
      exit 0
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --fqdn-suffix)
      (( $# >= 2 )) || {
        log "ERROR: --fqdn-suffix requires a value."
        usage
        exit 1
      }
      FQDN_SUFFIX_OVERRIDE=$2
      shift 2
      ;;
    --fqdn-suffix-file)
      (( $# >= 2 )) || {
        log "ERROR: --fqdn-suffix-file requires a path."
        usage
        exit 1
      }
      FQDN_SUFFIX_FILE=$2
      shift 2
      ;;
    -*)
      log "ERROR: Unknown option: $1"
      usage
      exit 1
      ;;
    *)
      NODE_NAMES+=("$1")
      shift
      ;;
  esac
 done
 # At least one node name or selector is mandatory.
 if (( ${#NODE_NAMES[@]} == 0 )); then
  log "ERROR: No node(s) or pattern specified."
  usage
  exit 1
 fi
 # Announce dry-run mode, show the active kubectl context, and (outside dry-run)
 # require a single-key y/Y confirmation before going any further.
 if [[ "${DRY_RUN}" == true ]]; then
  log "DRY RUN MODE ENABLED. No changes will be made to the cluster."
 fi
 CURRENT_CONTEXT=$(kubectl config current-context)
 log "Operating on Kubernetes context: ${CURRENT_CONTEXT}"
 if [[ "${DRY_RUN}" == false ]]; then
  # -n 1 returns after one keypress, so REPLY holds at most one character.
  read -r -n 1 -p "Is this the correct cluster? (y/N) "
  echo
  case "${REPLY}" in
    [Yy]) ;;
    *)
      log "Aborting. Use 'kubectx' or 'kubectl config' to switch contexts."
      exit 1
      ;;
  esac
 fi
 # Settle the FQDN suffix used for SSH. Precedence: --fqdn-suffix flag, then the
 # FQDN_SUFFIX environment variable, then a context match in --fqdn-suffix-file.
 FQDN_SUFFIX="${FQDN_SUFFIX_OVERRIDE:-${FQDN_SUFFIX:-}}"
 if [[ -z "${FQDN_SUFFIX}" && -n "${FQDN_SUFFIX_FILE}" ]]; then
  # Check the file here rather than relying on the lookup function: it runs
  # inside $(...), where its `exit` only leaves the subshell and its error
  # message is captured instead of shown, so a missing file would otherwise be
  # reported as "no mapping matched" and the run would continue.
  if [[ ! -f "${FQDN_SUFFIX_FILE}" ]]; then
    log "ERROR: FQDN suffix file not found: ${FQDN_SUFFIX_FILE}"
    exit 1
  fi
  if resolved=$(resolve_fqdn_suffix_from_file "${CURRENT_CONTEXT}" "${FQDN_SUFFIX_FILE}"); then
    FQDN_SUFFIX="${resolved}"
    log "Matched FQDN suffix '${FQDN_SUFFIX}' from ${FQDN_SUFFIX_FILE}."
  else
    log "WARN: No suffix mapping matched context '${CURRENT_CONTEXT}' in ${FQDN_SUFFIX_FILE}."
  fi
 fi
 if [[ -n "${FQDN_SUFFIX}" ]]; then
  log "Using FQDN suffix '${FQDN_SUFFIX}' for short node names."
 else
  log "No FQDN suffix configured. Nodes without '.' in the name will be SSH'd by short name."
 fi
 # Build NODES_TO_REBOOT from the command-line arguments.
 #   - Two or more arguments: each must exactly match a node in the cluster,
 #     otherwise the run aborts.
 #   - One argument: 'all' selects every node; anything else is a prefix
 #     matched against node names.
 ALL_NODES=$(kubectl get nodes -o name)
 NODES_TO_REBOOT=()
 if (( ${#NODE_NAMES[@]} > 1 )); then
  log "Resolving nodes from explicit list: ${NODE_NAMES[*]}"
  for wanted in "${NODE_NAMES[@]}"; do
    # ALL_NODES is split on whitespace on purpose: one "node/<name>" per word.
    for node_ref in ${ALL_NODES}; do
      if [[ "${node_ref#node/}" == "${wanted}" ]]; then
        NODES_TO_REBOOT+=("${wanted}")
        continue 2
      fi
    done
    # Falling out of the inner loop means no cluster node carried this name.
    log "ERROR: Node '${wanted}' not found in context '${CURRENT_CONTEXT}'. Aborting."
    exit 1
  done
 else
  NODE_PATTERN="${NODE_NAMES[0]}"
  log "Resolving nodes for pattern: ${NODE_PATTERN}"
  for node_ref in ${ALL_NODES}; do
    candidate=${node_ref#node/}
    # NODE_PATTERN stays unquoted on the right-hand side so it is matched as a
    # glob prefix rather than a literal string.
    if [[ "${NODE_PATTERN}" == "all" || "${candidate}" == ${NODE_PATTERN}* ]]; then
      NODES_TO_REBOOT+=("${candidate}")
    fi
  done
 fi
 if (( ${#NODES_TO_REBOOT[@]} == 0 )); then
  if [[ -n "${NODE_PATTERN}" ]]; then
    log "ERROR: No nodes found matching the pattern '${NODE_PATTERN}*'. Aborting."
  else
    log "ERROR: No nodes resolved from provided input. Aborting."
  fi
  exit 1
 fi
 # Show the resolved node list and (outside dry-run) require a single-key y/Y
 # confirmation before any node is touched.
 log "The following ${#NODES_TO_REBOOT[@]} nodes will be processed sequentially:"
 printf " - %s\n" "${NODES_TO_REBOOT[@]}"
 if [[ "${DRY_RUN}" == false ]]; then
  read -r -n 1 -p "Do you want to proceed? (y/N) "
  echo
  case "${REPLY}" in
    [Yy]) ;;
    *)
      log "Aborting by user request."
      exit 1
      ;;
  esac
 fi
 # Ask a node to reboot over SSH.
 # Arguments: $1 - SSH target (host name or FQDN)
 # A non-zero SSH exit status is only logged, because the connection is
 # commonly torn down by the reboot itself.
 request_reboot_via_ssh() {
  local ssh_target=$1
  log "Rebooting ${ssh_target} via SSH..."
  # Same connect timeout as the uptime probe, so an unreachable host fails fast.
  if ! ssh -o ConnectTimeout=10 "${ssh_target}" "sudo reboot"; then
    log "WARN: SSH command exited non-zero (often expected during reboot). Continuing..."
  fi
 }
 # Main loop: process the selected nodes strictly one at a time. Per node:
 #   1. drain it (with retries) unless it is already cordoned,
 #   2. reboot it over SSH when it is cordoned, Ready, and its uptime is at
 #      least UPTIME_THRESHOLD_SEC (or cannot be read),
 #   3. wait for the reboot to complete,
 #   4. uncordon it, then pause before the next node.
 # Any failed drain, Ready wait, or uncordon aborts the whole run.
 node_count=${#NODES_TO_REBOOT[@]}
 current_node_index=0
 for node in "${NODES_TO_REBOOT[@]}"; do
  current_node_index=$((current_node_index + 1))
  node_fqdn=$(resolve_ssh_target "${node}" "${FQDN_SUFFIX}")
  log "--- Processing node: ${node} (${current_node_index} of ${node_count}) ---"
  log "SSH target: '${node_fqdn}'"
  is_unschedulable=$(kubectl get node "${node}" -o jsonpath='{.spec.unschedulable}' 2>/dev/null || echo "")
  # Recorded before any reboot so a later change proves the node restarted.
  old_boot_id=$(get_node_boot_id "${node}")
  drain_simulated=false
  if [[ "${is_unschedulable}" != "true" ]]; then
    log "Node is schedulable. Starting drain..."
    if [[ "${DRY_RUN}" == true ]]; then
      log "[DRY RUN] Would execute: kubectl drain \"${node}\" with retries"
      drain_simulated=true
    else
      drain_success=false
      for ((attempt = 1; attempt <= DRAIN_MAX_RETRIES; attempt++)); do
        if kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --timeout="${DRAIN_TIMEOUT_SEC}s"; then
          log "${node} drained successfully."
          drain_success=true
          break
        fi
        log "Drain attempt ${attempt} of ${DRAIN_MAX_RETRIES} failed."
        if (( attempt < DRAIN_MAX_RETRIES )); then
          log "Retrying in ${DRAIN_RETRY_DELAY_SEC} seconds..."
          sleep "${DRAIN_RETRY_DELAY_SEC}"
        fi
      done
      if [[ "${drain_success}" == false ]]; then
        log "ERROR: Failed to drain ${node} after ${DRAIN_MAX_RETRIES} attempts. Aborting."
        exit 1
      fi
    fi
  else
    log "Node is already unschedulable (cordoned). Skipping drain."
  fi
  # Re-read the node state now that a drain may have changed it.
  is_unschedulable=$(kubectl get node "${node}" -o jsonpath='{.spec.unschedulable}' 2>/dev/null || echo "")
  is_ready=$(kubectl get node "${node}" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "Unknown")
  if [[ "${drain_simulated}" == true ]]; then
    # A dry run did not actually cordon the node; plan the remaining steps as
    # if the drain had succeeded so the printed plan matches a real run.
    is_unschedulable="true"
  fi
  reboot_requested=false
  reboot_in_progress=false
  if [[ "${is_ready}" == "True" ]]; then
    if [[ "${is_unschedulable}" == "true" ]]; then
      if [[ "${drain_simulated}" == true ]]; then
        log "[DRY RUN] Node would be cordoned by the drain and is Ready; uptime would decide whether to reboot."
      else
        log "Node is cordoned but Ready. Checking uptime to see if reboot is needed..."
      fi
      if [[ "${DRY_RUN}" == true ]]; then
        log "[DRY RUN] Would SSH to ${node_fqdn} to read /proc/uptime."
        log "[DRY RUN] Would reboot ${node_fqdn} via SSH if uptime is at least ${UPTIME_THRESHOLD_SEC}s or cannot be read."
        # Lets the dry-run wait message below describe the reboot path.
        reboot_requested=true
      else
        uptime_sec=$(ssh -o ConnectTimeout=10 "${node_fqdn}" "awk '{print int(\$1)}' /proc/uptime" 2>/dev/null || echo "")
        if [[ ! "${uptime_sec}" =~ ^[0-9]+$ ]]; then
          log "WARN: Could not retrieve uptime from node. Proceeding with reboot as a safeguard."
          request_reboot_via_ssh "${node_fqdn}"
          reboot_requested=true
        elif [ "${uptime_sec}" -lt "${UPTIME_THRESHOLD_SEC}" ]; then
          # A short uptime means the node was rebooted recently (e.g. by an
          # earlier, interrupted run), so it is not rebooted again.
          log "Node uptime is ${uptime_sec}s (less than threshold of ${UPTIME_THRESHOLD_SEC}s). Skipping reboot."
        else
          log "Node uptime is ${uptime_sec}s. Proceeding with reboot."
          request_reboot_via_ssh "${node_fqdn}"
          reboot_requested=true
        fi
      fi
    fi
  else
    log "Node is NotReady (status: ${is_ready}). Assuming reboot is in progress. Skipping reboot command."
    reboot_in_progress=true
  fi
  if [[ "${DRY_RUN}" == true ]]; then
    if [[ "${reboot_in_progress}" == true ]]; then
      log "[DRY RUN] Would wait for node to become Ready."
    elif [[ "${reboot_requested}" == true ]]; then
      log "[DRY RUN] Would wait for bootID change and then Ready."
    else
      log "[DRY RUN] No reboot performed; skipping wait steps."
    fi
  else
    if [[ "${reboot_in_progress}" == true ]]; then
      if ! wait_for_node_status "${node}" "True" "${NODE_READY_TIMEOUT_SEC}"; then
        log "ERROR: Node ${node} did not become Ready within the timeout. Aborting."
        exit 1
      fi
    elif [[ "${reboot_requested}" == true ]]; then
      # Preferred signal is a new bootID; otherwise fall back to watching the
      # node drop to NotReady before waiting for Ready.
      if ! wait_for_boot_id_change "${node}" "${old_boot_id}" "${NODE_READY_TIMEOUT_SEC}"; then
        log "WARN: bootID did not change within timeout; falling back to status checks."
        if ! wait_for_node_status "${node}" "False" "${NODE_NOTREADY_TIMEOUT_SEC}"; then
          log "WARN: Node ${node} did not report NotReady; proceeding to wait for Ready anyway."
        fi
      fi
      if ! wait_for_node_status "${node}" "True" "${NODE_READY_TIMEOUT_SEC}"; then
        log "ERROR: Node ${node} did not become Ready within the timeout. Aborting."
        exit 1
      fi
      # A Ready node with the old bootID most likely never restarted (for
      # example because the SSH reboot command did not reach it).
      new_boot_id=$(get_node_boot_id "${node}")
      if [[ -n "${old_boot_id}" && "${new_boot_id}" == "${old_boot_id}" ]]; then
        log "WARN: bootID of ${node} is unchanged after the reboot request; the node may not have rebooted."
      fi
    else
      log "No reboot required; skipping wait steps."
    fi
  fi
  log "Uncordoning ${node}..."
  if [[ "${DRY_RUN}" == true ]]; then
    log "[DRY RUN] Would execute: kubectl uncordon ${node}"
  else
    if ! kubectl uncordon "${node}"; then
      log "ERROR: Failed to uncordon ${node}. Aborting."
      exit 1
    fi
  fi
  log "Successfully processed node ${node}."
  # No pause after the last node or during a dry run.
  if [[ "${DRY_RUN}" == false ]] && (( current_node_index < node_count )); then
    log "Pausing for ${PAUSE_AFTER_SUCCESSFUL_REBOOT} seconds before processing the next node..."
    sleep "${PAUSE_AFTER_SUCCESSFUL_REBOOT}"
  fi
 done
 log "--- All specified nodes have been processed. ---"