#!/usr/bin/env bash
# Deploy k3s on dstack and set up kubectl access.
#
# Usage:
#   export CLOUDFLARE_API_TOKEN=xxx
#   export CERTBOT_EMAIL=you@example.com
#   ./deploy.sh k3s.example.com
#
# Prerequisites:
#   - phala CLI installed and authenticated (phala auth login)
#   - kubectl, jq and curl installed
#
# Diagnostics (usage, errors, warnings) are written to stderr; progress
# messages are written to stdout. The script exits non-zero on any failed
# step except the certificate wait and the final smoke test, which only warn.

set -euo pipefail

CLUSTER_DOMAIN="${1:-${CLUSTER_DOMAIN:-}}"
CVM_NAME="${CVM_NAME:-my-k3s}"
INSTANCE_TYPE="${INSTANCE_TYPE:-tdx.medium}"
DISK_SIZE="${DISK_SIZE:-50G}"
KUBECONFIG_FILE="${KUBECONFIG_FILE:-k3s.yaml}"

# ---------- Helpers ----------

# Print a message to stderr and abort with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage text to stderr.
usage() {
  {
    echo "Usage: $0 <cluster-domain>"
    echo " e.g. $0 k3s.example.com"
    echo ""
    echo "Required env vars:"
    echo " CLOUDFLARE_API_TOKEN Cloudflare API token (Zone:Read + DNS:Edit)"
    echo " CERTBOT_EMAIL Email for Let's Encrypt registration"
    echo ""
    echo "Optional env vars:"
    echo " CVM_NAME CVM name (default: my-k3s)"
    echo " INSTANCE_TYPE Instance type (default: tdx.medium)"
    echo " DISK_SIZE Disk size (default: 50G)"
    echo " KUBECONFIG_FILE Output kubeconfig path (default: k3s.yaml)"
  } >&2
}

# Run a command until it succeeds.
# Arguments: $1 - maximum attempts, $2 - seconds to sleep between attempts,
#            $3.. - command and its arguments
# Returns:   0 as soon as the command succeeds, 1 if every attempt failed.
retry() {
  local -r attempts=$1 delay=$2
  shift 2
  local attempt
  for ((attempt = 1; attempt <= attempts; attempt++)); do
    if "$@"; then
      return 0
    fi
    # No point sleeping after the final attempt.
    if ((attempt < attempts)); then
      sleep "$delay"
    fi
  done
  return 1
}

# Print the HTTP status code for a URL, or "000" if no response was received.
# Arguments: $1 - URL, $2 - curl --max-time in seconds
http_status() {
  local -r url=$1 timeout=$2
  local code
  # curl already prints "000" via -w when the request fails, so the failure
  # branch must not print anything extra (that would produce "000000").
  code=$(curl -s -o /dev/null -w '%{http_code}' --max-time "$timeout" "$url" 2>/dev/null) || true
  printf '%s\n' "${code:-000}"
}

# Succeed once a trivial command can be run over SSH on the CVM.
# Arguments: $1 - app id
ssh_ready() {
  phala ssh "$1" -- "echo ok" >/dev/null 2>&1
}

# Copy the k3s kubeconfig out of the CVM into a local file.
# Arguments: $1 - app id, $2 - destination file
# Returns:   0 only if the command succeeded and the file is non-empty.
fetch_kubeconfig() {
  local -r app_id=$1 dest=$2
  phala ssh "$app_id" -- \
    "docker exec dstack-k3s-1 cat /etc/rancher/k3s/k3s.yaml" \
    2>/dev/null >"$dest" && [[ -s "$dest" ]]
}

# Succeed once the first node reports condition Ready=True.
node_ready() {
  local status
  status=$(kubectl get nodes \
    -o jsonpath='{.items[0].status.conditions[?(@.type=="Ready")].status}' \
    2>/dev/null) || return 1
  [[ "$status" == "True" ]]
}

# Succeed (and report) once the URL yields any HTTP response over HTTPS.
# Arguments: $1 - URL to probe
cert_ready() {
  local code
  code=$(http_status "$1" 5)
  [[ "$code" != "000" ]] || return 1
  echo " Certificate is ready (got HTTP ${code})"
}

# ---------- Preconditions ----------

if [[ -z "$CLUSTER_DOMAIN" ]]; then
  usage
  exit 1
fi

: "${CLOUDFLARE_API_TOKEN:?CLOUDFLARE_API_TOKEN is required}"
: "${CERTBOT_EMAIL:?CERTBOT_EMAIL is required}"

for cmd in phala kubectl jq curl; do
  command -v "$cmd" >/dev/null 2>&1 || die "Error: $cmd is required but not found"
done

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Absolute kubeconfig location, correct for both relative and absolute
# KUBECONFIG_FILE values.
case "$KUBECONFIG_FILE" in
  /*) KUBECONFIG_PATH="$KUBECONFIG_FILE" ;;
  *) KUBECONFIG_PATH="$(pwd)/${KUBECONFIG_FILE}" ;;
esac

# The kubeconfig is staged in a private temp file so that failed attempts
# never leave a truncated file at the destination.
RAW_KUBECONFIG=$(mktemp) || die "Error: mktemp failed"
trap 'rm -f -- "$RAW_KUBECONFIG"' EXIT

# ---------- Step 1: Deploy the CVM ----------

echo "==> Deploying CVM '${CVM_NAME}'..."
# NOTE(review): the Cloudflare token is passed on the command line and is
# therefore visible in the local process list while phala runs — confirm
# whether the CLI offers an env-file alternative.
phala deploy \
  -n "$CVM_NAME" \
  -c "${SCRIPT_DIR}/docker-compose.yaml" \
  -t "$INSTANCE_TYPE" \
  --disk-size "$DISK_SIZE" \
  --dev-os \
  -e "CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN}" \
  -e "CERTBOT_EMAIL=${CERTBOT_EMAIL}" \
  -e "CLUSTER_DOMAIN=${CLUSTER_DOMAIN}" \
  --wait

# ---------- Step 2: Extract APP_ID and GATEWAY_DOMAIN ----------

echo "==> Fetching CVM info..."
CVM_JSON=$(phala cvms get "$CVM_NAME" --json 2>/dev/null) \
  || die "Error: could not fetch info for CVM '${CVM_NAME}'"
# '// empty' turns a missing/null field into an empty string instead of the
# literal text "null", so the checks below can catch it.
APP_ID=$(jq -r '.app_id // empty' <<<"$CVM_JSON")
GATEWAY_DOMAIN=$(jq -r '.gateway.base_domain // empty' <<<"$CVM_JSON")
[[ -n "$APP_ID" ]] || die "Error: CVM info for '${CVM_NAME}' has no app_id"
[[ -n "$GATEWAY_DOMAIN" ]] || die "Error: CVM info for '${CVM_NAME}' has no gateway.base_domain"

echo " App ID: ${APP_ID}"
echo " Gateway domain: ${GATEWAY_DOMAIN}"

# ---------- Step 3: Wait for SSH and extract kubeconfig ----------

echo "==> Waiting for CVM to boot (this takes 2-3 minutes)..."
retry 30 10 ssh_ready "$APP_ID" \
  || die "Error: SSH not available after 5 minutes"

echo "==> Extracting kubeconfig..."
retry 12 10 fetch_kubeconfig "$APP_ID" "$RAW_KUBECONFIG" \
  || die "Error: could not extract kubeconfig after 2 minutes"

# Rewrite API server URL to use the gateway TLS passthrough endpoint.
# Plain sed with redirection is used instead of 'sed -i', whose syntax differs
# between GNU and BSD sed. The file holds cluster credentials, so it is
# created owner-only; chmod also covers a pre-existing destination file.
(
  umask 077
  sed "s|https://127\.0\.0\.1:6443|https://${APP_ID}-6443s.${GATEWAY_DOMAIN}|" \
    "$RAW_KUBECONFIG" >"$KUBECONFIG_FILE"
)
chmod 600 "$KUBECONFIG_FILE"

export KUBECONFIG="${KUBECONFIG_PATH}"

# ---------- Step 4: Wait for node Ready ----------

echo "==> Waiting for k3s node to become Ready..."
retry 30 10 node_ready \
  || die "Error: node not Ready after 5 minutes"

echo "==> Node is Ready:"
kubectl get nodes

# ---------- Step 5: Wait for wildcard certificate ----------

echo "==> Waiting for wildcard TLS certificate (this takes 5-8 minutes)..."
CERT_TEST_URL="https://test.${CLUSTER_DOMAIN}/"
if ! retry 60 10 cert_ready "$CERT_TEST_URL"; then
  # Deliberately non-fatal: the workload is still deployed below.
  echo "Warning: certificate not ready after 10 minutes, continuing anyway" >&2
fi

# ---------- Step 6: Deploy test workload ----------

echo "==> Deploying nginx test workload..."
NGINX_HOST="nginx.${CLUSTER_DOMAIN}"

# Best-effort so that re-running the script does not fail when the pod or
# service already exists; a genuinely missing pod is caught by 'kubectl wait'.
kubectl run nginx --image=nginx:alpine --port=80 --restart=Never 2>/dev/null || true
kubectl expose pod nginx --port=80 --target-port=80 --name=nginx 2>/dev/null || true

# The escaped backticks are emitted literally around the host name.
kubectl apply -f - <<EOF
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: nginx
spec:
  entryPoints: [web]
  routes:
    - match: Host(\`${NGINX_HOST}\`)
      kind: Rule
      services:
        - name: nginx
          port: 80
EOF

kubectl wait --for=condition=Ready pod/nginx --timeout=120s
sleep 10

# ---------- Smoke test ----------

echo ""
echo "==> Smoke test..."
HTTP_CODE=$(http_status "https://${NGINX_HOST}/" 10)
if [[ "$HTTP_CODE" == "200" ]]; then
  echo " PASS: https://${NGINX_HOST}/ returned 200"
else
  echo " WARN: https://${NGINX_HOST}/ returned ${HTTP_CODE} (cert may still be propagating)"
fi

# ---------- Done ----------

echo ""
echo "============================================"
echo " k3s on dstack is ready!"
echo "============================================"
echo ""
echo " Kubeconfig: export KUBECONFIG=${KUBECONFIG_PATH}"
echo " kubectl: kubectl get nodes"
echo " Test URL: https://${NGINX_HOST}/"
echo " Evidence: https://${NGINX_HOST}/evidences/quote"
echo ""
echo " To clean up:"
echo " kubectl delete ingressroute.traefik.io nginx && kubectl delete svc nginx && kubectl delete pod nginx"
echo " echo y | phala cvms delete ${CVM_NAME} && rm ${KUBECONFIG_FILE}"