-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgpu_setup.sh
More file actions
executable file
·122 lines (109 loc) · 5.1 KB
/
gpu_setup.sh
File metadata and controls
executable file
·122 lines (109 loc) · 5.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env bash
# =============================================================================
# gpu_setup.sh — ONE-TIME remote GPU setup
# =============================================================================
# Run this exactly once from your local Mac. It will:
# 1. Copy your SSH public key to the remote so you never type a password again
# 2. Install system packages (tmux, python3-venv, python3-pip) on the remote
# 3. Create the project directory and a Python venv on the remote
# 4. Install PyTorch nightly (CUDA 12.8 wheels) + all project requirements
# 5. Sync the local data/ directory to the remote (312 MB, one-time transfer)
#
# Usage:
# chmod +x gpu_setup.sh
# ./gpu_setup.sh
# =============================================================================
# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# ── Remote config ─────────────────────────────────────────────────────────────
# Each setting can be overridden from the environment, for example:
#   REMOTE_HOST=10.0.0.5 SUDO_PASS='secret' ./gpu_setup.sh
# When nothing is overridden the values below are exactly the ones this script
# has always used.
REMOTE_USER="${REMOTE_USER:-newuser}"
REMOTE_HOST="${REMOTE_HOST:-100.37.41.165}"
REMOTE_DIR="${REMOTE_DIR:-/home/${REMOTE_USER}/Efficient-Distillation}"
REMOTE="${REMOTE_USER}@${REMOTE_HOST}"

# sudo password on the remote machine (used by the package-install step).
# NOTE(review): a real password must not be committed to this file — supply it
# via the SUDO_PASS environment variable; the fallback below is only kept so
# existing invocations behave as before.
SUDO_PASS="${SUDO_PASS:-password}"

# Banner showing which remote target the rest of the script will touch.
printf '%s\n' \
  "============================================================" \
  " Remote GPU Setup" \
  " Target: ${REMOTE}:${REMOTE_DIR}" \
  "============================================================" \
  ""
# ── Step 1: Copy SSH public key so future connections are passwordless ─────────
echo "[1/5] Setting up passwordless SSH (you will be asked for your password ONCE) ..."

# Path of the local private key; the matching public key is "<path>.pub".
ssh_key=~/.ssh/id_ed25519

# Create an ed25519 key pair with an empty passphrase only when the private
# key file is not already present.
if [[ ! -f "${ssh_key}" ]]; then
  echo " No SSH key found — generating one now ..."
  ssh-keygen -t ed25519 -N "" -f "${ssh_key}"
fi

# Install the public key on the remote account.
ssh-copy-id -i "${ssh_key}.pub" "${REMOTE}"

echo " ✓ SSH key installed. You will not need to type a password again."
echo ""
# ── Step 2: Install system packages on remote ─────────────────────────────────
echo "[2/5] Installing system packages on remote (tmux, python3-venv) ..."

# The sudo password travels over ssh's stdin rather than being spliced into the
# remote command line, so it does not show up in process listings on either
# machine and may contain quotes or other shell metacharacters.
#   sudo -S     read the password from stdin
#   sudo -p ""  print no password prompt
# A single sudo runs both apt-get commands, so the password is consumed once.
printf '%s\n' "${SUDO_PASS}" \
  | ssh "${REMOTE}" \
      'sudo -S -p "" sh -c "apt-get update -qq && apt-get install -y tmux python3-venv python3-pip"'

echo " ✓ System packages ready."
echo ""
# ── Step 3: Create project directory and Python venv ──────────────────────────
echo "[3/5] Creating project directory and Python virtual environment ..."

# Shell-quote the directory so the remote shell receives it as one argument
# even if it contains spaces or metacharacters.
remote_dir_q="$(printf '%q' "${REMOTE_DIR}")"

# The remote commands are fed to "bash -s" on stdin; the quoted heredoc
# delimiter means nothing is expanded locally, and "$1" is the project
# directory on the remote side. "set -e" stops at the first failure, so a
# failed mkdir/cd can no longer lead to a venv being created somewhere else.
ssh "${REMOTE}" "bash -s -- ${remote_dir_q}" <<'REMOTE_SCRIPT'
set -euo pipefail
mkdir -p -- "$1"
cd -- "$1"
python3 -m venv .venv
.venv/bin/pip install --upgrade pip --quiet
REMOTE_SCRIPT

echo " ✓ Virtual environment created at ${REMOTE_DIR}/.venv"
echo ""
# ── Step 4: Install PyTorch (CUDA) + project requirements ─────────────────────
echo "[4/5] Installing PyTorch nightly with CUDA 12.8 + project dependencies ..."
echo " (This takes 3-5 minutes — PyTorch is ~2 GB)"
echo ""

# Shell-quote the directory so the remote shell receives it as one argument.
remote_dir_q="$(printf '%q' "${REMOTE_DIR}")"

# The remote commands are fed to "bash -s" on stdin. The quoted heredoc
# delimiter keeps everything literal locally; "$1" is the project directory on
# the remote side. "set -e" makes a failed pip install abort the step instead
# of being hidden by the exit status of the final GPU check.
ssh "${REMOTE}" "bash -s -- ${remote_dir_q}" <<'REMOTE_SCRIPT'
set -euo pipefail
cd -- "$1"

# RTX 5090 is Blackwell (sm_120) — this installs the PyTorch nightly build
# from the CUDA 12.8 wheel index.
# NOTE(review): the original rationale was that stable PyTorch only supported
# up to sm_90; confirm whether that still holds before switching indexes.
.venv/bin/pip install --pre torch torchvision \
  --index-url https://download.pytorch.org/whl/nightly/cu128 \
  --quiet

# Project requirements (presumably mirroring requirements.txt — keep in sync).
# Each specifier is single-quoted: unquoted, the shell treats ">" as an output
# redirection, silently dropping the version constraint and creating stray
# files such as "=0.9.12" in the project directory.
.venv/bin/pip install \
  'timm>=0.9.12' \
  'transformers>=4.40.0' \
  'numpy>=1.24' \
  Pillow \
  matplotlib \
  'tqdm>=4.66' \
  'pyyaml>=6.0' \
  'wandb>=0.17.0' \
  --quiet

# Confirm GPU is visible
echo ''
echo 'PyTorch CUDA check:'
.venv/bin/python -c "
import torch
print(f' PyTorch version : {torch.__version__}')
print(f' CUDA available : {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f' GPU : {torch.cuda.get_device_name(0)}')
    print(f' VRAM : {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')
"
REMOTE_SCRIPT

echo ""
echo " ✓ All Python dependencies installed."
echo ""
# ── Step 5: Sync the data directory to remote (one-time, 312 MB) ──────────────
echo "[5/5] Syncing data/ to remote (312 MB — this takes ~1 minute on a good connection) ..."

# rsync options: archive mode, verbose, compressed, with per-file progress;
# Python bytecode caches are left out of the transfer.
rsync_opts=(
  -avz
  --progress
  --exclude="__pycache__/"
  --exclude="*.pyc"
)

# Source is data/ relative to the current working directory.
rsync "${rsync_opts[@]}" data/ "${REMOTE}:${REMOTE_DIR}/data/"

echo ""
echo " ✓ Data synced."
echo ""
# ── Done ──────────────────────────────────────────────────────────────────────
# Closing banner with an example follow-up command. The heredoc delimiter is
# quoted so the trailing backslashes are printed literally.
cat <<'BANNER'
============================================================
 Setup complete!

 You can now run any command on the GPU with:
 ./gpu_run.sh python finetuning/frozenBase_customHead.py \
 --student_checkpoint none \
 --nyu_root data/nyu_depth_v2 \
 --epochs 30 \
 --save_dir checkpoints/depth_head
============================================================
BANNER