-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy path04_setup_env.sh
More file actions
executable file
·461 lines (402 loc) · 14.3 KB
/
04_setup_env.sh
File metadata and controls
executable file
·461 lines (402 loc) · 14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
#!/bin/bash
# Script: 04_setup_env.sh
# Purpose: Create ML virtual environment and set up environment variables
# Usage: source 04_setup_env.sh [--auto] [env_name]
# Pull in the user's ~/.bashrc (when one exists) so that any PATH entries and
# exports it defines are visible to the rest of this script.
if [[ -f ~/.bashrc ]]; then
  . ~/.bashrc
fi
# Colors
# ANSI SGR color codes used by the print_* helpers below. They are stored as
# literal backslash sequences (single quotes), so they only become real escape
# characters when printed by a command that expands backslash escapes.
RED='\033[0;31m'      # red         - tag color for [ERROR]
GREEN='\033[0;32m'    # green       - tag color for [INFO]
YELLOW='\033[1;33m'   # bold yellow - tag color for [WARNING]
BLUE='\033[0;34m'     # blue        - tag color for [RUN]
NC='\033[0m'          # reset all attributes ("no color")
# Print an informational message to stdout with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
print_info() {
  local message="$1"
  local tag="${GREEN}[INFO]${NC}"
  echo -e "${tag} ${message}"
}
# Print an error message to stdout with a red "[ERROR]" tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
print_error() {
  local message="$1"
  local tag="${RED}[ERROR]${NC}"
  echo -e "${tag} ${message}"
}
# Print a warning message to stdout with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
print_warning() {
  local message="$1"
  local tag="${YELLOW}[WARNING]${NC}"
  echo -e "${tag} ${message}"
}
# Print a command line (for display only, it is not executed) to stdout with a
# blue "[RUN]" tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - command text; backslash escapes in it are expanded
print_command() {
  local command_text="$1"
  local tag="${BLUE}[RUN]${NC}"
  echo -e "${tag} ${command_text}"
}
#######################################
# Translate a menu number or an environment alias into the canonical
# environment type name.
# Every canonical name is itself an accepted alias, so passing the function's
# own output back in succeeds.
# Arguments: $1 - menu number (1-19) or alias (underscore or hyphen spelling)
# Outputs:   canonical environment type on stdout
# Returns:   0 when recognised; 1 (with no output) for anything else,
#            including out-of-range numbers and the empty string
#######################################
resolve_env_type() {
  case "$1" in
    1|deepseek_lmdeploy|deepseek-lmdeploy)
      echo "deepseek-lmdeploy"
      ;;
    2|deepseek_sglang|deepseek-sglang)
      echo "deepseek-sglang"
      ;;
    3|deepseek_vllm|deepseek-vllm)
      echo "deepseek-vllm"
      ;;
    # "glm45-sglang" is kept for backward compatibility; "glm-sglang" is the
    # canonical spelling this function emits.
    4|glm_sglang|glm-sglang|glm45-sglang)
      echo "glm-sglang"
      ;;
    5|glm_transformers|glm-transformers)
      echo "glm-transformers"
      ;;
    6|glm_vllm|glm-vllm)
      echo "glm-vllm"
      ;;
    7|gptoss_transformers|gpt-oss_transformers|gptoss-transformers|gpt-oss-transformers)
      echo "gpt-oss-transformers"
      ;;
    8|gptoss_vllm|gpt-oss_vllm|vllm_gptoss|gptoss-vllm|gpt-oss-vllm)
      echo "gpt-oss-vllm"
      ;;
    9|kimi_ktransformers|kimi-ktransformers)
      echo "kimi-ktransformers"
      ;;
    10|kimi_sglang|kimi-sglang)
      echo "kimi-sglang"
      ;;
    11|kimi_vllm|kimi-vllm)
      echo "kimi-vllm"
      ;;
    12|minimax_ktransformers|minimax-ktransformers)
      echo "minimax-ktransformers"
      ;;
    13|minimax_sglang|minimax-sglang)
      echo "minimax-sglang"
      ;;
    14|minimax_transformers|minimax-transformers)
      echo "minimax-transformers"
      ;;
    15|minimax_vllm|minimax-vllm)
      echo "minimax-vllm"
      ;;
    16|qwen3_sglang|qwen3-sglang)
      echo "qwen3-sglang"
      ;;
    17|qwen3_transformers|qwen3-transformers)
      echo "qwen3-transformers"
      ;;
    18|qwen3_vllm|qwen3-vllm)
      echo "qwen3-vllm"
      ;;
    19|custom)
      echo "custom"
      ;;
    *)
      # Unknown alias or out-of-range number: the caller decides what to do.
      return 1
      ;;
  esac
}
# Derive the virtual-environment name from an environment type.
# Arguments: $1 - environment type (may be empty or omitted)
# Outputs:   the type unchanged, or "custom" when it is empty/omitted
resolve_env_name() {
  local requested_type="${1:-custom}"
  echo "$requested_type"
}
# Record whether this file was sourced or executed directly: BASH_SOURCE[0]
# differs from $0 only when it is sourced. Later error paths use the flag to
# choose between `return` and `exit`.
BEING_SOURCED=false
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
  BEING_SOURCED=true
fi
# Command-line handling:
#   --auto           run without interactive prompts
#   <anything else>  taken as the environment type unless it starts with "--";
#                    when several are given, the last one wins
# Unknown "--" options are silently ignored.
AUTO_MODE=false
ENV_TYPE=""
for arg in "$@"; do
  if [[ "$arg" == "--auto" ]]; then
    AUTO_MODE=true
  elif [[ "$arg" != --* ]]; then
    ENV_TYPE="$arg"
  fi
done
# Choose the environment type.
#   - interactive (no --auto, no type argument): show the menu and loop until
#     resolve_env_type accepts the answer; ENV_TYPE then holds the canonical name
#   - --auto without a type argument: fall back to GLM 4.X (SGLang)
#   - type given on the command line: left untouched here
if [ -z "$ENV_TYPE" ] && [ "$AUTO_MODE" = false ]; then
  echo ""
  print_info "Select ML environment type:"
  echo "1) DeepSeek-V3.X/R1/OCR (LMDeploy)"
  echo "2) DeepSeek-V3.X/R1/OCR (SGLang)"
  echo "3) DeepSeek-V3.X/R1/OCR (vLLM)"
  echo "4) GLM 4.X (SGLang)"
  echo "5) GLM 4.X (Transformers)"
  echo "6) GLM 4.X (vLLM)"
  echo "7) gpt-oss (Transformers)"
  echo "8) gpt-oss (vLLM)"
  echo "9) Kimi K2.X (KTransformers)"
  echo "10) Kimi K2.X (SGLang)"
  echo "11) Kimi K2.X (vLLM)"
  echo "12) MiniMax-M2.X (KTransformers)"
  echo "13) MiniMax-M2.X (SGLang)"
  echo "14) MiniMax-M2.X (Transformers)"
  echo "15) MiniMax-M2.X (vLLM)"
  echo "16) Qwen3 (SGLang)"
  echo "17) Qwen3 (Transformers)"
  echo "18) Qwen3 (vLLM)"
  echo "19) Custom"
  echo ""
  while true; do
    # -r keeps backslashes literal. `read` fails once stdin reaches EOF (closed
    # or redirected input); without this guard the loop would spin forever
    # printing "Invalid choice". A final line without a trailing newline still
    # fills $choice, so it is only treated as "no input" when it is empty.
    if ! read -r -p "Enter your choice (1-19): " choice && [ -z "$choice" ]; then
      echo ""
      print_error "No input available. Pass an environment type as an argument or use --auto."
      if [ "$BEING_SOURCED" = false ]; then
        exit 1
      else
        return 1
      fi
    fi
    if ENV_TYPE=$(resolve_env_type "$choice"); then
      break
    else
      print_error "Invalid choice. Please enter a number between 1 and 19."
    fi
  done
elif [ -z "$ENV_TYPE" ]; then
  # Default to GLM 4.X (SGLang) in auto mode
  ENV_TYPE="glm_sglang"
fi
# Canonicalise the environment type. A value resolve_env_type does not know is
# kept verbatim, which is how free-form environment names are supported.
if [[ -n "$ENV_TYPE" ]] && ENV_TYPE_MAPPED=$(resolve_env_type "$ENV_TYPE"); then
  ENV_TYPE="$ENV_TYPE_MAPPED"
fi

# Anything that is still purely numeric at this point was a menu number outside
# the valid range, so reject it rather than use it as a name.
if [[ "$ENV_TYPE" =~ ^[0-9]+$ ]]; then
  print_error "Invalid environment selection: $ENV_TYPE"
  if [[ "$BEING_SOURCED" != false ]]; then
    return 1
  else
    exit 1
  fi
fi

# The virtual environment lives at ~/<name>_env.
ENV_NAME="$(resolve_env_name "$ENV_TYPE")"
ENV_PATH="${HOME}/${ENV_NAME}_env"
#######################################
# Expand a leading "~" that refers to the current user's home directory.
# Only a bare "~" or a "~/..." prefix is rewritten; "~user/..." is returned
# unchanged instead of being glued onto $HOME.
# Globals:   HOME (read)
# Arguments: $1 - path as typed by the user
# Outputs:   the expanded path on stdout
#######################################
expand_home_prefix() {
  local raw_path="$1"
  case "$raw_path" in
    "~")
      printf '%s\n' "$HOME"
      ;;
    "~/"*)
      # Drop the two leading characters ("~/") and re-root under $HOME.
      printf '%s\n' "${HOME}/${raw_path:2}"
      ;;
    *)
      printf '%s\n' "$raw_path"
      ;;
  esac
}

# Ask for HuggingFace model storage location.
# In --auto mode the default is used without prompting.
DEFAULT_HF_PATH="/workspace/models/huggingface"
if [ "$AUTO_MODE" = false ]; then
  echo ""
  print_info "Where would you like to store HuggingFace models?"
  print_info "Default: $DEFAULT_HF_PATH"
  # -r keeps backslashes in the typed path literal. An empty answer (or EOF)
  # falls through to the default below.
  read -r -p "Enter path (press Enter for default): " HF_PATH_INPUT
  if [ -z "$HF_PATH_INPUT" ]; then
    HF_PATH="$DEFAULT_HF_PATH"
    print_info "Using default path: $HF_PATH"
  else
    # `read` does not perform tilde expansion, so do it by hand.
    HF_PATH=$(expand_home_prefix "$HF_PATH_INPUT")
    print_info "Using custom path: $HF_PATH"
  fi
else
  HF_PATH="$DEFAULT_HF_PATH"
  print_info "Using default HuggingFace path: $HF_PATH"
fi
# Check prerequisites: a `python` executable and the `uv` tool must be on PATH.
print_info "Checking prerequisites..."
# `type -P` searches PATH for an executable file only. `command -v` would also
# report an alias or shell function named "python" (possible when this script
# is sourced from an interactive shell), and that text is not usable as an
# interpreter path for `uv venv --python`.
PYTHON_BIN=$(type -P python)
if [ -z "$PYTHON_BIN" ]; then
  print_error "Python is not available on PATH. Please ensure Python is installed and accessible before running this script."
  if [ "$BEING_SOURCED" = false ]; then
    exit 1
  else
    return 1
  fi
fi
# Quoted so an interpreter path containing spaces is executed as one word.
# `--version` output is captured from both streams.
PYTHON_VERSION=$("$PYTHON_BIN" --version 2>&1)
print_info "Using Python from: $PYTHON_BIN ($PYTHON_VERSION)"
if ! command -v uv &> /dev/null; then
  print_error "uv is not installed. Please run check_python.sh first."
  if [ "$BEING_SOURCED" = false ]; then
    exit 1
  else
    return 1
  fi
fi
# Check if environment exists and handle rebuild.
#   - interactive: ask whether to delete it so it can be recreated
#   - --auto:      always keep the existing environment
# RECREATE ends up holding the user's answer ("n" in --auto mode or on EOF).
if [ -d "$ENV_PATH" ]; then
  print_warning "⚠️ Environment $ENV_NAME already exists at $ENV_PATH"
  if [ "$AUTO_MODE" = false ]; then
    echo ""
    print_info "Do you want to rebuild it? This will:"
    print_info " • Delete the existing environment directory"
    print_info " • Remove all installed packages"
    print_info " • Create a fresh environment"
    echo ""
    while true; do
      # `read` fails once stdin reaches EOF; without this guard the loop would
      # never end. Keeping the environment is the non-destructive default.
      if ! read -r -p "Rebuild environment? (y/n): " RECREATE && [ -z "$RECREATE" ]; then
        echo ""
        print_warning "No input available - keeping existing environment"
        RECREATE="n"
        break
      fi
      case "${RECREATE,,}" in
        y|yes)
          print_info "Destroying existing environment..."
          print_command "rm -rf $ENV_PATH"
          # ${ENV_PATH:?} aborts the command if the variable is ever empty,
          # and `--` stops option parsing for unusual directory names.
          if rm -rf -- "${ENV_PATH:?}"; then
            print_info "✓ Environment destroyed"
          else
            print_error "Failed to remove $ENV_PATH"
            if [ "$BEING_SOURCED" = false ]; then
              exit 1
            else
              return 1
            fi
          fi
          break
          ;;
        n|no)
          print_info "Keeping existing environment"
          break
          ;;
        *)
          print_error "Please answer 'y' for yes or 'n' for no"
          ;;
      esac
    done
  else
    RECREATE="n"
    print_info "Using existing environment (use without --auto to be prompted)"
  fi
fi
# Create the virtual environment with uv unless the directory is already there
# (either it never existed or the rebuild step above just removed it).
if [[ ! -d "$ENV_PATH" ]]; then
  print_info "Creating virtual environment at $ENV_PATH using $PYTHON_BIN..."
  if uv venv "$ENV_PATH" --python "$PYTHON_BIN"; then
    print_info "✓ Virtual environment created successfully"
  else
    print_error "Failed to create virtual environment"
    if [[ "$BEING_SOURCED" != false ]]; then
      return 1
    else
      exit 1
    fi
  fi
fi
#######################################
# Map a GPU product name to its CUDA compute capability.
# Branches are tried in order, so earlier, more specific checks win over later
# substring matches (e.g. "L4" also covers "L40" and "L40S").
# Arguments: $1 - GPU name as reported by nvidia-smi, already upper-cased
# Outputs:   "<major.minor> <architecture family>" on stdout
# Returns:   0 when the model is recognised, 1 (no output) otherwise
#######################################
gpu_arch_from_name() {
  local name="$1"
  local rtx_x000=false
  local ada=false

  # "RTX 4000/5000/6000" exist both as Turing (Quadro) and as Ada boards; the
  # Ada ones carry "ADA" in their name.
  if [[ "$name" == *"RTX 4000"* || "$name" == *"RTX 5000"* || "$name" == *"RTX 6000"* ]]; then
    rtx_x000=true
  fi
  if [[ "$name" == *"ADA"* ]]; then
    ada=true
  fi

  if [[ "$name" == *"V100"* ]]; then
    echo "7.0 Volta"
  elif [[ "$name" == *"T4"* ]] || { [[ "$rtx_x000" == true ]] && [[ "$ada" == false ]]; }; then
    echo "7.5 Turing"
  elif [[ "$name" != *"RTX A"[0-9]* ]] &&
    [[ "$name" == *"A100"* || "$name" == *"A30"* ]]; then
    # Workstation "RTX A1000"/"RTX A3000" contain "A100"/"A30" as substrings
    # but are sm_86 parts, so RTX A-series names are excluded here and picked
    # up by the next branch.
    echo "8.0 Ampere"
  elif [[ "$name" == *"3090"* || "$name" == *"3080"* || "$name" == *"3070"* ||
    "$name" == *"RTX A"[0-9]* ||
    "$name" == *"A6000"* || "$name" == *"A5000"* || "$name" == *"A4500"* ||
    "$name" == *"A4000"* || "$name" == *"A2000"* ||
    "$name" == *"A10"* || "$name" == *"A40"* ]]; then
    echo "8.6 Ampere"
  elif [[ "$name" == *"4090"* || "$name" == *"4070 TI"* || "$name" == *"L4"* ]] ||
    { [[ "$rtx_x000" == true ]] && [[ "$ada" == true ]]; }; then
    echo "8.9 Ada Lovelace"
  elif [[ "$name" == *"H100"* || "$name" == *"H200"* ]]; then
    # "GH200" is matched through the "H200" substring.
    echo "9.0 Hopper"
  elif [[ "$name" == *"B200"* ]]; then
    echo "10.0 Blackwell"
  elif [[ "$name" == *"5090"* ]] ||
    { [[ "$name" == *"RTX PRO 6000"* ]] && [[ "$name" == *"BLACKWELL"* ]]; }; then
    echo "12.0 Blackwell"
  else
    return 1
  fi
}

# Detect GPU architecture.
# Sets TORCH_CUDA_ARCH_LIST to the compute capability of the first GPU that
# nvidia-smi lists, or leaves it empty when no GPU or architecture is found.
print_info "Detecting GPU architecture..."
TORCH_CUDA_ARCH_LIST=""
if command -v nvidia-smi &> /dev/null; then
  # Only the first GPU is inspected; the name is upper-cased for matching.
  GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 | tr '[:lower:]' '[:upper:]')
  if [ -n "$GPU_NAME" ]; then
    print_info "Detected GPU: $GPU_NAME"
    if GPU_ARCH_INFO=$(gpu_arch_from_name "$GPU_NAME"); then
      # GPU_ARCH_INFO is "<capability> <family>": split on the first space.
      TORCH_CUDA_ARCH_LIST="${GPU_ARCH_INFO%% *}"
      print_info " → $GPU_NAME (${GPU_ARCH_INFO#* }) detected: sm_${TORCH_CUDA_ARCH_LIST//./}"
    else
      # Model not in the table: ask the driver for the compute capability.
      # nvidia-smi builds without the compute_cap field print an error message
      # instead, so the value is only accepted when it looks like "N.N".
      GPU_COMPUTE_CAP=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '[:space:]')
      if [[ "$GPU_COMPUTE_CAP" =~ ^[0-9]+\.[0-9]+$ ]]; then
        TORCH_CUDA_ARCH_LIST="$GPU_COMPUTE_CAP"
        print_info " → $GPU_NAME is not a known model; using driver-reported compute capability: sm_${GPU_COMPUTE_CAP//./}"
      else
        print_warning " → Unknown GPU model, will use default PyTorch CUDA architectures"
      fi
    fi
    if [ -n "$TORCH_CUDA_ARCH_LIST" ]; then
      print_info " → Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
    fi
  else
    print_warning "Could not detect GPU name"
  fi
else
  print_warning "nvidia-smi not found - no GPU detected"
fi
echo ""
# Create activation script with environment variables
# Writes $ENV_PATH/activate_ml, a script meant to be sourced that activates the
# virtual environment and exports the HuggingFace (and, when detected, GPU)
# settings chosen above.
#
# The here-document delimiter is unquoted, so $ENV_PATH, $HF_PATH and
# $TORCH_CUDA_ARCH_LIST are expanded NOW and their current values are baked
# into the generated file. Only the escaped \$(python --version) is left for
# the generated script to evaluate when it runs.
# The two ${TORCH_CUDA_ARCH_LIST:+...} lines produce their text only when an
# architecture was detected; otherwise they expand to an empty line.
# Do not add comment lines inside the here-document: everything between
# "<< EOF" and "EOF" is written verbatim into activate_ml.
print_info "Creating activation script with ML environment variables..."
cat > "$ENV_PATH/activate_ml" << EOF
#!/bin/bash
# Activate virtual environment
source "$ENV_PATH/bin/activate"
# Set ML environment variables
export HF_HOME="$HF_PATH"
export HUGGINGFACE_HUB_CACHE="$HF_PATH"
# GPU architecture for PyTorch
${TORCH_CUDA_ARCH_LIST:+export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST"}
echo "ML environment activated with:"
echo " - Virtual env: $ENV_PATH"
echo " - HF_HOME: $HF_PATH"
${TORCH_CUDA_ARCH_LIST:+echo " - TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"}
echo " - Python: \$(python --version)"
EOF
# Mark the generated script executable.
chmod +x "$ENV_PATH/activate_ml"
# Add environment variables to .bashrc if not present.
# The presence of any "HF_HOME=" text in ~/.bashrc is treated as "already
# configured"; in that case nothing is appended and the user is told so,
# because the existing entry may point somewhere other than $HF_PATH.
print_info "Updating ~/.bashrc with environment variables..."
# -s silences grep's own error when ~/.bashrc does not exist yet; the append
# below then creates the file.
if ! grep -qs "HF_HOME=" ~/.bashrc; then
  # Unquoted delimiter: the current values are written into ~/.bashrc.
  cat >> ~/.bashrc << EOF
# ML Environment Variables
export HF_HOME="$HF_PATH"
export HUGGINGFACE_HUB_CACHE="$HF_PATH"
${TORCH_CUDA_ARCH_LIST:+export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST"}
EOF
  print_info "Added HF_HOME to ~/.bashrc"
  if [ -n "$TORCH_CUDA_ARCH_LIST" ]; then
    print_info "Added TORCH_CUDA_ARCH_LIST to ~/.bashrc"
  fi
else
  print_info "~/.bashrc already defines HF_HOME - leaving it unchanged (edit it manually to use $HF_PATH)"
fi
# Make sure the model store and the workspace helper directories exist.
# Failure to create one is reported as a warning and does not stop the script.
print_info "Creating directory structure..."
# The two directory levels above HF_PATH are listed explicitly so that each one
# is reported individually.
HF_PARENT=$(dirname "$HF_PATH")
HF_GRANDPARENT=$(dirname "$HF_PARENT")
DIRS=("$HF_GRANDPARENT" "$HF_PARENT" "$HF_PATH" "/workspace/scripts" "/workspace/logs")
for dir in "${DIRS[@]}"; do
  if [[ -d "$dir" ]]; then
    print_info "✓ $dir exists"
    continue
  fi
  # mkdir's own error text is hidden; the warning below replaces it.
  if ! mkdir -p "$dir" 2>/dev/null; then
    print_warning "Could not create $dir - you may need to create it manually"
  fi
done
echo ""
print_info "✅ ML environment setup complete!"
echo ""
# Final step depends on how the script was invoked:
#   executed -> a child process cannot change the caller's shell, so only print
#               instructions for activating the environment
#   sourced  -> activate the environment in the caller's shell right away
if [[ "$BEING_SOURCED" != true ]]; then
  # Show activation instructions when run as script
  print_info "To activate the environment:"
  echo ""
  print_command "source $ENV_PATH/activate_ml"
  echo ""
  # NOTE(review): nothing in this script defines an alias named $ENV_NAME;
  # confirm it is created elsewhere, otherwise this hint is misleading.
  print_info "Or use the alias (after reloading shell):"
  print_command "source ~/.bashrc"
  print_command "$ENV_NAME"
  echo ""
  print_info "Or source this script to create and activate:"
  print_command "source $0"
else
  print_info "Activating ML environment..."
  source "$ENV_PATH/bin/activate"
  # Same variables that activate_ml exports.
  export HF_HOME="$HF_PATH"
  export HUGGINGFACE_HUB_CACHE="$HF_PATH"
  # Export the GPU architecture only when one was detected.
  if [[ -n "$TORCH_CUDA_ARCH_LIST" ]]; then
    export TORCH_CUDA_ARCH_LIST
    print_info " TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
  fi
  echo ""
  print_info "✓ Environment activated!"
  print_info " Python: $(which python)"
  print_info " Version: $(python --version)"
fi