Merge pull request #4 from jipc3/construccion #4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GitHub Actions workflow: on every push to main, export a set of notebooks
# from an origin Databricks workspace, import them into a destination
# workspace, and (re)create the multi-task job "WF_ADB" that orchestrates them.
#
# Required repository secrets:
#   DATABRICKS_ORIGIN_HOST / DATABRICKS_ORIGIN_TOKEN - source workspace
#   DATABRICKS_DEST_HOST   / DATABRICKS_DEST_TOKEN   - target workspace
name: Dynamic Databricks Notebook Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Install jq & curl
        run: sudo apt-get update && sudo apt-get install -y jq curl

      - name: Export multiple notebooks (raw)
        run: |
          set -euo pipefail
          ORIGIN_HOST=${{ secrets.DATABRICKS_ORIGIN_HOST }}
          ORIGIN_TOKEN=${{ secrets.DATABRICKS_ORIGIN_TOKEN }}
          NOTEBOOK_BASE="/Workspace/Users/jeremypalma2022@gmail.com/Proyect_Databricks_GitHub"
          # Notebook paths relative to NOTEBOOK_BASE; agrega más rutas según necesites.
          NOTEBOOK_PATHS=(
            "process/Ingest_supercias_compania"
            "process/Ingest_supercias_ranking"
            "process/Ingest_supercias_sector"
            "process/Ingest_supercias_segmento"
            "process/Load_supercias"
            "process/Transform_supercias"
            "scripts/Preparacion_Ambiente"
          )
          mkdir -p notebooks_to_deploy
          for nb_path in "${NOTEBOOK_PATHS[@]}"; do
            nb=$(basename "$nb_path")
            echo "Exportando $nb_path en modo raw..."
            # --fail makes curl exit non-zero on HTTP errors, so an API error
            # page is never saved to disk as if it were notebook source.
            curl -s --fail -X GET \
              -H "Authorization: Bearer $ORIGIN_TOKEN" \
              "$ORIGIN_HOST/api/2.0/workspace/export?path=$NOTEBOOK_BASE/$nb_path&format=SOURCE&direct_download=true" \
              --output "notebooks_to_deploy/$nb.py"
          done

      - name: Deploy notebooks to Destination Workspace
        run: |
          set -euo pipefail
          DEST_HOST=${{ secrets.DATABRICKS_DEST_HOST }}
          DEST_TOKEN=${{ secrets.DATABRICKS_DEST_TOKEN }}
          DEST_BASE="/prod/scripts/main"
          # The target folder only needs to be created once, not once per file.
          echo "Creando carpeta $DEST_BASE si no existe..."
          curl -s --fail -X POST \
            -H "Authorization: Bearer $DEST_TOKEN" \
            -H "Content-Type: application/json" \
            -d "{\"path\":\"$DEST_BASE\"}" \
            "$DEST_HOST/api/2.0/workspace/mkdirs"
          for file in notebooks_to_deploy/*.py; do
            name=$(basename "$file" .py)
            dest_path="$DEST_BASE/$name"
            echo "Importando $file → $dest_path"
            response=$(curl -s --fail -X POST \
              -H "Authorization: Bearer $DEST_TOKEN" \
              -H "Content-Type: multipart/form-data" \
              -F "path=$dest_path" \
              -F "format=SOURCE" \
              -F "language=PYTHON" \
              -F "overwrite=true" \
              -F "content=@$file" \
              "$DEST_HOST/api/2.0/workspace/import")
            echo "Response: $response"
          done

      - name: Check if workflow exists and delete if necessary
        run: |
          set -euo pipefail
          DEST_HOST=${{ secrets.DATABRICKS_DEST_HOST }}
          DEST_TOKEN=${{ secrets.DATABRICKS_DEST_TOKEN }}
          WORKFLOW_NAME="WF_ADB"
          echo "Verificando si existe el workflow: $WORKFLOW_NAME"
          # List all jobs and look the workflow up by name.
          # NOTE(review): /jobs/list is paginated — confirm the workspace has
          # few enough jobs that the target appears on the first page.
          workflows_response=$(curl -s --fail -X GET \
            -H "Authorization: Bearer $DEST_TOKEN" \
            "$DEST_HOST/api/2.1/jobs/list")
          # first(...) guards against several jobs sharing the same name,
          # which would otherwise yield a multi-line (invalid) job_id.
          existing_job_id=$(echo "$workflows_response" | jq -r --arg name "$WORKFLOW_NAME" 'first(.jobs[]? | select(.settings.name == $name) | .job_id) // empty')
          if [ -n "$existing_job_id" ]; then
            echo "Workflow encontrado con ID: $existing_job_id. Eliminando..."
            delete_response=$(curl -s --fail -X POST \
              -H "Authorization: Bearer $DEST_TOKEN" \
              -H "Content-Type: application/json" \
              -d "{\"job_id\": $existing_job_id}" \
              "$DEST_HOST/api/2.1/jobs/delete")
            echo "Delete response: $delete_response"
          else
            echo "No se encontró workflow existente con nombre: $WORKFLOW_NAME"
          fi

      - name: Create Databricks Workflow WF_ADB
        run: |
          set -euo pipefail
          DEST_HOST=${{ secrets.DATABRICKS_DEST_HOST }}
          DEST_TOKEN=${{ secrets.DATABRICKS_DEST_TOKEN }}
          # Must match the import path used by the deploy step. This was
          # "/py/scripts/main", pointing the job at notebooks that were never
          # deployed there.
          DEST_BASE="/prod/scripts/main"
          echo "Creando workflow: WF_ADB"
          # Build the job JSON. The heredoc delimiter is unquoted so that
          # ${DEST_BASE} expands, keeping task paths in sync with the deploy
          # step instead of being hard-coded in seven places.
          # NOTE(review): the cluster key is named "serverless_cluster" but
          # this defines a classic job cluster; num_workers: 0 without
          # single-node spark conf/tags may be rejected by the API — confirm.
          cat > workflow_config.json << EOF
          {
            "name": "WF_ADB",
            "format": "MULTI_TASK",
            "tasks": [
              {
                "task_key": "Preparacion_Ambiente",
                "description": "Ejecuta notebook Preparacion_Ambiente",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Preparacion_Ambiente",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2
              },
              {
                "task_key": "Ingest_supercias_compania",
                "description": "Ejecuta notebook Ingest_supercias_compania",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Ingest_supercias_compania",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2,
                "depends_on": [
                  {
                    "task_key": "Preparacion_Ambiente"
                  }
                ]
              },
              {
                "task_key": "Ingest_supercias_ranking",
                "description": "Ejecuta notebook Ingest_supercias_ranking",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Ingest_supercias_ranking",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2,
                "depends_on": [
                  {
                    "task_key": "Preparacion_Ambiente"
                  }
                ]
              },
              {
                "task_key": "Ingest_supercias_sector",
                "description": "Ejecuta notebook Ingest_supercias_sector",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Ingest_supercias_sector",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2,
                "depends_on": [
                  {
                    "task_key": "Preparacion_Ambiente"
                  }
                ]
              },
              {
                "task_key": "Ingest_supercias_segmento",
                "description": "Ejecuta notebook Ingest_supercias_segmento",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Ingest_supercias_segmento",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2,
                "depends_on": [
                  {
                    "task_key": "Preparacion_Ambiente"
                  }
                ]
              },
              {
                "task_key": "Transform_supercias",
                "description": "Ejecuta notebook Transform_supercias",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Transform_supercias",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2,
                "depends_on": [
                  {
                    "task_key": "Ingest_supercias_compania"
                  },
                  {
                    "task_key": "Ingest_supercias_ranking"
                  },
                  {
                    "task_key": "Ingest_supercias_sector"
                  },
                  {
                    "task_key": "Ingest_supercias_segmento"
                  }
                ]
              },
              {
                "task_key": "Load_supercias",
                "description": "Ejecuta notebook Load_supercias",
                "notebook_task": {
                  "notebook_path": "${DEST_BASE}/Load_supercias",
                  "source": "WORKSPACE"
                },
                "job_cluster_key": "serverless_cluster",
                "timeout_seconds": 3600,
                "max_retries": 2,
                "depends_on": [
                  {
                    "task_key": "Transform_supercias"
                  }
                ]
              }
            ],
            "job_clusters": [
              {
                "job_cluster_key": "serverless_cluster",
                "new_cluster": {
                  "spark_version": "16.4.x-scala2.12",
                  "node_type_id": "Standard_D4s_v3",
                  "driver_node_type_id": "Standard_D4s_v3",
                  "num_workers": 0,
                  "data_security_mode": "SINGLE_USER",
                  "runtime_engine": "STANDARD",
                  "custom_tags": {
                    "environment": "production",
                    "team": "ext_users"
                  }
                }
              }
            ],
            "schedule": {
              "quartz_cron_expression": "0 0 8 * * ?",
              "timezone_id": "America/Lima",
              "pause_status": "PAUSED"
            },
            "email_notifications": {
              "on_failure": [],
              "on_success": [],
              "no_alert_for_skipped_runs": false
            },
            "webhook_notifications": {},
            "timeout_seconds": 7200,
            "max_concurrent_runs": 1,
            "tags": {
              "environment": "production",
              "created_by": "github_actions",
              "project": "automated_deployment"
            }
          }
          EOF
          # Create the job (no --fail here: on error we want to print the
          # response body in the else branch below).
          create_response=$(curl -s -X POST \
            -H "Authorization: Bearer $DEST_TOKEN" \
            -H "Content-Type: application/json" \
            -d @workflow_config.json \
            "$DEST_HOST/api/2.1/jobs/create")
          echo "Workflow creation response: $create_response"
          # Extract job_id from the response; "// empty" maps null to "".
          job_id=$(echo "$create_response" | jq -r '.job_id // empty')
          if [ -n "$job_id" ]; then
            echo "Workflow 'WF_ADB' creado exitosamente con ID: $job_id"
            # Fetch and summarize the job we just created.
            workflow_details=$(curl -s --fail -X GET \
              -H "Authorization: Bearer $DEST_TOKEN" \
              "$DEST_HOST/api/2.1/jobs/get?job_id=$job_id")
            echo "Detalles del workflow:"
            echo "$workflow_details" | jq '.settings | {name, tasks: (.tasks | map({task_key, notebook_task: .notebook_task.notebook_path}))}'
          else
            echo "Error al crear el workflow"
            echo "Response completo: $create_response"
            exit 1
          fi

      - name: Validate Workflow Configuration
        run: |
          set -euo pipefail
          DEST_HOST=${{ secrets.DATABRICKS_DEST_HOST }}
          DEST_TOKEN=${{ secrets.DATABRICKS_DEST_TOKEN }}
          WORKFLOW_NAME="WF_ADB"
          echo "Validando la configuración del workflow creado..."
          # Look the freshly created job up by name, as an end-to-end check.
          workflows_list=$(curl -s --fail -X GET \
            -H "Authorization: Bearer $DEST_TOKEN" \
            "$DEST_HOST/api/2.1/jobs/list")
          job_id=$(echo "$workflows_list" | jq -r --arg name "$WORKFLOW_NAME" 'first(.jobs[]? | select(.settings.name == $name) | .job_id) // empty')
          if [ -n "$job_id" ]; then
            echo "Workflow encontrado con ID: $job_id"
            # Print a human-readable summary of the job settings.
            job_details=$(curl -s --fail -X GET \
              -H "Authorization: Bearer $DEST_TOKEN" \
              "$DEST_HOST/api/2.1/jobs/get?job_id=$job_id")
            echo "Resumen del workflow:"
            echo "Nombre: $(echo "$job_details" | jq -r '.settings.name')"
            echo "Número de tareas: $(echo "$job_details" | jq '.settings.tasks | length')"
            echo ""
            echo "Tareas configuradas:"
            echo "$job_details" | jq -r '.settings.tasks[] | "- " + .task_key + " → " + .notebook_task.notebook_path'
            echo ""
            echo "Cluster configurado:"
            echo "Tipo: $(echo "$job_details" | jq -r '.settings.job_clusters[0].new_cluster.node_type_id')"
            echo "Workers: $(echo "$job_details" | jq -r '.settings.job_clusters[0].new_cluster.num_workers')"
            echo "Spark Version: $(echo "$job_details" | jq -r '.settings.job_clusters[0].new_cluster.spark_version')"
          else
            echo "No se pudo encontrar el workflow creado"
            exit 1
          fi

      - name: Clean up
        run: |
          rm -rf notebooks_to_deploy
          rm -f workflow_config.json

      - name: Done
        run: |
          # Summary now reflects the notebooks actually deployed; the old
          # text listed placeholder names (ntbk_1/ntbk_2, tarea1_notebook1)
          # that never existed in this pipeline.
          echo "¡Despliegue completado exitosamente!"
          echo ""
          echo "Resumen:"
          echo "Notebooks desplegados: Preparacion_Ambiente, Ingest_supercias_compania, Ingest_supercias_ranking, Ingest_supercias_sector, Ingest_supercias_segmento, Transform_supercias, Load_supercias"
          echo "Workflow creado: WF_ADB"
          echo "Tareas configuradas:"
          echo "  - Preparacion_Ambiente"
          echo "  - Ingest_supercias_compania"
          echo "  - Ingest_supercias_ranking"
          echo "  - Ingest_supercias_sector"
          echo "  - Ingest_supercias_segmento"
          echo "  - Transform_supercias"
          echo "  - Load_supercias"
          echo "Cluster: job cluster 'serverless_cluster' configurado"
          echo ""
          echo "Accede a tu workspace de Databricks para ver el workflow en la sección 'Workflows'"