Merge pull request #3 from agahkarakuzu/master
[FIX] Terraform 0.12 compatibility, network issues, file provisions, kubelet configs et al.
agahkarakuzu committed Sep 14, 2024
2 parents 255f1f0 + 4aa1596 commit a4744e6
Showing 23 changed files with 591 additions and 336 deletions.
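The Terraform 0.12 compatibility changes themselves live in the module .tf files, which are not shown in this excerpt. As a rough, illustrative sketch only (file paths and resource names below are hypothetical; the variable names are taken from the cloud-init templates changed in this commit), the 0.11-era template_file data source typically gives way to first-class expressions and the built-in templatefile() function:

# Terraform 0.11 style: quoted interpolations and a template_file data source
data "template_file" "master_cloud_init" {
  template = "${file("${path.module}/cloud-init/kubeadm/master.yaml")}"
  vars = {
    admin_user = "${var.admin_user}"
    sftp_ip    = "${var.sftp_ip}"
    sftp_dir   = "${var.sftp_dir}"
  }
}

# Terraform 0.12 style: bare expressions and templatefile()
locals {
  master_cloud_init = templatefile("${path.module}/cloud-init/kubeadm/master.yaml", {
    admin_user = var.admin_user
    sftp_ip    = var.sftp_ip
    sftp_dir   = var.sftp_dir
  })
}

Because templatefile() is evaluated directly inside expressions, the extra data source disappears in the 0.12 form.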
2 changes: 1 addition & 1 deletion cloud-init/kubeadm/common.yaml
@@ -16,7 +16,7 @@ write_files:
permissions: 0755
content: |
#!/bin/bash
git clone https://github.com/neurolibre/kubeadm-bootstrap.git /tmp/kubeadm-bootstrap
git clone https://github.com/agahkarakuzu/kubeadm-bootstrap.git /tmp/kubeadm-bootstrap
cd /tmp/kubeadm-bootstrap
sudo ./install-kubeadm.bash
16 changes: 11 additions & 5 deletions cloud-init/kubeadm/master.yaml
@@ -3,25 +3,28 @@ packages:
- nfs-kernel-server

runcmd:
# NFS data
- mkdir -p /DATA
- echo "192.168.73.179:/DATA /DATA nfs rw,noatime,nolock,hard,tcp 0 0" >> /etc/fstab
# Mount SFTP to /DATA directory
- mkdir -p ${sftp_dir}
- echo "${sftp_ip}:${sftp_dir} ${sftp_dir} nfs rw,noatime,nolock,hard,tcp 0 0" >> /etc/fstab
- mount -a
# NFS
# NFS: export the /shared directory and make it accessible to the whole /24 subnet
- echo "/shared `hostname -I | awk '{print $1}' | grep -Po '(\d+\.){3}'`0/24(rw,sync,no_root_squash,no_all_squash)" > /etc/exports
- mkdir -p /shared
- chown ${admin_user} /shared
- systemctl enable nfs-kernel-server
- systemctl restart nfs-kernel-server
# Kubeadm
- echo "SUDO_UID=${admin_user} sudo -E /tmp/kubeadm-bootstrap/init-master.bash" >> /tmp/install.sh
# Initialize kubeadm
- su ${admin_user} -c "/tmp/install.sh"
# Share the join command with all the nodes
- kubeadm token create --print-join-command > /shared/kubeadm-join.bash
- kubectl label nodes `hostname` type=production
# k8s commands
- cat /tmp/kubeadm-bootstrap/.bashrc >> /home/${admin_user}/.bashrc
# ssh config for nodes
# make sure that k8s node is ready, and enable easy ssh
- echo "Adding nodes"
- su ${admin_user} -c "touch /home/${admin_user}/.ssh/config"
- mv /tmp/kubeadm-bootstrap/add_nodes.bash /home/${admin_user}/
- chmod u+x /home/${admin_user}/add_nodes.bash
@@ -31,7 +34,10 @@ runcmd:
- echo "Host registry" >> /home/${admin_user}/.ssh/config;
- echo " HostName "${docker_registry} >> /home/${admin_user}/.ssh/config;
- echo " User "${admin_user} >> /home/${admin_user}/.ssh/config;
- su ${admin_user} -c "sudo docker login ${docker_registry} --username ${docker_id} --password ${docker_password}"
#- su ${admin_user} -c "sudo docker login ${docker_registry} --username ${docker_id} --password ${docker_password}"
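# run docker as ${admin_user} (via the docker group) so the login credentials land in /home/${admin_user}/.docker/config.json instead of /root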
- sudo groupadd docker
- sudo usermod -aG docker ${admin_user}
- su ${admin_user} -c "docker login ${docker_registry} --username ${docker_id} --password ${docker_password}"
- while [ ! -d /var/lib/kubelet/ ]; do sleep 1; done;
- cp /home/${admin_user}/.docker/config.json /var/lib/kubelet/
# create a flag to signal that master configuration is finished
9 changes: 6 additions & 3 deletions cloud-init/kubeadm/node.yaml
@@ -5,13 +5,16 @@ runcmd:
# NFS
- mkdir -p /shared
- echo "${master_ip}:/shared /shared nfs rw,noatime,nolock,hard,tcp 0 0" >> /etc/fstab
- mkdir -p /DATA
- echo "192.168.73.179:/DATA /DATA nfs rw,noatime,nolock,hard,tcp 0 0" >> /etc/fstab
- mkdir -p ${sftp_dir}
- echo "${sftp_ip}:${sftp_dir} ${sftp_dir} nfs rw,noatime,nolock,hard,tcp 0 0" >> /etc/fstab
- mount -a
# Kubeadm
- echo "while [ ! -f /shared/kubeadm-join.bash ]; do sleep 1; done; sudo bash /shared/kubeadm-join.bash" >> /tmp/install.sh
- su ${admin_user} -c "/tmp/install.sh"
# authorizing docker
- su ${admin_user} -c "sudo docker login ${docker_registry} --username ${docker_id} --password ${docker_password}"
#- su ${admin_user} -c "sudo docker login ${docker_registry} --username ${docker_id} --password ${docker_password}"
- sudo groupadd docker
- sudo usermod -aG docker ${admin_user}
- su ${admin_user} -c "docker login ${docker_registry} --username ${docker_id} --password ${docker_password}"
- while [ ! -d /var/lib/kubelet/ ]; do sleep 1; done;
- cp /home/${admin_user}/.docker/config.json /var/lib/kubelet/
Binary file added terraform-modules/.DS_Store
Binary file added terraform-modules/binderhub/.DS_Store
7 changes: 7 additions & 0 deletions terraform-modules/binderhub/assets/cloudflare-secret.yaml
@@ -0,0 +1,7 @@
apiVersion: v1
kind: Secret
metadata:
  name: cloudflare-api-token-secret
type: Opaque
stringData:
  api-token: "${cloudflare_token}"
44 changes: 12 additions & 32 deletions terraform-modules/binderhub/assets/config.yaml
@@ -8,24 +8,27 @@ jupyterhub:
ingress:
enabled: true
hosts:
- binder.conp.cloud
- test.conp.cloud
annotations:
kubernetes.io/ingress.class: nginx
kubernetes.io/tls-acme: "true"
cert-manager.io/issuer: letsencrypt-production
tls:
- secretName: binder-conp-cloud-tls
hosts:
- binder.conp.cloud
- test.conp.cloud
#https://discourse.jupyter.org/t/pre-building-images-on-binderhub/4325/4
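# continuously pre-pull images onto the nodes in the background (see link above) so user sessions start faster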
prePuller:
continuous:
enabled: true
hub:
baseUrl: /jupyter/
image:
name: jupyterhub/k8s-hub
tag: 1.1.3-n141.h28efde1b
name: quay.io/jupyterhub/k8s-hub
tag: "3.3.5"
config:
BinderSpawner:
cors_allow_origin: '*'
proxy:
service:
type: NodePort
@@ -34,49 +37,25 @@ jupyterhub:
every: 30
concurrency: 5 #to avoid Hub slow down, 5 concurrent processes
maxAge: 7200 #2h
# https://z2jh.jupyter.org/en/latest/resources/reference.html#singleuser
singleuser:
storage:
type: none
extraVolumes:
- name: shared-data
hostPath:
path: /DATA
- name: book-data
hostPath:
path: /DATA/book-artifacts
- name: repo2data
hostPath:
path: /DATA
extraVolumeMounts:
- name: shared-data
mountPath: /home/jovyan/data # where each user can reach shared data
readOnly : true
- name: book-data
mountPath: /mnt/books
- name: repo2data
mountPath: /mnt/data
memory:
guarantee: 4G
limit: 8G
cpu:
guarantee: 1
startTimeout: 3600 #1h
#for repo2data https://zero-to-jupyterhub.readthedocs.io/en/latest/resources/reference.html#singleuser-initcontainers
# https://zero-to-jupyterhub.readthedocs.io/en/latest/jupyterhub/customizing/user-environment.html#about-user-storage-and-adding-files-to-it
extraFiles:
jb_build:
mountPath: /usr/local/share/jb_build.bash
mode: 0755
repo2data:
mountPath: /usr/local/share/repo2data.bash
mode: 0755
fill_submission_metadata:
mountPath: /usr/local/share/fill_submission_metadata.bash
mode: 0755
lifecycleHooks:
postStart:
exec:
command: ["/bin/sh", "-c", "/usr/local/share/repo2data.bash; /usr/local/share/jb_build.bash"]

# BinderHub config
config:
@@ -93,7 +72,8 @@ config:
- ^shishirchoudharygic/mltraining.*
- ^hmharshit/mltraining.*
BinderHub:
hub_url: https://binder.conp.cloud/jupyter
cors_allow_origin: '*'
hub_url: https://test.conp.cloud/jupyter
use_registry: true
image_prefix: binder-registry.conp.cloud/binder-registry.conp.cloud/binder-

@@ -103,7 +83,7 @@ service:
ingress:
enabled: true
hosts:
- binder.conp.cloud
- test.conp.cloud
annotations:
kubernetes.io/ingress.class: nginx
kubernetes.io/tls-acme: "true"
@@ -114,4 +94,4 @@ ingress:
tls:
- secretName: binder-conp-cloud-tls
hosts:
- binder.conp.cloud
- test.conp.cloud
54 changes: 35 additions & 19 deletions terraform-modules/binderhub/assets/install-binderhub.sh
@@ -1,13 +1,17 @@
#!/bin/bash
#waiting for k8s initialization
while [ ! -f /var/lib/cloud/instance/boot-finished ]; do sleep 10; done
while [ ! -f /shared/k8s-initialized ]; do sleep 1; done

echo "[Binderhub pre-install] BOOT?"
while [ ! -f /var/lib/cloud/instance/boot-finished ]; do sleep 30; echo "Waiting for cloud-init on master to finalize (could take ~10min)"; done
echo "[Binderhub pre-install] K8S READY?"
while [ ! -f /shared/k8s-initialized ]; do sleep 5; echo "Waiting for K8S on master to be ready"; done

echo "[Binderhub install] Started"

cd /home/${admin_user}

# node health monitoring
sudo helm repo add deliveryhero https://charts.deliveryhero.io/
sudo helm install deliveryhero/node-problem-detector --generate-name
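# pass the admin user's kubeconfig explicitly: under sudo, helm would otherwise look for /root/.kube/config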
sudo helm install deliveryhero/node-problem-detector --generate-name --kubeconfig ~/.kube/config

#Persistent volume
kubectl create -f pv.yaml
@@ -18,23 +22,25 @@ kubectl create namespace cert-manager
sudo helm repo add jetstack https://charts.jetstack.io
sudo helm repo update
# running on master node to avoid issues with webhook not in the k8s network
sudo helm install cert-manager --namespace cert-manager --version v1.0.3 jetstack/cert-manager --set installCRDs=true \
--set nodeSelector."node-role\.kubernetes\.io/master=" \
--set cainjector.nodeSelector."node-role\.kubernetes\.io/master=" \
--set webhook.nodeSelector."node-role\.kubernetes\.io/master="
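# newer Kubernetes releases label the control plane node with node-role.kubernetes.io/control-plane; the old node-role.kubernetes.io/master label is gone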
sudo helm install cert-manager --namespace cert-manager --version v1.12.0 jetstack/cert-manager --set installCRDs=true \
--set nodeSelector."node-role\.kubernetes\.io/control-plane=" \
--set cainjector.nodeSelector."node-role\.kubernetes\.io/control-plane=" \
--set webhook.nodeSelector."node-role\.kubernetes\.io/control-plane=" \
--kubeconfig ~/.kube/config
#wait until cert-manager is ready
kubectl wait --namespace cert-manager \
--for=condition=ready pod \
--selector=app.kubernetes.io/instance=cert-manager \
--timeout=300s
# apply the issuer(s)
# kubectl create namespace binderhub
kubectl create namespace binderhub
# kubectl apply -f staging-binderhub-issuer.yaml
kubectl apply -f cloudflare-secret.yaml -n binderhub
kubectl apply -f production-binderhub-issuer.yaml

# Binderhub proxy
sudo helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx/
sudo helm install binderhub-proxy ingress-nginx/ingress-nginx --namespace=binderhub -f nginx-ingress.yaml
sudo helm install binderhub-proxy ingress-nginx/ingress-nginx --namespace=binderhub -f nginx-ingress.yaml --kubeconfig ~/.kube/config --version 4.1.4
# wait until nginx is ready (https://kubernetes.github.io/ingress-nginx/deploy/)
kubectl wait --namespace binderhub \
--for=condition=ready pod \
@@ -50,19 +56,29 @@ sudo helm repo add jupyterhub https://jupyterhub.github.io/helm-chart
sudo helm repo update
sudo helm install binderhub jupyterhub/binderhub --version=${binder_version} \
--namespace=binderhub -f config.yaml -f secrets.yaml \
--set-file jupyterhub.singleuser.extraFiles.repo2data.stringData=repo2data.bash \
--set-file jupyterhub.singleuser.extraFiles.fill_submission_metadata.stringData=fill_submission_metadata.bash \
--set-file jupyterhub.singleuser.extraFiles.jb_build.stringData=jb_build.bash
--kubeconfig ~/.kube/config

# DROPPING JB BUILD INSIDE POD SUPPORT
# --set-file jupyterhub.singleuser.extraFiles.repo2data.stringData=./repo2data.bash \
# --set-file jupyterhub.singleuser.extraFiles.fill_submission_metadata.stringData=./fill_submission_metadata.bash \
# --set-file jupyterhub.singleuser.extraFiles.jb_build.stringData=./jb_build.bash \

# sudo helm upgrade binderhub jupyterhub/binderhub -n binderhub --version=${binder_version} \
# -f config.yaml -f secrets.yaml \
# --set-file jupyterhub.singleuser.extraFiles.repo2data.stringData=./repo2data.bash \
# --set-file jupyterhub.singleuser.extraFiles.fill_submission_metadata.stringData=./fill_submission_metadata.bash \
# --set-file jupyterhub.singleuser.extraFiles.jb_build.stringData=./jb_build.bash \
# --kubeconfig ~/.kube/config
kubectl wait --namespace binderhub \
--for=condition=ready pod \
--selector=release=binderhub \
--timeout=120s

# Grafana and prometheus
# https://github.com/pangeo-data/pangeo-binder#binder-monitoring
sudo helm repo add grafana https://grafana.github.io/helm-charts
sudo helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
sudo helm repo add kube-state-metrics https://kubernetes.github.io/kube-state-metrics
sudo helm repo update
sudo helm install grafana-prod grafana/grafana
sudo helm install prometheus-prod prometheus-community/prometheus
# sudo helm repo add grafana https://grafana.github.io/helm-charts
# sudo helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# sudo helm repo add kube-state-metrics https://kubernetes.github.io/kube-state-metrics
# sudo helm repo update
# sudo helm install grafana-prod grafana/grafana --kubeconfig ~/.kube/config
# sudo helm install prometheus-prod prometheus-community/prometheus --kubeconfig ~/.kube/config
61 changes: 52 additions & 9 deletions terraform-modules/binderhub/assets/jb_build.bash
@@ -1,5 +1,10 @@
#!/bin/bash


# ---------------------------------------------------------------
# DEPRECATED
# ---------------------------------------------------------------

# repo parameters
IFS='/'; BINDER_PARAMS=(${BINDER_REF_URL}); unset IFS;
PROVIDER_NAME=${BINDER_PARAMS[-5]}
@@ -8,14 +13,28 @@ REPO_NAME=${BINDER_PARAMS[-3]}
COMMIT_REF=${BINDER_PARAMS[-1]}
# paths
CONFIG_FILE="content/_config.yml"
NEUROLIBRE_CUSTOM="content/_neurolibre.yml"
BOOK_DST_PATH="/mnt/books/${USER_NAME}/${PROVIDER_NAME}/${REPO_NAME}/${COMMIT_REF}"
BOOK_BUILT_FLAG="${BOOK_DST_PATH}/successfully_built"
BOOK_BUILD_LOG="${BOOK_DST_PATH}/book-build.log"
BINDERHUB_URL="https://binder.conp.cloud"
BINDERHUB_URL="https://test.conp.cloud"
BOOK_CACHE_PATH=${BOOK_DST_PATH}"/_build/.jupyter_cache"

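# Read a top-level scalar field from a YAML file with sed (strips quotes and whitespace), so the pod needs no yq/python dependency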
extract_yaml_field() {
local yaml_file="$1"
local field_name="$2"
if [ -f "$yaml_file" ]; then
local field_value=$(sed -n "s/^[[:blank:]]*${field_name}:[[:blank:]]*\(.*\)/\1/p" "$yaml_file")
field_value=$(sed 's/^"\(.*\)"$/\1/' <<< "$field_value")
field_value=$(sed 's/[[:space:]]//g' <<< "$field_value")
echo "$field_value"
else
echo "YAML file not found: $yaml_file"
fi
}

# checking if book build is necessary
echo "Checking if jupyter book build will be done..." 2>&1 | tee ${BOOK_BUILD_LOG}
echo "Checking if the book will be built..." 2>&1 | tee ${BOOK_BUILD_LOG}
if [ -f "${CONFIG_FILE}" ]; then
echo -e "\t ${CONFIG_FILE} exists." 2>&1 | tee -a ${BOOK_BUILD_LOG}
else
@@ -31,7 +50,7 @@ else
echo -e "\t ${BOOK_BUILT_FLAG} not found." 2>&1 | tee -a ${BOOK_BUILD_LOG}
fi
if git log -1 | grep "neurolibre-debug"; then
echo "Bypassing jupyter-book build from user request." 2>&1 | tee -a ${BOOK_BUILD_LOG}
echo "Bypassing jupyter-book build as requested by the user (neurolibre-debug)" 2>&1 | tee -a ${BOOK_BUILD_LOG}
exit 0
fi
# changing config if test submission
@@ -59,17 +78,41 @@ execute:
EOF
fi


if [ -f "$NEUROLIBRE_CUSTOM" ]; then
BOOK_LAYOUT=$(extract_yaml_field "$NEUROLIBRE_CUSTOM" "book_layout")
SINGLE_PAGE=$(extract_yaml_field "$NEUROLIBRE_CUSTOM" "single_page")
MYST=$(extract_yaml_field "$NEUROLIBRE_CUSTOM" "build_myst")
else
echo "YAML file not found: $yaml_file"
fi

if [ "$MYST" == "true" ]; then
echo "Myst build requested, skipping jupyter book build"
exit 0
fi

# building jupyter book
echo "" 2>&1 | tee -a ${BOOK_BUILD_LOG}
echo "Building jupyter-book for ${USER_NAME}/${PROVIDER_NAME}/${REPO_NAME}/${COMMIT_REF}" 2>&1 | tee -a ${BOOK_BUILD_LOG}
echo "Build source: ${USER_NAME}/${PROVIDER_NAME}/${REPO_NAME}/${COMMIT_REF}" 2>&1 | tee -a ${BOOK_BUILD_LOG}
echo "" 2>&1 | tee -a ${BOOK_BUILD_LOG}
mkdir -p ${BOOK_DST_PATH}
mkdir -p ${BOOK_CACHE_PATH}
touch ${BOOK_BUILD_LOG}
jupyter-book build --all --verbose --path-output ${BOOK_DST_PATH} content 2>&1 | tee -a ${BOOK_BUILD_LOG}
# Write the first line to the log
echo "" 2>&1 | tee -a ${BOOK_BUILD_LOG}

if [ "$BOOK_LAYOUT" = "traditional" ]; then
# SINGLE_PAGE exists when BOOK_LAYOUT is traditional (documentation)
echo -e "Customized book build: traditional paper layout based on ${SINGLE_PAGE}" 2>&1 | tee -a ${BOOK_BUILD_LOG}
jupyter-book build --all --verbose --path-output ${BOOK_DST_PATH} --builder singlehtml content/${SINGLE_PAGE} 2>&1 | tee -a ${BOOK_BUILD_LOG}
else
# Use default build otherwise
jupyter-book build --all --verbose --path-output ${BOOK_DST_PATH} content 2>&1 | tee -a ${BOOK_BUILD_LOG}
fi

# https://stackoverflow.com/a/1221870
JB_EXIT_CODE=${PIPESTATUS[0]}
echo "" 2>&1 | tee -a ${BOOK_BUILD_LOG}
# checking execution
if grep ${BOOK_BUILD_LOG} -e "Execution Failed"; then
echo -e "Jupyter-book execution failed!" 2>&1 | tee -a ${BOOK_BUILD_LOG}
@@ -80,9 +123,9 @@ if [ ${JB_EXIT_CODE} -ne 0 ] ; then
echo -e "Jupyter-book build failed!" 2>&1 | tee -a ${BOOK_BUILD_LOG}
exit 0
else
echo "Taring book build artifacts..." 2>&1 | tee -a ${BOOK_BUILD_LOG}
echo "Compressing book build artifacts..." 2>&1 | tee -a ${BOOK_BUILD_LOG}
tar -zcvf ${BOOK_DST_PATH}".tar.gz" ${BOOK_DST_PATH} 2>&1 | tee -a ${BOOK_BUILD_LOG}
touch ${BOOK_BUILT_FLAG}
echo "Filling metadata for current submission..." 2>&1 | tee -a ${BOOK_BUILD_LOG}
echo "Saving metadata for current submission..." 2>&1 | tee -a ${BOOK_BUILD_LOG}
/bin/bash /usr/local/share/fill_submission_metadata.bash
fi
fi