Skip to content

Commit

Permalink
modify jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
georgepstaylor committed Nov 8, 2024
1 parent fc284b9 commit abf6ec6
Show file tree
Hide file tree
Showing 28 changed files with 12,237 additions and 47 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ kustomize/**/output.yaml
kustomize/base/resources.yaml
jobs/reindex/values-reindex-*.yaml
completed.txt
ids.json
90 changes: 89 additions & 1 deletion Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,96 @@ tasks:
# Installs the ./jobs/reindex chart into NAMESPACE, pointing it at
# OPENSEARCH_HOST and passing a hard-coded fromId/toId range.
# NOTE(review): two `helm install` commands with different release names and
# ID ranges appear below; this looks like the removed and the added line of a
# diff shown together - confirm that only one of them is meant to run.
simple_reindex:
cmds:
- |
helm install "reindex-default" ./jobs/reindex --set "global.elasticsearch.host={{.OPENSEARCH_HOST}}" --set "fromId=350000000" --set "toId=400000000" --namespace {{.NAMESPACE}}
helm install "reindex-default-$(openssl rand -hex 4)" ./jobs/reindex --set "global.elasticsearch.host={{.OPENSEARCH_HOST}}" --set "fromId=27451380" --set "toId=27908429" --namespace {{.NAMESPACE}}
reindex_list:
  # Re-indexes an explicit list of node IDs read from ids.json (expected shape:
  # {"list": [<id>, ...]}). IDs are processed in batches of BATCH_SIZE: each
  # batch is installed as one Helm release of ./jobs/reindex-list, awaited
  # with `kubectl wait`, and then uninstalled before the next batch starts.
  cmds:
    - |
      # Number of IDs handed to a single Helm release.
      BATCH_SIZE=40
      # JSON file holding the IDs under the top-level "list" key.
      JSON_FILE="ids.json"
      # One release name is reused for every batch of this run.
      RANDOM_ID=$(openssl rand -hex 4)
      RELEASE_NAME="reindex-list-${RANDOM_ID}"

      # Install one release for the IDs given as arguments and wait for its
      # jobs. A failed or timed-out wait is reported as a warning instead of
      # being announced as "Jobs completed!"; processing still continues so
      # one bad batch does not block the remaining ones.
      create_helm_job() {
        # Join the arguments into the comma-separated form helm expects for
        # a list value: --set "idList={a,b,c}".
        local idList=""
        for id in "$@"; do
          if [ -z "$idList" ]; then
            idList="$id"
          else
            idList="$idList,$id"
          fi
        done

        echo "Creating job for IDs: $idList"
        helm upgrade --install "$RELEASE_NAME" \
          --set "idList={${idList}}" \
          --set "global.elasticsearch.host={{.OPENSEARCH_HOST}}" \
          --set "global.namespace={{.NAMESPACE}}" \
          ./jobs/reindex-list \
          --namespace "{{.NAMESPACE}}"

        echo "Waiting for the jobs to complete..."
        if kubectl wait --for=condition=complete job --namespace "{{.NAMESPACE}}" -l "reindex-type=list" --timeout=10h; then
          echo "Jobs completed!"
        else
          echo "WARNING: kubectl wait failed or timed out; jobs for IDs $idList may not have completed" >&2
        fi
      }

      # Remove the release of the batch that has just been processed.
      remove_helm_job() {
        helm uninstall "$RELEASE_NAME" --namespace "{{.NAMESPACE}}"
      }

      # Read the IDs (one per line). Stop early when the file is missing or
      # is not valid JSON rather than silently doing nothing.
      if ! ids=$(jq -r '.list[]' "$JSON_FILE"); then
        echo "ERROR: could not read IDs from $JSON_FILE" >&2
        exit 1
      fi

      index=0
      for id in $ids; do
        batch[$index]="$id"
        index=$((index + 1))

        # A full batch: run it, clean it up, and start collecting again.
        if [ "$index" -ge "$BATCH_SIZE" ]; then
          create_helm_job "${batch[@]}"
          remove_helm_job
          index=0
          unset batch
        fi
      done

      echo "All full batches have been processed!"

      # Left-over IDs (fewer than BATCH_SIZE) form the final batch. Cleanup
      # only happens here when such a batch exists, so helm is never asked to
      # uninstall a release that an earlier iteration already removed.
      if [ "$index" -gt 0 ]; then
        create_helm_job "${batch[@]}"
        echo "Cleaning up..."
        remove_helm_job
        echo "Cleanup complete!"
      fi

      echo "All jobs have been created!"
simple_reindex_date:
  # Installs the ./jobs/reindex_date chart under a randomly suffixed release
  # name, passing a hard-coded fromTime/toTime window.
  cmds:
    - |
      helm install "reindex-default-$(openssl rand -hex 4)" ./jobs/reindex_date \
        --set "global.elasticsearch.host={{.OPENSEARCH_HOST}}" \
        --set "fromTime=201707060001" \
        --set "toTime=201707070001" \
        --namespace {{.NAMESPACE}}
simple_reindex_date_metadata-only:
  # Installs the ./jobs/reindex_date chart with content=false over a
  # hard-coded fromTime/toTime window. The release name is fixed, so an
  # existing release of that name has to be removed before running again.
  cmds:
    - |
      helm install "reindex-default-date-meta" ./jobs/reindex_date \
        --set "global.elasticsearch.host={{.OPENSEARCH_HOST}}" \
        --set "fromTime=202402010100" \
        --set "toTime=202402100100" \
        --set "content=false" \
        --namespace {{.NAMESPACE}}
batch_reindex:
vars:
START: "{{.START | default 0}}"
Expand Down
6 changes: 6 additions & 0 deletions jobs/reindex-list/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: v2
# Chart identity.
name: delius-alfresco-search-enterprise-reindexing
description: A Helm chart for deploying Alfresco Elasticsearch reindexing job
type: application
# Chart version, followed by the version of the application it deploys.
version: 1.2.0
appVersion: "3.3.0"
93 changes: 93 additions & 0 deletions jobs/reindex-list/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# alfresco-search-enterprise

![Version: 1.2.0](https://img.shields.io/badge/Version-1.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.3.0](https://img.shields.io/badge/AppVersion-3.3.0-informational?style=flat-square)

A Helm chart for deploying the Alfresco Elasticsearch connector.

Please refer to the [documentation](https://github.com/Alfresco/acs-deployment/blob/master/docs/helm/README.md) for information on the Helm charts and deployment instructions.

## Requirements

| Repository | Name | Version |
|------------|------|---------|
| https://alfresco.github.io/alfresco-helm-charts/ | activemq | 3.1.0 |
| https://alfresco.github.io/alfresco-helm-charts/ | alfresco-common | 2.0.0 |
| https://helm.elastic.co | elasticsearch | 7.17.3 |

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| activemq.enabled | bool | `false` | Enable embedded broker - useful when testing this chart in standalone |
| affinity | object | `{}` | |
| contentMediaTypeCache.enabled | bool | `true` | |
| contentMediaTypeCache.refreshTime | string | `"0 0 * * * *"` | |
| elasticsearch.clusterHealthCheckParams | string | `"wait_for_status=yellow&timeout=1s"` | |
| elasticsearch.enabled | bool | `false` | Enable embedded elasticsearch - useful when using this chart in standalone |
| elasticsearch.replicas | int | `1` | |
| fullnameOverride | string | `""` | |
| global.alfrescoRegistryPullSecrets | string | `"quay-registry-secret"` | |
| global.elasticsearch | object | `{"existingSecretName":null,"host":null,"password":null,"port":null,"protocol":null,"user":null}` | Shared connections details for Elasticsearch/Opensearch cluster |
| global.elasticsearch.existingSecretName | string | `nil` | Alternatively, provide connection details via an existing secret that contains ELASTICSEARCH_USERNAME and ELASTICSEARCH_PASSWORD keys |
| global.elasticsearch.host | string | `nil` | The host where service is available |
| global.elasticsearch.password | string | `nil` | The password required to access the service, if any |
| global.elasticsearch.port | string | `nil` | The port where service is available |
| global.elasticsearch.protocol | string | `nil` | Valid values are http or https |
| global.elasticsearch.user | string | `nil` | The username required to access the service, if any |
| imagePullSecrets | list | `[]` | |
| indexName | string | `"alfresco"` | Name of the existing search index, usually created by repo |
| liveIndexing.content.image.pullPolicy | string | `"IfNotPresent"` | |
| liveIndexing.content.image.repository | string | `"quay.io/alfresco/alfresco-elasticsearch-live-indexing-content"` | |
| liveIndexing.content.image.tag | string | `"3.3.0"` | |
| liveIndexing.content.replicaCount | int | `1` | |
| liveIndexing.mediation.image.pullPolicy | string | `"IfNotPresent"` | |
| liveIndexing.mediation.image.repository | string | `"quay.io/alfresco/alfresco-elasticsearch-live-indexing-mediation"` | |
| liveIndexing.mediation.image.tag | string | `"3.3.0"` | |
| liveIndexing.metadata.image.pullPolicy | string | `"IfNotPresent"` | |
| liveIndexing.metadata.image.repository | string | `"quay.io/alfresco/alfresco-elasticsearch-live-indexing-metadata"` | |
| liveIndexing.metadata.image.tag | string | `"3.3.0"` | |
| liveIndexing.metadata.replicaCount | int | `1` | |
| liveIndexing.path.image.pullPolicy | string | `"IfNotPresent"` | |
| liveIndexing.path.image.repository | string | `"quay.io/alfresco/alfresco-elasticsearch-live-indexing-path"` | |
| liveIndexing.path.image.tag | string | `"3.3.0"` | |
| liveIndexing.path.replicaCount | int | `1` | |
| messageBroker.existingSecretName | string | `nil` | Provide connection details alternatively via an existing secret that contains BROKER_URL, BROKER_USERNAME and BROKER_PASSWORD keys |
| messageBroker.password | string | `nil` | Broker password |
| messageBroker.url | string | `nil` | Broker URL formatted as per: https://activemq.apache.org/failover-transport-reference |
| messageBroker.user | string | `nil` | Broker username |
| nameOverride | string | `""` | |
| nodeSelector | object | `{}` | |
| pathIndexingComponent.enabled | bool | `true` | |
| podAnnotations | object | `{}` | |
| podSecurityContext | object | `{}` | |
| reindexing.enabled | bool | `true` | Create the one-shot job to trigger the reindexing of repo contents |
| reindexing.image.pullPolicy | string | `"IfNotPresent"` | |
| reindexing.image.repository | string | `"quay.io/alfresco/alfresco-elasticsearch-reindexing"` | |
| reindexing.image.tag | string | `"3.3.0"` | |
| reindexing.initcontainers.waitForRepository.resources.limits.cpu | string | `"0.25"` | |
| reindexing.initcontainers.waitForRepository.resources.limits.memory | string | `"10Mi"` | |
| reindexing.pathIndexingEnabled | bool | `true` | |
| reindexing.postgresql.database | string | `"alfresco"` | The database name to use |
| reindexing.postgresql.existingSecretName | string | `nil` | Alternatively, provide connection details via an existing secret that contains DATABASE_USERNAME and DATABASE_PASSWORD keys |
| reindexing.postgresql.hostname | string | `"postgresql-acs"` | The host where database service is available |
| reindexing.postgresql.password | string | `nil` | The password required to access the service |
| reindexing.postgresql.port | int | `5432` | The port where service is available |
| reindexing.postgresql.url | string | `nil` | |
| reindexing.postgresql.user | string | `nil` | The username required to access the service |
| reindexing.resources.limits.cpu | string | `"2"` | |
| reindexing.resources.limits.memory | string | `"512Mi"` | |
| reindexing.resources.requests.cpu | string | `"0.5"` | |
| reindexing.resources.requests.memory | string | `"128Mi"` | |
| resources.limits.cpu | string | `"2"` | |
| resources.limits.memory | string | `"2048Mi"` | |
| resources.requests.cpu | string | `"0.5"` | |
| resources.requests.memory | string | `"256Mi"` | |
| searchIndex | object | `{"existingSecretName":null,"host":null,"password":null,"port":null,"protocol":null,"user":null}` | Overrides .Values.global.elasticsearch |
| searchIndex.existingSecretName | string | `nil` | Alternatively, provide connection details via an existing secret that contains ELASTICSEARCH_USERNAME and ELASTICSEARCH_PASSWORD keys |
| searchIndex.host | string | `nil` | The host where service is available |
| searchIndex.password | string | `nil` | The password required to access the service, if any |
| searchIndex.port | string | `nil` | The port where service is available |
| searchIndex.protocol | string | `nil` | Valid values are http or https |
| searchIndex.user | string | `nil` | The username required to access the service, if any |
| securityContext | object | `{}` | |
| tolerations | list | `[]` | |
21 changes: 21 additions & 0 deletions jobs/reindex-list/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- /*
content-services.shortname

Renders "<release name>-<name override>", truncated to 63 characters with any
trailing "-" removed. The override is read from .Values.nameOverride (the
conventional Helm spelling); the legacy .Values.NameOverride spelling is still
honoured, and "alfresco-cs" is used when neither is set.
*/ -}}
{{- define "content-services.shortname" -}}
{{- $name := (.Values.nameOverride | default .Values.NameOverride | default "alfresco-cs") -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{- /*
spring.activemq.env

Emits three container env entries that set SPRING_ACTIVEMQ_BROKERURL,
SPRING_ACTIVEMQ_USER and SPRING_ACTIVEMQ_PASSWORD to the $(BROKER_URL),
$(BROKER_USERNAME) and $(BROKER_PASSWORD) references.
NOTE(review): the BROKER_* variables are presumably supplied by the message
broker secret loaded through envFrom - confirm against the consuming template.
*/ -}}
{{- define "spring.activemq.env" -}}
- name: SPRING_ACTIVEMQ_BROKERURL
value: $(BROKER_URL)
- name: SPRING_ACTIVEMQ_USER
value: $(BROKER_USERNAME)
- name: SPRING_ACTIVEMQ_PASSWORD
value: $(BROKER_PASSWORD)
{{- end -}}

{{- /*
alfresco-search-enterprise.searchIndexExistingSecretName

Renders the value of .Values.global.elasticsearch.existingSecretName.
*/ -}}
{{- define "alfresco-search-enterprise.searchIndexExistingSecretName" -}}
{{ .Values.global.elasticsearch.existingSecretName }}
{{- end -}}

{{- /*
alfresco-search-enterprise.config.spring

Renders the SPRING_ELASTICSEARCH_REST_URIS entry as
"<protocol>://<host>:<port>", all three taken from .Values.global.elasticsearch.
*/ -}}
{{- define "alfresco-search-enterprise.config.spring" -}}
{{- $es := .Values.global.elasticsearch -}}
SPRING_ELASTICSEARCH_REST_URIS: "{{ $es.protocol }}://{{ $es.host }}:{{ $es.port }}"
{{- end -}}
16 changes: 16 additions & 0 deletions jobs/reindex-list/templates/reindexing-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{- /*
ConfigMap holding the non-secret settings of the list re-indexing jobs: index
name, search cluster URI, shared file store and transform router URLs, the
path-indexing switch, the datasource URL, and any extra key/value pairs given
in .Values.environment.
*/ -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: reindexing-list-configmap
data:
  ELASTICSEARCH_INDEXNAME: "{{ .Values.indexName }}"
  {{- /* include + nindent keeps the helper output aligned with the other keys */}}
  {{- include "alfresco-search-enterprise.config.spring" . | nindent 2 }}
  ALFRESCO_SHAREDFILESTORE_BASEURL: http://alfresco-content-services-alfresco-filestore:80/alfresco/api/-default-/private/sfs/versions/1/file/
  ALFRESCO_ACCEPTEDCONTENTMEDIATYPESCACHE_BASEURL: http://alfresco-content-services-alfresco-router/transform/config
  ALFRESCO_REINDEX_PATHINDEXINGENABLED: {{ .Values.pathIndexingEnabled | quote }}
  {{- /* Always a quoted string: an unset postgresql.url renders "" rather than a YAML null */}}
  SPRING_DATASOURCE_URL: {{ .Values.postgresql.url | default "" | quote }}
  {{- /* range over an unset map is a no-op, so no surrounding "if" is needed */}}
  {{- range $key, $val := .Values.environment }}
  {{ $key }}: {{ $val | quote }}
  {{- end }}
95 changes: 95 additions & 0 deletions jobs/reindex-list/templates/reindexing-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{{- /*
Renders one Kubernetes Job per entry of .Values.idList. Each Job re-indexes the
ID range [id, id + 1) by passing fromId/toId to the reindexing image through
JAVA_OPTS. Every Job carries the label reindex-type=list, which the Taskfile's
reindex_list task uses as the selector for `kubectl wait`.

The ID is converted to an integer once and reused everywhere, so a value that
was parsed as a float (for example from a values file) can never be rendered
in scientific notation inside a resource name or a JVM option.
*/ -}}
{{- range $index, $item := .Values.idList }}
{{- $fromId := int $item }}
{{- $toId := add $fromId 1 }}
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "reindexing-{{ $fromId }}-{{ $toId }}-{{ $index }}"
  labels:
    reindex-job: "{{ $fromId }}-{{ $toId }}-{{ $index }}"
    reindex-type: "list"
spec:
  template:
    metadata:
      {{- with $.Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
    spec:
      securityContext:
        fsGroup: 1000
        runAsGroup: 1000
        runAsNonRoot: true
        runAsUser: 33000
        seccompProfile:
          type: RuntimeDefault
        supplementalGroups:
          - 1
      {{- /* NOTE(review): rendered as a single secret name - confirm .Values.imagePullSecrets is a string, not a list */}}
      imagePullSecrets:
        - name: {{ $.Values.imagePullSecrets }}
      restartPolicy: Never
      containers:
        - name: "reindexing-{{ $fromId }}-{{ $toId }}"
          image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag }}"
          imagePullPolicy: {{ $.Values.image.pullPolicy }}
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            runAsNonRoot: true
            seccompProfile:
              type: RuntimeDefault
          resources: {{- toYaml $.Values.resources | nindent 12 }}
          volumeMounts:
            - name: reindexing-prefixes-file-volume
              mountPath: /alf/reindex.prefixes-file.json
              subPath: reindex.prefixes-file.json
          envFrom:
            - configMapRef:
                name: reindexing-list-configmap
            - secretRef:
                name: {{ $.Values.messageBroker.existingSecretName }}
          env:
            - name: SPRING_DATASOURCE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ $.Values.postgresql.existingSecretName }}
                  key: DATABASE_PASSWORD
            - name: SPRING_DATASOURCE_USERNAME
              valueFrom:
                secretKeyRef:
                  name: {{ $.Values.postgresql.existingSecretName }}
                  key: DATABASE_USERNAME
            {{- include "spring.activemq.env" $ | nindent 12 }}
            - name: ALFRESCO_REINDEX_PREFIXES_FILE
              value: file:///alf/reindex.prefixes-file.json
            {{- /* Also present in reindexing-list-configmap; this entry reads it from the rds-instance-output secret */}}
            - name: SPRING_DATASOURCE_URL
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output
                  key: RDS_JDBC_URL
            - name: ACTIVEMQ_POOL_ENABLED
              value: "true"
            - name: ACTIVEMQ_POOL_SIZE
              value: "200"
            - name: JAVA_OPTS
              value: >-
                -Dalfresco.reindex.fromId={{ $fromId }}
                -Dalfresco.reindex.toId={{ $toId }}
                -Dalfresco.reindex.contentIndexingEnabled=true
                -Dalfresco.reindex.metadataIndexingEnabled=true
                -Dalfresco.reindex.multithreadedStepEnabled=true
                -Dalfresco.reindex.concurrentProcessors=30
                -Dalfresco.reindex.batchSize=1000
                -Dalfresco.reindex.pageSize=10000
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
      volumes:
        - name: reindexing-prefixes-file-volume
          configMap:
            name: reindexing-list-prefixes-configmap
{{- end }}
Loading

0 comments on commit abf6ec6

Please sign in to comment.