Skip to content

Commit

Permalink
reindexing optimisations
Browse files Browse the repository at this point in the history
  • Loading branch information
georgepstaylor committed Sep 16, 2024
1 parent 8a9ea92 commit 26ed218
Show file tree
Hide file tree
Showing 20 changed files with 301 additions and 22 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ kustomize/**/charts/
kustomize/**/output.yaml
kustomize/base/resources.yaml
jobs/reindex/values-reindex-*.yaml
completed.txt
1 change: 1 addition & 0 deletions jobs/migrate-db/values_preprod.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Values for the migrate-db job when run against preprod.
# NOTE(review): presumably passed to Helm as a values file — confirm against the job's deploy tooling.
environment: preprod
13 changes: 12 additions & 1 deletion jobs/reindex/templates/reindexing-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,19 @@ spec:
secretKeyRef:
name: rds-instance-output
key: RDS_JDBC_URL
# ActiveMQ connection pooling for the reindexing container.
# Kubernetes EnvVar.value is a string field: a bare `true` or `100` is parsed
# by YAML as a bool/int and the manifest is rejected, so both are quoted.
- name: ACTIVEMQ_POOL_ENABLED
  value: "true"
- name: ACTIVEMQ_POOL_SIZE
  value: "100"
- name: JAVA_OPTS
value: "-Dalfresco.reindex.fromId={{ .Values.fromId }} -Dalfresco.reindex.toId={{ .Values.toId }}"
value: >-
-Dalfresco.reindex.fromId={{ .Values.fromId }} -Dalfresco.reindex.toId={{ .Values.toId }}
-Dalfresco.reindex.multithreadedStepEnabled=true
-Dalfresco.reindex.concurrentProcessors=30
-Dalfresco.reindex.batchSize=1000
-Dalfresco.reindex.pageSize=10000
-Dalfresco.reindex.contentIndexingEnabled=true
-Dalfresco.reindex.metadataIndexingEnabled=true
ports:
- name: http
containerPort: 8080
Expand Down
8 changes: 4 additions & 4 deletions jobs/reindex/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ postgresql:
database:
existingSecretName: rds-instance-output
image:
tag: 4.0.1
tag: 3.2.0
repository: quay.io/alfresco/alfresco-elasticsearch-reindexing
resources:
requests:
cpu: "0.5"
memory: "256Mi"
memory: "1024Mi"
limits:
cpu: "2"
memory: "2048Mi"
cpu: "4"
memory: "4096Mi"
initcontainers:
waitForRepository:
resources:
Expand Down
4 changes: 4 additions & 0 deletions kustomize/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ patches:
- path: patch-ingress-repository.yaml
- path: patch-ingress-share.yaml
- path: patch-delete-reindexing-config-map.yaml
- path: patch-live-path-indexing.yaml
- path: patch-live-metadata-indexing.yaml
- path: patch-live-content-indexing.yaml
- path: patch-live-mediation-indexing.yaml
2 changes: 2 additions & 0 deletions kustomize/base/patch-ingress-repository.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ metadata:
annotations:
external-dns.alpha.kubernetes.io/aws-weight: "100"
kubernetes.io/ingress.class: default
nginx.ingress.kubernetes.io/proxy-read-timeout: "200"
nginx.ingress.kubernetes.io/proxy-send-timeout: "200"
spec:
ingressClassName: default
4 changes: 2 additions & 2 deletions kustomize/base/patch-job-reindexing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ spec:
template:
spec:
containers:
- name: alfresco-search-enterprise-reindexing # Update this with the actual container name
- name: alfresco-search-enterprise-reindexing # Update this with the actual container name
volumeMounts:
- name: prefixes-file-volume
mountPath: /alf/reindex.prefixes-file.json
Expand All @@ -22,4 +22,4 @@ spec:
volumes:
- name: prefixes-file-volume
configMap:
name: prefixes-file
name: prefixes-file
14 changes: 14 additions & 0 deletions kustomize/base/patch-live-content-indexing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Kustomize patch for the live content-indexing Deployment: adds environment
# variables to the alfresco-search-enterprise-content container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alfresco-content-services-alfresco-search-enterprise-content
spec:
  template:
    spec:
      containers:
        # Container is matched by name; only its env list is patched.
        - name: alfresco-search-enterprise-content
          env:
            # Env values must be strings, hence the quotes.
            - name: ENABLE_CONTENT_INDEXING
              value: "true"
            - name: ACTIVEMQ_POOL_SIZE
              value: "100"
14 changes: 14 additions & 0 deletions kustomize/base/patch-live-mediation-indexing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Kustomize patch for the live-indexing mediation Deployment: adds environment
# variables to the alfresco-search-enterprise-mediation container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alfresco-content-services-alfresco-search-enterprise-mediation
spec:
  template:
    spec:
      containers:
        # Container is matched by name; only its env list is patched.
        - name: alfresco-search-enterprise-mediation
          env:
            # NOTE(review): this is the same ENABLE_CONTENT_INDEXING variable
            # used by the content-indexing patch — confirm the mediation
            # component actually reads it.
            - name: ENABLE_CONTENT_INDEXING
              value: "true"
            - name: ACTIVEMQ_POOL_SIZE
              value: "100"
14 changes: 14 additions & 0 deletions kustomize/base/patch-live-metadata-indexing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Kustomize patch for the live metadata-indexing Deployment: adds environment
# variables to the alfresco-search-enterprise-metadata container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alfresco-content-services-alfresco-search-enterprise-metadata
spec:
  template:
    spec:
      containers:
        # Container is matched by name; only its env list is patched.
        - name: alfresco-search-enterprise-metadata
          env:
            # NOTE(review): this is the same ENABLE_CONTENT_INDEXING variable
            # used by the content-indexing patch — confirm the metadata
            # component actually reads it.
            - name: ENABLE_CONTENT_INDEXING
              value: "true"
            - name: ACTIVEMQ_POOL_SIZE
              value: "100"
14 changes: 14 additions & 0 deletions kustomize/base/patch-live-path-indexing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Kustomize patch for the live path-indexing Deployment: adds environment
# variables to the alfresco-search-enterprise-path container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alfresco-content-services-alfresco-search-enterprise-path
spec:
  template:
    spec:
      containers:
        # Container is matched by name; only its env list is patched.
        - name: alfresco-search-enterprise-path
          env:
            # NOTE(review): this is the same ENABLE_CONTENT_INDEXING variable
            # used by the content-indexing patch — confirm the path
            # component actually reads it.
            - name: ENABLE_CONTENT_INDEXING
              value: "true"
            - name: ACTIVEMQ_POOL_SIZE
              value: "100"
28 changes: 14 additions & 14 deletions kustomize/base/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -806,20 +806,20 @@ alfresco-search-enterprise:
database:
existingSecretName: rds-instance-output
image:
tag: 4.0.1
liveIndexing:
mediation:
image:
tag: 4.0.1
content:
image:
tag: 4.0.1
metadata:
image:
tag: 4.0.1
path:
image:
tag: 4.0.1
tag: 3.2.0
liveIndexing:
mediation:
image:
tag: 3.2.0
content:
image:
tag: 3.2.0
metadata:
image:
tag: 3.2.0
path:
image:
tag: 3.2.0
alfresco-digital-workspace:
nodeSelector: {}
enabled: false
Expand Down
43 changes: 43 additions & 0 deletions kustomize/preprod/allowlist.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Source addresses allowed to reach the preprod environment, one entry per
# line with its owner noted in the trailing comment.
# NOTE(review): presumably rendered into the nginx whitelist-source-range
# annotation that the preprod ingress patches set to "placeholder" — confirm
# against the deploy tooling.
# NOTE(review): the first six entries are bare IPs while every other entry
# carries an explicit prefix length — confirm the consumer accepts both forms.
- "3.10.104.193" # legacy delius-stage-az1-nat-gateway
- "3.11.26.150" # legacy delius-stage-az2-nat-gateway
- "18.130.189.137" # legacy delius-stage-az3-nat-gateway
- "35.178.209.113" # Cloud Platform live-1-eu-west-2a
- "3.8.51.207" # Cloud Platform live-1-eu-west-2c
- "35.177.252.54" # Cloud Platform live-1-eu-west-2b
- "35.176.93.186/32" # MoJ GlobalProtect
- "35.177.125.252/32" # MoJ VPN Gateway Proxies
- "35.177.137.160/32" # MoJ VPN Gateway Proxies
- "81.134.202.29/32" # MoJ VPN
- "51.149.250.0/24" # PTTP / MoJO Production Account BYOIP CIDR range
- "51.149.251.0/24" # PTTP / MoJO Production Account BYOIP CIDR range - PreProd
- "213.121.161.112/28" # 102 Petty France WiFi
- "217.33.148.210/32" # Digital studio
- "13.43.9.198/32" # MP non_live_data-public-eu-west-2a-nat
- "13.42.163.245/32" # MP non_live_data-public-eu-west-2b-nat
- "18.132.208.127/32" # MP non_live_data-public-eu-west-2c-nat
- "51.149.249.0/29" # ARK Corsham Internet Egress Exponential-E
- "51.149.249.32/29" # ARK Corsham Internet Egress Exponential-E
- "194.33.192.0/25" # ARK internet (DOM1)
- "194.33.193.0/25" # ARK internet (DOM1)
- "194.33.196.0/25" # ARK internet (DOM1)
- "194.33.197.0/25" # ARK internet (DOM1)
- "195.59.75.0/24" # ARK internet (DOM1)
- "194.33.248.0/29" # ARK Corsham Internet Egress Vodafone
- "194.33.249.0/29" # ARK Corsham Internet Egress Vodafone
- "62.25.106.209/32" # OMNI
- "195.92.40.49/32" # OMNI
- "62.25.109.197/32" # Quantum
- "195.92.38.16/28" # Quantum
- "212.137.36.230/32" # Quantum
- "78.33.10.50/31" # Unilink AOVPN
- "78.33.10.52/30" # Unilink AOVPN
- "78.33.10.56/30" # Unilink AOVPN
- "78.33.10.60/32" # Unilink AOVPN
- "78.33.32.99/32" # Unilink AOVPN
- "78.33.32.100/30" # Unilink AOVPN
- "78.33.32.104/30" # Unilink AOVPN
- "78.33.32.108/32" # Unilink AOVPN
- "83.98.63.176/29" # Unilink AOVPN
- "194.75.210.216/29" # Unilink AOVPN
- "217.138.45.109/32" # Unilink AOVPN
- "217.138.45.110/32" # Unilink AOVPN
10 changes: 10 additions & 0 deletions kustomize/preprod/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Preprod overlay: takes everything from the shared base and layers the
# preprod-specific patches below on top of it.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The base kustomization supplies all resources.
resources:
  - ../base

# Preprod-only patches: ingress hosts for repository and share, and the
# filestore PVC size.
patches:
  - path: patch-ingress-repository.yaml
  - path: patch-ingress-share.yaml
  - path: patch-filestore-pvc.yaml
8 changes: 8 additions & 0 deletions kustomize/preprod/patch-filestore-pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Kustomize patch: sets the storage request of the filestore
# PersistentVolumeClaim for the preprod overlay.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: filestore-default-pvc
spec:
  resources:
    requests:
      storage: 250Gi
29 changes: 29 additions & 0 deletions kustomize/preprod/patch-ingress-repository.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Kustomize patch for the repository Ingress in preprod: sets the
# external-dns record identifier, the source-range annotation and the
# preprod host name for routing and TLS.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: alfresco-content-services-alfresco-cs-repository
  annotations:
    external-dns.alpha.kubernetes.io/set-identifier: alfresco-content-services-alfresco-cs-repository-hmpps-delius-alfresco-preprod-green
    # NOTE(review): "placeholder" is a literal value here; presumably replaced
    # with the real allowlist at deploy time — confirm.
    nginx.ingress.kubernetes.io/whitelist-source-range: "placeholder"
spec:
  rules:
    - host: hmpps-delius-alfresco-preprod.apps.live.cloud-platform.service.justice.gov.uk
      http:
        paths:
          # Both paths route to the same repository service on port 80.
          - backend:
              service:
                name: alfresco-content-services-alfresco-cs-repository
                port:
                  number: 80
            path: /
            pathType: Prefix
          - backend:
              service:
                name: alfresco-content-services-alfresco-cs-repository
                port:
                  number: 80
            path: /api-explorer
            pathType: Prefix
  tls:
    # NOTE(review): no secretName is set here, whereas the share ingress patch
    # names share-ingress-cert — confirm the certificate comes from elsewhere.
    - hosts:
        - hmpps-delius-alfresco-preprod.apps.live.cloud-platform.service.justice.gov.uk
30 changes: 30 additions & 0 deletions kustomize/preprod/patch-ingress-share.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Kustomize patch for the share Ingress in preprod: sets the external-dns
# record identifier, the source-range annotation, the preprod share host name
# and the TLS certificate secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: alfresco-content-services-alfresco-cs-share
  annotations:
    external-dns.alpha.kubernetes.io/set-identifier: alfresco-content-services-alfresco-cs-share-hmpps-delius-alfresco-preprod-green
    # NOTE(review): "placeholder" is a literal value here; presumably replaced
    # with the real allowlist at deploy time — confirm.
    nginx.ingress.kubernetes.io/whitelist-source-range: "placeholder"
spec:
  rules:
    - host: share.hmpps-delius-alfresco-preprod.apps.live.cloud-platform.service.justice.gov.uk
      http:
        paths:
          # Both paths route to the same share service on port 80.
          - backend:
              service:
                name: alfresco-content-services-alfresco-cs-share
                port:
                  number: 80
            path: /
            pathType: Prefix
          - backend:
              service:
                name: alfresco-content-services-alfresco-cs-share
                port:
                  number: 80
            path: /share/page/
            pathType: Prefix
  tls:
    - hosts:
        - share.hmpps-delius-alfresco-preprod.apps.live.cloud-platform.service.justice.gov.uk
      secretName: share-ingress-cert
41 changes: 41 additions & 0 deletions kustomize/preprod/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Preprod overrides for the values defined in ./values.yaml.
# NOTE(review): the path is presumably the base values file (kustomize/base/values.yaml) — confirm.
# NOTE(review): key nesting is not visible in this view (every line sits at column 0);
# take the indentation from the repository copy before using this file.
repository:
# -- The startup probe to cover the worst-case startup time for slow clusters
startupProbe:
periodSeconds: 30
failureThreshold: 40
readinessProbe:
initialDelaySeconds: 60
periodSeconds: 30
timeoutSeconds: 15
failureThreshold: 40 # NOTE(review): comment used to read "Increased from 6 to 12" but the value is 40 — confirm the intended threshold
livenessProbe:
initialDelaySeconds: 260 # Increased from 130 to 260
periodSeconds: 20
timeoutSeconds: 15
failureThreshold: 40
replicaCount: 1
image:
tag: release_7.3.2_elasticsearch-r5.0.2-content-latest
resources: # requests and limits set closer together to ensure CP stability
requests:
cpu: 2
memory: 8Gi
limits:
cpu: 6
memory: 24Gi
persistence:
baseSize: 100Gi
share:
replicaCount: 1
image:
tag: release_7.3.2_elasticsearch-r5.0.2-share-latest
# Public endpoint details for this environment (preprod host, HTTPS on 443).
externalHost: hmpps-delius-alfresco-preprod.apps.live.cloud-platform.service.justice.gov.uk
externalProtocol: https
externalPort: 443
tika:
replicaCount: 2
resources:
limits:
cpu: 2
memory: 2Gi
16 changes: 15 additions & 1 deletion kustomize/stage/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# this file overrides values defined in ./values.yaml
repository:
# -- The startup probe to cover the worst-case startup time for slow clusters
startupProbe:
periodSeconds: 30
failureThreshold: 40
readinessProbe:
initialDelaySeconds: 60
periodSeconds: 30
timeoutSeconds: 15
failureThreshold: 40 # NOTE(review): comment used to read "Increased from 6 to 12" but the value is 40 — confirm the intended threshold
livenessProbe:
initialDelaySeconds: 260 # Increased from 130 to 260
periodSeconds: 20
timeoutSeconds: 15
failureThreshold: 40
replicaCount: 1
image:
tag: release_7.3.2_elasticsearch-r5.0.2-content-latest
Expand All @@ -16,7 +30,7 @@ share:
replicaCount: 1
image:
tag: release_7.3.2_elasticsearch-r5.0.2-share-latest
externalHost: hmpps-delius-alfresco-dev.apps.live.cloud-platform.service.justice.gov.uk
externalHost: hmpps-delius-alfresco-stage.apps.live.cloud-platform.service.justice.gov.uk
externalProtocol: https
externalPort: 443
tika:
Expand Down
29 changes: 29 additions & 0 deletions scripts/create-opensearch-index.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
###########
# Create opensearch index with correct parameters
# Usage: create-opensearch-index.sh <index-name> <shards> <replicas>
# Portforward the opensearch domain with ./tools/scripts/opensearch-connect.sh <env> first
#
# Arguments:
#   index-name  name of the index to create
#   shards      value for number_of_shards (non-negative integer)
#   replicas    value for number_of_replicas (non-negative integer)
#
# Exits with status 1, after printing a message to stderr, when an argument is
# missing or a count is not a plain integer. Stops at the first curl call that
# fails at transport level (e.g. the port-forward is not running); HTTP error
# responses do not abort the script because curl is run without --fail.
###########

set -euo pipefail

if [ "$#" -lt 3 ]; then
  echo "Usage: $(basename "$0") <index-name> <shards> <replicas>" >&2
  exit 1
fi

index_name="$1"
shards="$2"
replicas="$3"

# The counts are placed unquoted into the JSON body, so anything other than a
# plain integer would produce a malformed (or unintended) request.
for count in "$shards" "$replicas"; do
  case "$count" in
    '' | *[!0-9]*)
      echo "Error: <shards> and <replicas> must be non-negative integers (got '$count')" >&2
      exit 1
      ;;
  esac
done

# Local end of the port-forward to the opensearch domain.
index_url="localhost:8080/${index_name}"

# create opensearch index
create_body=$(printf '{"settings":{"index":{"number_of_shards":%s,"number_of_replicas":%s}}}' "$shards" "$replicas")
curl -X PUT "$index_url" -H 'Content-Type: application/json' -d "$create_body"

# increase maximum fields limit (alfresco seems to need just over 1000 which is the default)
curl -X PUT "${index_url}/_settings" -H 'Content-Type: application/json' \
  -d '{ "index.mapping.total_fields.limit": 2000 }'

# The refresh interval is the time in which indexed data becomes searchable.
# It is disabled (-1) here to avoid unnecessary resource usage during indexing.
curl -X PUT "${index_url}/_settings" -H 'Content-Type: application/json' \
  -d '{ "index" : { "refresh_interval" : "-1" }}'

# set the translog flush threshold to 2GB
curl -X PUT "${index_url}/_settings" -H 'Content-Type: application/json' \
  -d '{ "index" : { "translog" : { "flush_threshold_size" : "2gb" }} }'

0 comments on commit 26ed218

Please sign in to comment.