This repository was archived by the owner on Jan 19, 2024. It is now read-only.

Commit fcd0097

moxious and adam-cowley authored Aug 6, 2020

Antora Docs (#68)

* Antora documentation: ASCIIDOC -> HTML/CSS/JS
* Test system adjustments to use a custom build container, and generate antora docs on every build

Co-authored-by: Adam Cowley <[email protected]>

1 parent cce71f3 commit fcd0097

29 files changed: +4225, -37 lines
 

‎.circleci/config.yml

+23 -33

@@ -2,7 +2,8 @@ version: 2.1
 jobs:
   build:
     docker:
-      - image: debian:stretch
+      # Custom image; see tools/build/Dockerfile
+      - image: gcr.io/neo4j-helm/build:latest

     environment:
       PROJECT: neo4j-helm
@@ -21,26 +22,16 @@ jobs:
       - run:
           name: Tooling pre-requisites
           command: |
-            # Secure software install; required first in order to be able to process keys, packages, etc.
-            apt-get update && apt-get install -y apt-transport-https ca-certificates curl gnupg2 software-properties-common
-
-            # Google Cloud stuff
-            echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
-            curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
-
-            # Docker stuff
-            curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add -
-            add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian $(lsb_release -cs) stable"
-
-            apt-get update
-            EXTRA_NEEDED_TOOLS="wget make gettext-base jq"
-            apt-get install -y google-cloud-sdk $EXTRA_NEEDED_TOOLS
-
             # We will install local tools so add those to path.
             echo "export PATH=./tools:.:$PATH" >> $BASH_ENV
             mkdir -p $BUILD_ARTIFACTS
             mkdir -p tools

+      - restore_cache:
+          name: Restore NPM Package Cache
+          keys:
+            - npm-packages-{{ checksum "doc/package.json" }}
+
       - run:
           name: Setup GCP Tooling
           command: |
@@ -49,30 +40,26 @@ jobs:
             $GCLOUD_SERVICE_ACCOUNT \
             --key-file=$SERVICE_KEY_FILE
             gcloud auth configure-docker
-
+
       - run:
-          name: Kubectl Setup
+          name: Generate Docs
           command: |
-            cd tools
-            curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
-            chmod +x kubectl
-            kubectl --help
-
+            cd doc
+            npm install
+            ./node_modules/.bin/antora --stacktrace docs.yml
+
+      - save_cache:
+          name: Save Yarn Package Cache
+          key: npm-packages-{{ checksum "doc/package.json" }}
+          paths:
+            - ~/.cache/npm

       - run:
           name: GKE Setup / Auth
           command: |
             echo "GKE SETUP"
             export CLUSTER_NAME=$CLUSTER-$CIRCLE_BUILD_NUM
             ./tools/test/provision-k8s.sh $CLUSTER_NAME
-
-      - run:
-          name: Install Helm Binaries
-          command: |
-            cd tools
-            curl -LO https://get.helm.sh/helm-v3.2.1-linux-amd64.tar.gz
-            tar zxvf helm-v3.2.1-linux-amd64.tar.gz
-            mv linux-amd64/helm .
-            helm version

       - run:
           name: Lint
@@ -215,4 +202,7 @@ jobs:
             --quiet

       - store_artifacts:
-          path: build
+          path: build
+
+      - store_artifacts:
+          path: doc/build/site
‎.gitignore

+2 -2

@@ -42,6 +42,6 @@ k8s-poc
 *.log
 expanded.yaml

-package.json
-package-lock.json
 node_modules
+doc/node
+doc/build

‎.helmignore

+1

@@ -4,3 +4,4 @@ k8s-poc
 *.log
 test
 node_modules
+doc

‎doc/docs.yml

+16 (new file)

site:
  title: Neo4j-Helm User Guide
  url: /neo4j-helm-docs
content:
  sources:
  - url: ../
    start_path: doc/docs
    branches: HEAD
ui:
  bundle:
    url: https://github.com/neo4j-documentation/docs-refresh/raw/master/ui/build/ui-bundle.zip
    snapshot: true
asciidoc:
  attributes:
    page-theme: docs
    page-cdn: /_/

‎doc/docs/antora.yml

+11 (new file)

name: neo4j-helm
version: 1.0.0
title: Neo4j-Helm User Guide
start_page: ROOT:index.adoc
nav:
- modules/ROOT/nav.adoc

asciidoc:
  attributes:
    docs-version: 1.0.0
    copyright: Neo4j Inc.
doc/docs/modules/ROOT/attachments/LICENSE.txt

+202 (new file)

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

‎doc/docs/modules/ROOT/nav.adoc

+15 (new file)

* xref::index.adoc[Neo4j Helm User Guide]
* xref::prerequisites.adoc[Prerequisites]
* xref::quickstart.adoc[Quick Start]
* xref::installation.adoc[Installation]
* xref::configreference.adoc[Configuration Reference]
* xref::tooling.adoc[Tooling]
* xref::operations.adoc[Operations]
* xref::backup.adoc[Backup]
* xref::restore.adoc[Restore]
* xref::rolling-upgrades.adoc[Rolling Upgrades]
* xref::externalexposure.adoc[External Exposure]
* xref::hardware.adoc[Hardware & Machine Shape]
* xref::networking.adoc[Networking & Security]
* xref::troubleshooting.adoc[Troubleshooting]
* xref::development.adoc[Local Development of the Chart]
doc/docs/modules/ROOT/pages/backup.adoc

+117 (new file)

[#backup]
# Backing up Neo4j Containers

[NOTE]
**This approach assumes you have Google Cloud credentials and wish to store your backups
on Google Cloud Storage**. If this is not the case, you will need to adjust the backup
script for your desired cloud storage method, but the overall approach will work for any backup location.

[NOTE]
**This approach works only for Neo4j 4.0+**. The backup tool and the
DBMS itself changed quite a lot between 3.5 and 4.0, and the approach
here will likely not work for older databases without substantial
modification.

## Background & Important Information

### Required Neo4j Config

This is provided for you out of the box by the helm chart, but if you
customize, you should bear these requirements in mind:

* `dbms.backup.enabled=true`
* `dbms.backup.listen_address=0.0.0.0:6362`

The default for Neo4j is to listen only on 127.0.0.1, which will not
work, as other containers would not be able to access the backup port.

### Backup Pointers

Backups are written as `.tar.gz` files named after the database they contain, with a
date string recording when they were taken, for example: `neo4j-2020-06-16-12:32:57.tar.gz`.

When you take a backup, you get both the dated version and a "latest" copy;
e.g. the above file will also be copied to `neo4j-latest.tar.gz` in the same bucket.

[NOTE]
**Reminder: each time you take a backup, the latest file will be overwritten**.

The purpose of doing this is to have a stable name in storage where the latest
backup can always be found, without losing any of the previous backups.
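For example, fetching the most recent backup of the `neo4j` database back out of cloud storage might look like this sketch (the bucket name is an assumption; substitute your own):

```shell
# Hypothetical bucket name; this copies the stable "latest" artifact locally.
gsutil cp gs://my-bucket/neo4j-latest.tar.gz .
```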
### Neo4j Backs Up Databases, Not the DBMS

In Neo4j 4.0, the DBMS can be multi-database; most systems have at least 2 DBs,
"system" and "neo4j". *These need to be backed up and restored individually*.

## Steps to Take a Backup

### Create a service key secret to access cloud storage

First, create a kubernetes secret that contains the content of your account service key. This key must have permissions to access the bucket where the backup set will be stored.

```shell
MY_SERVICE_ACCOUNT_KEY=$HOME/.google/my-service-key.json
kubectl create secret generic neo4j-service-key \
    --from-file=credentials.json=$MY_SERVICE_ACCOUNT_KEY
```

The backup container takes this kubernetes secret
(named `neo4j-service-key`) and mounts it as a file
inside of the backup container (`/auth/credentials.json`). That
file is then used to authenticate the storage client that
uploads the backup set to cloud storage when it's complete.

### Running a Backup

The backup method is itself a mini helm chart. To run a backup, execute this
minimal required example from within the `tools/backup` directory, where the chart resides:

```shell
helm install my-backup-deployment . \
    --set neo4jaddr=my-neo4j.default.svc.cluster.local:6362 \
    --set bucket=gs://my-bucket/ \
    --set database="neo4j\,system" \
    --set secretName=neo4j-service-key
```

You must have first created the `neo4j-service-key`
secret in the same namespace where your Neo4j is running.

If all goes well, after a period of time when the Kubernetes Job is complete, you
will see the backup files appear in the designated bucket.

[NOTE]
**If your backup does not appear, consult the job container logs to find out
why.**
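One way to inspect those logs is sketched below; the job name is derived from your helm release name (the label value here follows the example above and is an assumption):

```shell
# List backup jobs, then read the logs from the job's pod(s).
kubectl get jobs
kubectl logs -l job-name=my-backup-deployment --tail=100
```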
**Required parameters**

* `neo4jaddr`: an address where your cluster is running, ideally the
discovery address.
* `bucket`: where you want the backup copied to. It should be of the form `gs://bucketname`. This parameter may include a relative path (`gs://bucketname/mycluster`).
* `databases`: a comma-separated list of databases to back up. The default is
`neo4j,system`. If your DBMS has many individual databases, you should change this.

**Optional environment variables**

All of the following variables mimic the command line options
for https://neo4j.com/docs/operations-manual/current/backup/performing/#backup-performing-command[neo4j-admin backup documented here]:

* `pageCache`
* `heapSize`
* `fallbackToFull` (true/false), default=true
* `checkConsistency` (true/false), default=true
* `checkIndexes` (true/false), default=true
* `checkGraph` (true/false), default=true
* `checkLabelScanStore` (true/false), default=true
* `checkPropertyOwners` (true/false), default=false

### Exit Conditions

If the backup of any of the individual databases named in the database parameter
fails, the entire container exits with a non-zero exit code and the job fails.

**Note**: it is possible for a Neo4j backup to succeed but for its consistency check to fail.
This is noted in the logs, but operationally it behaves as a successful backup.
doc/docs/modules/ROOT/pages/configreference.adoc

+234 (new file)

= Configuration Reference

[abstract]
Reference tables with a list of all configurable parameters and their defaults.

## General Configuration

.General Configuration Reference
|===
|Parameter |Description |Default

| `image`
| Neo4j image
| `neo4j`

| `imageTag`
| Neo4j version
| (The default matches the release version of the helm chart itself)

| `imagePullPolicy`
| Image pull policy
| `IfNotPresent`

| `podDisruptionBudget`
| Pod disruption budget
| `{}`

| `authEnabled`
| Is login/password required?
| `true`

| `plugins`
| Plugins to automatically install. (The syntax must be a valid JSON array string.) https://github.com/neo4j/docker-neo4j/blob/master/neo4jlabs-plugins.json[Valid plugins are listed here]
| `"[\"apoc\"]"`

| `defaultDatabase`
| The name of the default database to configure in Neo4j (`dbms.default_database`)
| `neo4j`

| `neo4jPassword`
| Password to log in to the Neo4j database, if a password is required
| (random string of 10 characters)

| `resources`
| Resources required (e.g. CPU, memory)
| `{}` (no specific requests or limits)

| `restoreSecret`
| The name of the kubernetes secret to mount to `/creds` in the container. Please see the link:../tools/restore/README-RESTORE.md[restore documentation] for how to use this.
| (none)

| `existingPasswordSecret`
| The name of the kubernetes secret which contains the `neo4j-password`
| (none)

| `podLabels`
| Extra / custom labels to apply to core & replica statefulset pods
| `{}`

| `podAnnotations`
| Extra / custom annotations to apply to core & replica statefulset pods
| `{}`

|===
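As an example of how these fit together, here is a hedged sketch of supplying your own password secret rather than letting the chart generate one; the secret and release names are placeholders, and `neo4j-password` is the key named in the table above:

```shell
# Create a secret holding the password under the key the chart expects.
kubectl create secret generic my-neo4j-password \
    --from-literal=neo4j-password=mySecretPassword

# Install, referencing the existing secret instead of passing neo4jPassword.
helm install mygraph . \
    --set acceptLicenseAgreement=yes \
    --set existingPasswordSecret=my-neo4j-password
```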
## Neo4j Core Members

.Core Member Configuration Reference
|===
|Parameter |Description |Default

| `core.configMap`
| Configmap providing configuration for core cluster members. If not specified, defaults that come with the chart will be used.
| `$NAME-neo4j-core-config`

| `core.standalone`
| Whether to run in single-server STANDALONE mode. When using standalone mode, `core.numberOfServers` is *ignored* and you will only get 1 Neo4j pod. The remainder of the core configuration still applies.
| `false`

| `core.numberOfServers`
| Number of machines in CORE mode
| `3`

| `core.sideCarContainers`
| Sidecar containers to add to the core pod. An example use case is a sidecar which identifies and labels the leader when using the http API
| `{}`

| `core.initContainers`
| Init containers to add to the core pod. An example use case is a script that installs custom plugins/extensions
| `{}`

| `core.persistentVolume.enabled`
| Whether or not persistence is enabled
| `true`

| `core.persistentVolume.storageClass`
| Storage class of backing PVC
| `standard` (uses beta storage class annotation)

| `core.persistentVolume.size`
| Size of data volume
| `10Gi`

| `core.persistentVolume.mountPath`
| Persistent Volume mount root path
| `/data`

| `core.persistentVolume.subPath`
| Subdirectory of the volume to mount
| `nil`

| `core.persistentVolume.annotations`
| Persistent Volume Claim annotations
| `{}`

| `core.additionalVolumes`
| See the "Other Storage" section in the user guide for more information on this option.
| `{}`

| `core.additionalVolumeMounts`
| See the "Other Storage" section in the user guide for more information on this option.
| `{}`

| `core.service.type`
| Service type
| `ClusterIP`

| `core.service.annotations`
| Service annotations
| `{}`

| `core.service.labels`
| Custom Service labels
| `{}`

| `core.service.loadBalancerSourceRanges`
| List of IP CIDRs allowed access to the LB (if `core.service.type: LoadBalancer`)
| `[]`

| `core.discoveryService.type`
| Service type
| `ClusterIP`

| `core.discoveryService.annotations`
| Service annotations
| `{}`

| `core.discoveryService.labels`
| Custom Service labels
| `{}`

| `core.discoveryService.loadBalancerSourceRanges`
| List of IP CIDRs allowed access to the LB (if `core.discoveryService.type: LoadBalancer`)
| `[]`

|===

## Neo4j Read Replicas

.Read Replica Configuration Reference
|===
|Parameter |Description |Default

| `readReplica.configMap`
| Configmap providing configuration for read replica cluster members. If not specified, defaults that come with the chart will be used.
| `$NAME-neo4j-replica-config`

| `readReplica.numberOfServers`
| Number of machines in READ_REPLICA mode. May not be used with `core.standalone=true`
| `0`

| `readReplica.autoscaling.enabled`
| Enable horizontal pod autoscaler
| `false`

| `readReplica.autoscaling.targetAverageUtilization`
| Target CPU utilization
| `70`

| `readReplica.autoscaling.minReplicas`
| Min replicas for autoscaling
| `1`

| `readReplica.autoscaling.maxReplicas`
| Max replicas for autoscaling
| `3`

| `readReplica.initContainers`
| Init containers to add to the replica pods. An example use case is a script that installs custom plugins/extensions
| `{}`

| `readReplica.persistentVolume.*`
| See the `core.persistentVolume.*` settings; they behave identically for read replicas
| `N/A`

| `readReplica.additionalVolumes`
| See the "Other Storage" section in the user guide for more information on this option.
| `{}`

| `readReplica.additionalVolumeMounts`
| See the "Other Storage" section in the user guide for more information on this option.
| `{}`

| `readReplica.service.type`
| Service type
| `ClusterIP`

| `readReplica.service.annotations`
| Service annotations
| `{}`

| `readReplica.service.labels`
| Custom Service labels
| `{}`

| `readReplica.service.loadBalancerSourceRanges`
| List of IP CIDRs allowed access to the LB (if `readReplica.service.type: LoadBalancer`)
| `[]`

|===
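As an illustration of how the read replica parameters above combine, here is a sketch of an install with autoscaling enabled (the values are examples taken from the table defaults, not recommendations):

```shell
helm install mygraph . \
    --set acceptLicenseAgreement=yes \
    --set neo4jPassword=mySecretPassword \
    --set readReplica.autoscaling.enabled=true \
    --set readReplica.autoscaling.minReplicas=1 \
    --set readReplica.autoscaling.maxReplicas=3 \
    --set readReplica.autoscaling.targetAverageUtilization=70
```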
== Naming your Deploy

This chart uses the `fullnameOverride` convention to allow you to control the name of resources that get applied to the cluster. By default, when you install a release called `mygraph`, you'll end up with resources named things like `mygraph-neo4j-core` and `mygraph-neo4j-replica`, which is the release name, app name, and component name.

If you would like to override this, you may specify any of these values:

* `fullnameOverride`
* `fullnamePrefix`
* `fullnameSuffix`

So, for example, if you set `fullnameOverride=graph` and `fullnamePrefix=marketing`, then you will see the resources deployed named like:

* `marketing-graph-core`
* `marketing-graph-replica`

(and so on), which omits both the helm release name and the app name (neo4j).
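Expressed as an install command, the example above would look something like this sketch:

```shell
helm install mygraph . \
    --set acceptLicenseAgreement=yes \
    --set neo4jPassword=mySecretPassword \
    --set fullnameOverride=graph \
    --set fullnamePrefix=marketing
```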
doc/docs/modules/ROOT/pages/development.adoc

+67 (new file)

= Local Development

[abstract]
This chapter provides instructions for local development of the helm chart, and how to make changes to it.


== Template Expansion

```shell
helm template tester . --set acceptLicenseAgreement=yes --set neo4jPassword=mySecretPassword > expanded.yaml
```

== Full-Cycle Test

The following mini-scripts will provision a test cluster, monitor it for rollout, test it, report results, and tear down / destroy the PVCs.

=== Provision K8S Cluster

Use `tools/test/provision-k8s.sh`, customized to your Google Cloud project ID.

=== Standalone

A standalone instance forms faster, so we manually lower the liveness/readiness timeouts.

```shell
export NAME=a
export NAMESPACE=default
helm install $NAME . --set acceptLicenseAgreement=yes --set neo4jPassword=mySecretPassword --set core.standalone=true --set readinessProbe.initialDelaySeconds=20 --set livenessProbe.initialDelaySeconds=20 && \
kubectl rollout status --namespace $NAMESPACE StatefulSet/$NAME-neo4j-core --watch && \
helm test $NAME --logs | tee testlog.txt
helm uninstall $NAME
sleep 20
for idx in 0 1 2 ; do
    kubectl delete pvc datadir-$NAME-neo4j-core-$idx ;
done
```

=== Causal Cluster

```shell
export NAME=a
export NAMESPACE=default
helm install $NAME . --set acceptLicenseAgreement=yes --set neo4jPassword=mySecretPassword --set readReplica.numberOfServers=1 && \
kubectl rollout status --namespace $NAMESPACE StatefulSet/$NAME-neo4j-core --watch && \
helm test $NAME --logs | tee testlog.txt
helm uninstall $NAME
sleep 20
for idx in 0 1 2 ; do
    kubectl delete pvc datadir-$NAME-neo4j-core-$idx ;
done
```

== Internal Tooling

This repo contains internal tooling containers for backup, restore, and testing of the helm chart, found
under the `tools` directory.

== Building the Containers

If you want to push your own docker containers, make sure the registry in the Makefile is set to one
where you have push permissions.

```shell
cd tools
make docker_build
make docker_push
```
doc/docs/modules/ROOT/pages/externalexposure.adoc

+249 (new file)

[#externalexposure]
# External Exposure of Neo4j Clusters

[abstract]
This chapter describes how to route traffic from the outside world or Internet to a Neo4j cluster running in Kubernetes.

## Overview / Problem

As described in the user guide, by default when you install Neo4j, each
node in your cluster gets a private internal DNS address, which it advertises to its clients.

This works "out of the box" without any knowledge of your local addressing or DNS situation. The
downside is that external clients cannot use the bolt+routing or neo4j protocols to connect to the cluster,
because they cannot route traffic to strictly cluster-internal DNS names. With the default helm install,
connections from the outside fail even with proper exposure of the pods, because:

1. The client connects to Neo4j.
2. The client fetches a routing table, which contains entries like `graph-neo4j-core-0.graph-neo4j.default.svc.cluster.local`.
3. External clients attempt, and fail, to connect to the routing table entries.
4. The overall connection fails or times out.

https://medium.com/neo4j/neo4j-considerations-in-orchestration-environments-584db747dca5[This article discusses these background issues] in depth. These instructions are
intended as a quick method of exposing Neo4j clusters, but you may have to do additional work
depending on your configuration.

## Solution Approach

To fix external clients, we need two things:

1. The `dbms.connector.*_address` settings inside of each Neo4j node set to an externally routable address.
2. An externally valid DNS name or IP address that clients can connect to, which routes traffic to the kubernetes pod.

Some visual diagrams of what's going on https://docs.google.com/presentation/d/14ziuwTzB6O7cp7fq0mA1lxWwZpwnJ9G4pZiwuLxBK70/edit?usp=sharing[can be found in the architectural documentation here].

We're going to address point 1 with some special configuration of the Neo4j pods themselves. I'll explain
the Neo4j config bits first, and then we'll tie it together with the external exposure. The most complex bit of this
is ensuring each pod has the right config.

We're going to address point 2 with Kubernetes load balancers. We will create one per pod in our Neo4j
stateful set, and associate static IP addresses with those load balancers. This enables packets to flow from
outside of Kubernetes to the right pod / Neo4j cluster member.

## Proper Neo4j Pod Config

In the helm chart within this repo, Neo4j core members are part of a stateful set and get indexes.
Given a deployment in a particular namespace, you end up with the following hostnames:

* `<deployment>-neo4j-core-0.<deployment>-neo4j.<namespace>.svc.cluster.local`
* `<deployment>-neo4j-core-1.<deployment>-neo4j.<namespace>.svc.cluster.local`
* `<deployment>-neo4j-core-2.<deployment>-neo4j.<namespace>.svc.cluster.local`

The helm chart in this repo can take a configurable ConfigMap for setting env vars on these pods, so
we can define our own configuration and pass it to the StatefulSet on startup. The `custom-core-configmap.yaml`
file in this directory is an example of that.

### Create Static IP addresses for inbound cluster traffic

I'm using GCP, so it is done like this. Important notes: on GCP, the region must match your GKE
region, and the network tier must be premium. On other clouds, the conceptual step here is the same,
but the details will differ: you need to allocate 3 static IP addresses, which we'll use in a later
step.

```shell
# Customize these next 2 for the region of your GKE cluster,
# and your GCP project ID
REGION=us-central1
PROJECT=my-gcp-project-id

for idx in 0 1 2 ; do
    gcloud compute addresses create \
        neo4j-static-ip-$idx --project=$PROJECT \
        --network-tier=PREMIUM --region=$REGION

    echo "IP$idx:"
    gcloud compute addresses describe neo4j-static-ip-$idx \
        --region=$REGION --project=$PROJECT --format=json | jq -r '.address'
done
```

**If you are doing this with Azure**, please note that the static IP addresses must be in the same
resource group as your kubernetes cluster, and can be created with
https://docs.microsoft.com/en-us/cli/azure/network/public-ip?view=azure-cli-latest#az-network-public-ip-create[az network public-ip create], like this (just one single sample):
`az network public-ip create -g resource_group_name -n core01 --sku standard --dns-name neo4jcore01 --allocation-method Static`. The Azure SKU used must be standard, and the resource group you need can be found in the kubernetes load balancer that https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough[following the Azure tutorial] sets up for you.

For the remainder of this tutorial, let's assume that the core IP addresses I've allocated here are
as follows; I'll refer to them by these environment variables:

```shell
export IP0=35.202.123.82
export IP1=34.71.151.230
export IP2=35.232.116.39
```

We will also need 3 exposure addresses that we want to advertise to clients. I'm going to set these
to be the same as the IP addresses, but if you have mapped DNS, you could use DNS names instead.

It's important for later steps that we have *both* IPs *and* addresses, because they're used differently.

```shell
export ADDR0=$IP0
export ADDR1=$IP1
export ADDR2=$IP2
```

### Per-Host Configuration

Recall that the helm chart lets us configure core nodes with a custom ConfigMap. That's good,
but the problem with one ConfigMap for all 3 cores is that each host needs *different config* for proper exposure.
So in the helm chart, we've divided the neo4j settings into basic settings and overrideable settings. In
the custom configmap example, you'll see lines like this:

```yaml
$DEPLOYMENT_neo4j_core_0_NEO4J_dbms_default__advertised__address: $ADDR0
$DEPLOYMENT_neo4j_core_1_NEO4J_dbms_default__advertised__address: $ADDR1
```

After expanding `$DEPLOYMENT` to "graph",
these variables have "host prefixes": `graph_neo4j_core_0_*` settings will only apply to the host
`graph-neo4j-core-0`. (The dashes are changed to underscores because dashes aren't supported in env var naming.)
It is very important to notice that these override settings have the pod name/hostname already "baked into" them,
so it's important to know how you're planning to deploy Neo4j prior to setting this up.

These "address settings" need to be changed to match the 3 static IPs that we allocated in the previous
step. There are four critical env vars, all of which need to be configured, for each host:

* `NEO4J_dbms_default__advertised__address`
* `NEO4J_dbms_connector_bolt_advertised__address`
* `NEO4J_dbms_connector_http_advertised__address`
* `NEO4J_dbms_connector_https_advertised__address`

With overrides, that's 12 special overrides (4 vars each for 3 containers).

So using this "override approach" we can have *one ConfigMap* that specifies all the config for 3 members
of a cluster, while still allowing per-host configuration settings to differ. The override approach in
question is implemented in a small amount of bash in the `core-statefulset.yaml` file. It simply
reads the environment and applies default values, permitting overrides if the override matches the host
where the changes are being applied.

In the next command, we'll apply the custom configmap. Here you use the IP addresses from the previous
step as ADDR0, ADDR1, and ADDR2. Alternatively, if those IP addresses are associated with DNS entries,
you can use those DNS names instead. We're calling them addresses because they can be any address you
want to advertise, and don't have to be an IP; but these addresses must resolve to the static IPs we
created in the earlier step.

```shell
export DEPLOYMENT=graph
export NAMESPACE=default
export ADDR0=35.202.123.82
export ADDR1=34.71.151.230
export ADDR2=35.232.116.39

cat tools/external-exposure/custom-core-configmap.yaml | envsubst | kubectl apply -f -
```

Once customized, we now have a ConfigMap we can point our Neo4j deployment at, so that it advertises properly.

### Installing the Helm Chart

From the root of this repo, navigate to stable/neo4j and issue this command to install the helm chart
with a deployment name of "graph". The deployment name *must match what you did in previous steps*,
because remember, we gave pod-specific overrides in the previous step.

```shell
export DEPLOYMENT=graph
helm install $DEPLOYMENT . \
    --set core.numberOfServers=3 \
    --set readReplica.numberOfServers=0 \
    --set core.configMap=$DEPLOYMENT-neo4j-externally-addressable-config \
    --set acceptLicenseAgreement=yes \
    --set neo4jPassword=mySecretPassword
```

Note the custom configmap that is passed.

## External Exposure

After a few minutes you'll have a fully-formed cluster whose pods show ready, and which you can connect
to, *but* it will be advertising values that Kubernetes isn't routing yet. So what we need to do next is
create a load balancer *per Neo4j core pod*, and set the `loadBalancerIP` to be the static IP address we
reserved in the earlier step and advertised with the custom ConfigMap.

A `load-balancer.yaml` file has been provided as a template; here's how to make 3 of them for the given static
IP addresses:

```shell
export DEPLOYMENT=graph

# Reuse IP0, etc. from the earlier step here.
# These *must be IP addresses* and not hostnames, because we're
# assigning load balancer IP addresses to bind to.
export CORE_ADDRESSES=($IP0 $IP1 $IP2)

for x in 0 1 2 ; do
    export IDX=$x
    export IP=${CORE_ADDRESSES[$x]}
    echo $DEPLOYMENT with IDX $IDX and IP $IP ;

    cat tools/external-exposure/load-balancer.yaml | envsubst | kubectl apply -f -
done
```

You'll notice we're using 3 load balancers for 3 pods. In a sense it's silly to "load balance" a single
pod, but without a lot of extra software and configuration this is the best option, because LBs
support TCP connections (ingresses don't), and LBs can get their own independent IP addresses, which can be
associated with DNS later on. Had we used NodePorts, we'd be at the mercy of more dynamic IP assignment,
and would also have to worry about a Kubernetes cluster member itself falling over. ClusterIPs aren't suitable
at all, as they don't give you external addresses.

There are other, fancier options, such as the https://kubernetes.github.io/ingress-nginx/[nginx-ingress controller],
but in this config we're shooting for something as simple as possible that you can do with existing
kubernetes primitives, without installing new packages you might not already have.

[NOTE]
**Potential trip-up point**: On GKE, the only thing needed to associate the static IP with the
load balancer is the `loadBalancerIP` field in the YAML. On other clouds, there may be additional steps
to allocate the static IP to the Kubernetes cluster. Consult your local cloud documentation.

## Putting it All Together

We can verify our services are running nicely like this:

```
$ kubectl get service | grep neo4j-external
zeke-neo4j-external-0   LoadBalancer   10.0.5.183   35.202.123.82   7687:30529/TCP,7474:30843/TCP,7473:30325/TCP   115s
zeke-neo4j-external-1   LoadBalancer   10.0.9.182   34.71.151.230   7687:31059/TCP,7474:31288/TCP,7473:31009/TCP   115s
zeke-neo4j-external-2   LoadBalancer   10.0.12.38   35.232.116.39   7687:30523/TCP,7474:30844/TCP,7473:31732/TCP   114s
```

After all of these steps, you should end up with a properly exposed cluster. We can recover our password
like so, and connect to any of the 3 static IPs:

```shell
export NEO4J_PASSWORD=$(kubectl get secrets graph-neo4j-secrets -o yaml | grep password | sed 's/.*: //' | base64 -d)
cypher-shell -a neo4j://35.202.123.82:7687 -u neo4j -p "$NEO4J_PASSWORD"
```

Additionally, since we exposed port 7474, you can go to any of the static IPs on port 7474 and end up with
Neo4j Browser, and be able to connect.

## Where to Go Next

* If you have static IPs, you can of course associate DNS with them, and obtain signed
certificates.
* This in turn will let you expose signed-cert HTTPS using standard Neo4j techniques, and
will also permit advertising DNS instead of a bare IP if you wish.

## References

* For background on general Kubernetes network exposure issues, I'd recommend this article:
https://medium.com/google-cloud/kubernetes-nodeport-vs-loadbalancer-vs-ingress-when-should-i-use-what-922f010849e0[Kubernetes NodePort vs. LoadBalancer vs. Ingress? When should I use what?]
doc/docs/modules/ROOT/pages/hardware.adoc

+12 (new file)

= Hardware and Machine Shape

[abstract]
How to size memory, CPU, and disk for Neo4j in Kubernetes


In order to ensure that Neo4j is deployable on basic/default K8S clusters, the default values for hardware requests have been made fairly low, and can be found in https://github.com/neo4j-contrib/neo4j-helm/blob/master/values.yaml[values.yaml].

Sizing databases is ultimately something that should be done with the workload in mind. Consult Neo4j's https://neo4j.com/developer/guide-performance-tuning/?ref=googlemarketplace[Performance Tuning Documentation] for more information. In general, heap size and page cache sizing are the most important places to start when tuning performance.

It is strongly recommended that you choose request and limit values for CPU and memory prior to deploying in important environments.
doc/docs/modules/ROOT/pages/index.adoc

+32 (new file)

= Neo4j Helm Chart Overview

== Project Overview

Neo4j-helm allows users to deploy multi-node Neo4j Enterprise Causal Clusters to Kubernetes instances, with configuration options for the most common scenarios. It represents a very rapid way to get started running the world's leading native graph database on top of Kubernetes.

This guide is intended only as a supplement to the Neo4j Operations Manual. Neo4j-helm is essentially a docker-container-based deployment of Neo4j Causal Cluster. As such, all of the information in the Operations Manual applies to its operation, and this guide focuses only on kubernetes-specific concerns.

[NOTE]
**In addition to the information in this user guide, a set of slides is available on the deployment architecture and chart structure of this repository: https://docs.google.com/presentation/d/14ziuwTzB6O7cp7fq0mA1lxWwZpwnJ9G4pZiwuLxBK70/edit?usp=sharing[Neo4j Helm Chart Structure]**

== Versioning

The versioning of this helm chart follows Neo4j versions for simplicity.

* Version 4.0.X-Y of the helm chart is compatible with Neo4j EE v4.0.*
* Version 4.1.X-Y of the helm chart is compatible with Neo4j EE v4.1.*
* (and so on)

The charts in this repository are for Neo4j 4.0 going forward. Earlier charts are available for the Neo4j 3.5 series, but there are substantial differences between the two versions. Careful upgrade planning is advised before attempting to upgrade an existing chart.

Consult the https://neo4j.com/docs/operations-manual/current/upgrade/[upgrade guide] and expect that additional configuration of this chart will be necessary.

== Licensing

The source code to neo4j-helm is available under the terms of the Apache License, version 2.0. See the LICENSE.txt file in the source code repository for full terms and conditions.

This documentation is licensed under link:{attachmentsdir}/LICENSE.txt[Creative Commons 4.0].

[NOTE]
The documentation version is v{docs-version}
doc/docs/modules/ROOT/pages/installation.adoc

+51 (new file)

= Installation

[abstract]
Explore the options for installing neo4j-helm, and how to configure advanced scenarios


This is a helm chart, and it is installed by running https://helm.sh/docs/helm/helm_install/[helm install] with various parameters used to customize the deploy.

The default for this chart is to install https://neo4j.com/docs/operations-manual/current/clustering/[Neo4j Causal Cluster], with 3 core members and zero replicas, but standalone mode is also supported.

== Causal Cluster Example

```shell
helm install my-neo4j \
    --set core.numberOfServers=3,readReplica.numberOfServers=3,acceptLicenseAgreement=yes,neo4jPassword=mySecretPassword .
```

The above command creates a cluster containing 3 core servers and 3 read replicas.

Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example:

```shell
helm install neo4j-helm -f values.yaml .
```

[NOTE]
*Tip*: You can copy the default https://github.com/neo4j-contrib/neo4j-helm/blob/master/values.yaml[values.yaml] file as an example to edit, with full access to all options.

== Standalone (Single Machine) Command Line Example

```shell
helm install my-neo4j --set core.standalone=true,acceptLicenseAgreement=yes,neo4jPassword=mySecretPassword .
```

Important notes about standalone mode:

* When running in standalone mode, `core.numberOfServers` is ignored and you will get 1 server.
* Read replicas may only be used with causal cluster. When running standalone, all read replica arguments are ignored.
* All other core settings (persistent volume size, annotations, etc.) still apply to your single instance.
* Standalone instances installed in this way cannot be scaled into clusters.
If you attempt to scale a standalone system, you will get multiple independent DBMSes, not one causal cluster.

== Deployment Scenarios

See the https://github.com/neo4j-contrib/neo4j-helm/blob/master/deployment-scenarios[deployment-scenarios folder] in the repo for example YAML values files. These are example configurations that show the settings necessary to launch the helm chart in different configurations.

Each of these scenario files is launched the same way:

```shell
helm install mygraph -f deployment-scenarios/my-scenario.yaml .
```
doc/docs/modules/ROOT/pages/networking.adoc

+44 (new file)

= Networking & Security

[abstract]
How to work with Neo4j networking & security concepts


== Exposed Services

For security reasons, we have not enabled access to the database cluster from outside of Kubernetes by default, instead choosing to leave it to users to configure appropriate network access policies for their usage. If external access is desired, please look at the external exposure instructions found in this repository.

By default, each node will expose:

* HTTP on port 7474
* HTTPS on port 7473
* Bolt on port 7687

Exposed services and port mappings can be configured by referencing Neo4j's docker documentation. See the advanced configuration section in this document for how to change the way the docker containers in each pod are configured.

Refer to the Neo4j operations manual for information on the ports that Neo4j needs to function. Default port numbers in the helm chart exactly follow the default ports of other installations.
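Until you configure external exposure, a quick way to reach those ports from a workstation is `kubectl port-forward` against the service; a sketch, assuming a release named `mygraph` (the service name follows the chart's naming convention):

```shell
# Forward HTTP (7474) and Bolt (7687) from the in-cluster service to localhost.
kubectl port-forward svc/mygraph-neo4j 7474:7474 7687:7687
```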
== Service Address

Additionally, a service address inside of the cluster will be available as follows. To determine your service address, simply substitute `$NAME` with the name you deployed neo4j under, and `$NAMESPACE` with the kubernetes namespace where neo4j resides:

`$NAME-neo4j.$NAMESPACE.svc.cluster.local`

Any client may connect to this address, as it is a DNS record with multiple entries pointing to the nodes which back the cluster. For example, bolt+routing clients can use this address to bootstrap their connection into the cluster, subject to the items in the limitations section.
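For example, a client running inside the same kubernetes cluster might bootstrap like this (the release name `mygraph` and namespace `default` are assumptions):

```shell
cypher-shell -a neo4j://mygraph-neo4j.default.svc.cluster.local:7687 \
    -u neo4j -p "$NEO4J_PASSWORD"
```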
== Cluster Formation

Immediately after deploying Neo4j, as the pods are created the cluster begins to form. This may take up to 5 minutes, depending on a number of factors, including how long it takes pods to get scheduled and how many resources are associated with the pods. While the cluster is forming, the Neo4j REST API and Bolt endpoints may not be available. After a few minutes, Bolt endpoints become available inside of the kubernetes cluster.

== Password

After installing, your cluster will start with the password you supplied as the `neo4jPassword` setting. This is stored in a kubernetes secret that is attached to your deployment. Given a deployment named "mygraph", you can find the password as the `neo4j-password` key under the `mygraph-neo4j-secrets` configuration item in Kubernetes. The password is base64 encoded, and can be recovered as plaintext by authorized users with this command:

```shell
export NEO4J_PASSWORD=$(kubectl get secrets mygraph-neo4j-secrets -o yaml | grep password | sed 's/.*: //' | base64 -d)
```

Alternatively, if you set `existingPasswordSecret`, that secret name should be used instead.

This password applies to the base administrative user named "neo4j".
doc/docs/modules/ROOT/pages/operations.adoc

+152 (new file)

= Operations

[abstract]
How to perform a variety of Neo4j system operations in Kubernetes


## Logging

Neo4j logs that would normally be stored in the `neo4j.log` file are pod logs in Kubernetes. As a result, they are not written directly as a file to disk, but are accessible by just issuing `kubectl logs podname`.

The `debug.log` file is a regular file written to disk inside of the pod. As of the 4.1.0-4 release and later, the default log path has been changed to place logs on persistent storage. They will typically be found at `/data/logs` inside the container, and are set to the path specified by the `dbms.directories.logs` configuration parameter in Neo4j.

The same locations apply for other Neo4j log files such as `security.log` and `query.log`.
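As a sketch, assuming a release named `mygraph` and the pod naming convention used elsewhere in this guide:

```shell
# Pod logs (the neo4j.log equivalent):
kubectl logs mygraph-neo4j-core-0

# Log files written to persistent storage, such as debug.log:
kubectl exec mygraph-neo4j-core-0 -- tail -n 50 /data/logs/debug.log
```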
## Memory Management

The chart follows the same memory configuration settings as described in the https://neo4j.com/docs/operations-manual/current/performance/memory-configuration/[Memory Configuration] section of the Operations Manual.

### Default Approach
Neo4j-helm behaves just like the regular Neo4j product. No explicit heap or page cache is set; memory grows dynamically according to what the JVM can allocate.

### Recommended Approach

You may use the setting `dbms.memory.use_memrec=true`; this will run https://neo4j.com/docs/operations-manual/current/tools/neo4j-admin-memrec/[neo4j-admin memrec] and use its recommendations. The `use_memrec` setting is an option for the helm chart; it is not a Neo4j configuration option.

It's very important that you also specify CPU and memory resources on launch that are adequate to support the recommendations. Crashing pods, "unschedulable" errors, and other problems will result if the recommended amounts of memory are higher than the Kubernetes requests/limits.

### Custom Explicit Settings

You may set any of the following settings. The helm chart accepts these settings, mirroring the names used in the neo4j.conf file:

* `dbms.memory.heap.initial_size`
* `dbms.memory.heap.max_size`
* `dbms.memory.pagecache.size`

Their meanings, formats, and defaults are the same as found in the operations manual. See the section "Passing Custom Configuration as a ConfigMap" for how to set these settings for your database.

To see an example of this custom configuration in use with a single instance, please see the https://github.com/neo4j-contrib/neo4j-helm/blob/master/deployment-scenarios/standalone-custom-memory-config.yaml[standalone custom memory config deployment scenario].
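A minimal sketch of what the ConfigMap approach could look like, assuming Neo4j's docker-style variable naming (where a double underscore stands for a literal underscore in the setting name); the ConfigMap name and sizes here are illustrative only:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: mygraph-neo4j-core-config   # hypothetical name; referenced via core.configMap
data:
  NEO4J_dbms_memory_heap_initial__size: "2G"
  NEO4J_dbms_memory_heap_max__size: "2G"
  NEO4J_dbms_memory_pagecache_size: "1G"
```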
39+
40+
## Monitoring
41+
42+
This chart supports the same monitoring configuration settings as described in the Neo4j Operations Manual. These have been ommitted from the table above because they are documented in the operational manual, but here are three quick examples:
43+
44+
* To publish prometheus metrics, `--set metrics.prometheus.enabled=true,metrics.prometheus.endpoint=localhost:2004`
45+
* To publish graphite metrics, `--set metrics.graphite.enabled=true,metrics.graphite.server=localhost:2003,metrics.graphite.interval=3s`
46+
* To adjust CSV metrics (enabled by default) use `metrics.csv.enabled` and `metrics.csv.interval`.
47+
* To disable JMX metrics (enabled by default) use `metrics.jmx.enabled=false`.
48+
49+
## Data Persistence

The most important data is kept in the `/data` volume attached to each of the core cluster members. These in turn are mapped to `PersistentVolumeClaims` (PVCs) in Kubernetes, and they are not deleted when you run `helm uninstall mygraph`.

It is recommended that you investigate the `storageClass` option for persistent volume claims, and choose low-latency SSD disks for best performance with Neo4j. The `storageClass` name you need will vary between distributions of Kubernetes.

[NOTE]
**Important**: PVCs retain data between Neo4j installs. If you deploy a cluster under the name 'mygraph', later uninstall it, and then re-install it a second time under the same name, the new instance will inherit all of the old data from the pre-existing PVCs. This includes things like usernames, passwords, and roles.
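
To see what claims an old install left behind, and to remove one when you truly want a clean slate, something like the following applies (the `datadir-` claim name prefix is an assumption; check the actual names that `kubectl get pvc` reports):

```shell
# List claims; those belonging to mygraph will reference its pod names
kubectl get pvc

# Deleting a claim permanently destroys the data on it
kubectl delete pvc datadir-mygraph-neo4j-core-0
```
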
For further durability of data, xref::backup.adoc[regularly scheduled backups] are recommended.
## Other Storage

The helm chart supports `additionalVolumes` and `additionalVolumeMounts` values for both core and read replica sets. These can be used to set up arbitrary extra mounted drives inside the containers. Exactly how to specify this is left to the user, because it depends on whether you want to use an existing PVC, mount a ConfigMap, or use some other setup. This feature is intended to provide storage flexibility, to inject files such as `apoc.conf`, initialization scripts, or import directories.

Note that the contents of additional volumes and mounts are not formally supported; to use this feature you should be very comfortable with filesystem basics in Kubernetes and with Neo4j directory configuration.
## Fabric

In Neo4j 4.0+, https://neo4j.com/docs/operations-manual/current/fabric/introduction/[fabric] is a feature that can be enabled with regular configuration in `neo4j.conf`. All of the fabric configuration referenced in the manual can be done via the custom ConfigMaps described in this documentation.

### Simple Usage

https://github.com/neo4j-contrib/neo4j-helm/blob/master/deployment-scenarios/fabric[A simple worked example of this approach can be found here.]

Using Neo4j Fabric in Kubernetes boils down to configuring the product as normal, but in the "Docker style": where the Neo4j Operations Manual tells you to set `fabric.database.name=myfabric`, in Kubernetes that becomes the environment variable `NEO4J_fabric_database_name: myfabric`, and so forth.
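
A sketch of what that looks like as a ConfigMap (the graph URI and database values are illustrative placeholders; the `fabric.graph.0.*` settings are described in the fabric documentation):

```shell
kubectl create configmap fabric-config \
  --from-literal=NEO4J_fabric_database_name=myfabric \
  --from-literal=NEO4J_fabric_graph_0_name=graphA \
  --from-literal=NEO4J_fabric_graph_0_uri=neo4j://mygraph-neo4j.default.svc.cluster.local:7687 \
  --from-literal=NEO4J_fabric_graph_0_database=neo4j
```
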
So that is fairly straightforward, but it is only one half of the story. The other half is: what is the fabric deployment topology?

### Fabric Topology

https://neo4j.com/docs/operations-manual/current/fabric/introduction/#_multi_cluster_deployment[Fabric enables some very complex setups]. If you have a *single DBMS*, you can do it with pure configuration and it will work. If you have multiple DBMSs, then the way this works behind the scenes is via account/role coordination, and Bolt connections between clusters.

That in turn means you would need network routing set up so that cluster A can talk to cluster B (referring to the diagram linked above). This is mostly Kubernetes networking, nothing too exotic, but it needs to be carefully planned for.

Where this gets complicated is when the architecture gets big or complex. Suppose you're using fabric to store shards of a huge "customer graph": the shard of US customers exists in one geo region, and the shard of EU customers in another. You can use fabric to query both shards and have a logical view of the "customer graph" across all geos. To do this in Kubernetes, though, would imply Kubernetes node pools in two different geos, and almost certainly two different Neo4j clusters. Enabling Bolt between them (permitting fabric to work) would require a more advanced networking setup for Kubernetes specifically. But to Neo4j as a product, it's all the same: can I make a Neo4j/Bolt connection to the remote source? If yes, then it should be fine.

### How Fabric Works

Fabric needs three things to work:

1. A user/role (neo4j/admin for example) that is the same on all databases subject to the fabric query
2. The ability to make a Bolt connection to all cluster members participating in the fabric query
3. Some configuration

Custom ConfigMaps (discussed in the next section) cover #3. Your security configuration, whatever you choose, covers #1 and isn't Kubernetes specific. And #2 is where Kubernetes networking may or may not come in, depending on your deployment topology. In the simplest single-DBMS configurations, it should work out of the box.

## Custom Neo4j Configuration with ConfigMaps

Neo4j cluster pods are divided into two groups: cores and replicas. Each group can be configured with a ConfigMap containing environment variables. Those environment variables are in turn used as configuration settings by the underlying Neo4j Docker container, according to the Neo4j environment variable configuration convention.

As a result, you can set any custom Neo4j configuration by creating your own Kubernetes ConfigMap and using it like this:

```
--set core.configMap=myConfigMapName --set readReplica.configMap=myReplicaConfigMap
```
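
For example, you can keep the settings in an env file (names following the Neo4j Docker environment variable convention; the settings shown are illustrative) and load it in one shot:

```shell
cat > custom-neo4j.env <<'EOF'
NEO4J_dbms_transaction_timeout=60s
NEO4J_dbms_logs_query_enabled=true
EOF

kubectl create configmap myConfigMapName --from-env-file=custom-neo4j.env
```
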

[NOTE]
Configuration of some networking-specific settings is still done at container start time, and this very small set of variables may still be overridden by the helm chart; in particular, advertised addresses and hostnames for the containers.

## Scaling

The following section describes considerations for changing the size of a cluster at runtime to handle more requests. Scaling applies only to causal clusters; standalone instances cannot be scaled in this way.

### Planning

Before scaling a database running on Kubernetes, make sure to consult the Neo4j documentation on clustering architecture in depth, and in particular take care to choose carefully between adding core nodes and adding read replicas. This planning process should also take the Kubernetes layer into account, including where the node pools reside. For example, adding extra core nodes to protect data with additional redundancy may not provide extra guarantees if all Kubernetes nodes are in the same zone.

For many users and use cases, careful planning of the initial database sizing is preferable to later attempts to rapidly scale the cluster.

When a new node joins a Neo4j cluster, it needs to replicate the existing data from the other nodes. This can create a temporarily higher load on the existing nodes as they replicate data to the new member, and for very large databases it can cause temporary unavailability under heavy loads. We recommend that when setting up a scalable instance of Neo4j, you configure pods to restore from a recent backup set before starting; instructions on how to restore are provided in this repo. In this way, new pods are mostly caught up before entering the cluster, and the "catch-up" process is minimal both in time spent and in load placed on the rest of the cluster.

Because of the data-intensive nature of any database, careful planning before scaling is highly recommended. Storage allocation for each new node is also needed; as a result, when scaling the database, the Kubernetes cluster will create new persistent volume claims and backing volumes (for example, GCE persistent disks on Google Cloud).

Because Neo4j's configuration is different in single-node mode (`dbms.mode=SINGLE`), you should not scale a deployment that was initially set to one core server. Doing so will result in multiple independent databases, not one cluster.

### Execution (Manual Scaling)

Neo4j-Helm consists of a StatefulSet for core nodes and a Deployment for read replicas. Even if you chose zero replicas, you will see a Deployment with zero members.

Scaling the database is a matter of scaling one of these elements.

Depending on the size of your database and how busy the other members are, it may take considerable time for the cluster topology to show the presence of the new member, as it connects to the cluster and performs catch-up. Once the new node is caught up, you can execute the Cypher query `CALL dbms.cluster.overview();` to verify that it is operational.
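
For example, assuming a release named `mygraph` (cypher-shell is available on the PATH inside the Neo4j containers):

```shell
# Scale the core StatefulSet from 3 to 5 members
kubectl scale statefulsets mygraph-neo4j-core --replicas=5

# After catch-up, verify the topology from any core pod
kubectl exec mygraph-neo4j-core-0 -- \
  cypher-shell -u neo4j -p mySecretPassword "CALL dbms.cluster.overview();"
```
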

### Execution (Automated Scaling)

The helm chart provides settings that configure a https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/[HorizontalPodAutoscaler] for read replicas, which can automatically scale according to the CPU utilization of the underlying pods. For usage of this feature, please see the `readReplica.autoscaling.*` settings documented in the supported settings above.

For further details about how this works and what it entails, please consult the https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/[Kubernetes documentation on horizontal pod autoscalers].

[NOTE]
*Automated scaling applies only to read replicas*. At this time we do not recommend automatic scaling of core members of the cluster at all; core member scaling should be limited to special operations such as rolling upgrades, documented separately.
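
As a sketch, enabling autoscaling might look like the following. The `enabled` flag matches the documented `readReplica.autoscaling.*` family, while the specific sub-keys shown (replica bounds and CPU target) are assumptions based on common HorizontalPodAutoscaler fields, so check the supported settings for the authoritative names:

```shell
helm install mygraph RELEASE_URL \
  --set acceptLicenseAgreement=yes \
  --set readReplica.autoscaling.enabled=true \
  --set readReplica.autoscaling.minReplicas=1 \
  --set readReplica.autoscaling.maxReplicas=5 \
  --set readReplica.autoscaling.targetAverageUtilization=70
```
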

### Warnings and Indications

Scaled pods inherit their configuration from their StatefulSet. For Neo4j, this means that items like configured storage size, hardware limits, and passwords apply to scaled-up members.

If scaling down, do not go below three core nodes; this is the minimum necessary to guarantee a properly functioning cluster with data redundancy. Consult the Neo4j clustering documentation for more information. Neo4j-Helm uses PVCs, so if you scale up and later scale down, this may orphan an underlying PVC, which you may want to delete manually at a later date.

## Anti-Affinity Rules

For production installs, anti-affinity rules are recommended: pod deployment is intentionally spread out among Kubernetes worker nodes, which improves Neo4j's failure characteristics. If Kubernetes inadvertently deploys all three core Neo4j pods to a single worker node and the underlying worker node VM fails, the entire cluster goes down. Anti-affinity rules "spread the deployment out" to avoid this.

An example of how to configure this, with references to documentation, is provided in the deployment scenarios directory; a rough sketch follows.
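
As a rough sketch only: the `podAntiAffinity` block below is standard Kubernetes API, but the top-level `core.affinity` key is a hypothetical illustration of how a values file might wire it in, and the label selector must match the labels your release actually applies. Prefer the worked example in the deployment scenarios directory.

```shell
cat > anti-affinity-values.yaml <<'EOF'
core:
  affinity:                                   # hypothetical values key
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - labelSelector:
            matchLabels:
              app.kubernetes.io/name: neo4j   # must match your pod labels
          topologyKey: kubernetes.io/hostname
EOF

helm install mygraph RELEASE_URL \
  --set acceptLicenseAgreement=yes \
  -f anti-affinity-values.yaml
```
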

= Prerequisites

[abstract]
This chapter covers the tools you need to have installed before using neo4j-helm.

== Required Software

* Kubernetes 1.6+ with Beta APIs enabled
* Docker and kubectl installed locally
* Helm >= 3.1 installed
* PV provisioner support in the underlying infrastructure
* License acceptance: you must add `acceptLicenseAgreement` to the values.yaml file and set it to `yes`, or include `--set acceptLicenseAgreement=yes` on the `helm install` command line, to accept the license.
* This chart requires that you have a license for Neo4j Enterprise Edition. Trial licenses https://neo4j.com/lp/enterprise-cloud/?utm_content=kubernetes[can be obtained here].

== Licensing & Cost

Neo4j Enterprise Edition (EE) is available to any existing enterprise license holder of Neo4j in a Bring Your Own License (BYOL) arrangement. Neo4j EE is also available under evaluation licenses; contact Neo4j in order to obtain one. There is no hourly or metered cost associated with using Neo4j EE for current license holders.

= Quick Start

[abstract]
Get started fast for common scenarios

== Download a Release

Find the URL of a https://github.com/neo4j-contrib/neo4j-helm/releases[release]; it will be named `neo4j-$RELEASEVERSION.tgz`.

[NOTE]
You must set `acceptLicenseAgreement=yes` to accept the license, or your deployment will not succeed.

== Standalone (Single Server)

```shell
helm install mygraph RELEASE_URL \
    --set core.standalone=true \
    --set acceptLicenseAgreement=yes \
    --set neo4jPassword=mySecretPassword
```

== Causal Cluster

```shell
helm install mygraph RELEASE_URL \
    --set acceptLicenseAgreement=yes \
    --set neo4jPassword=mySecretPassword \
    --set core.numberOfServers=3 \
    --set readReplica.numberOfServers=0
```

== Uninstalling

```shell
helm uninstall mygraph
```

== Where to Go For Help

If you have a question not covered in the other sections of this user guide, the
https://community.neo4j.com/c/neo4j-graph-platform/cloud/76[Neo4j Community Site] is a great place
to ask for help.

=== Getting Started

The following section covers the DataSource Writer: that is, how to transfer a Spark Dataset's content into Neo4j.

Given the following Scala program:

[source,scala]
----
import org.apache.spark.sql.{SaveMode, SparkSession}

import scala.util.Random

val sparkSession = SparkSession.builder().getOrCreate()
import sparkSession.implicits._

case class Point3d(`type`: String = "point-3d",
                   srid: Int,
                   x: Double,
                   y: Double,
                   z: Double)

case class Person(name: String, surname: String, age: Int, livesIn: Point3d)

val total = 10
val rand = Random
val ds = (1 to total)
  .map(i => Person("Andrea " + i, "Santurbano " + i, rand.nextInt(100),
    Point3d(srid = 4979, x = 12.5811776, y = 41.9579492, z = 1.3))).toDS()

ds.write
  .format("org.neo4j.spark.DataSource")
  .mode(SaveMode.ErrorIfExists)
  .option("url", "bolt://localhost:7687")
  .option("labels", ":Person:Customer")
  .save()
----

this will insert 10 nodes into Neo4j via Spark, each of which will have:

* 2 `labels`: `Person` and `Customer`
* 4 `properties`: `name`, `surname`, `age` and `livesIn`

==== Save Mode

In order to persist data into Neo4j, the Spark Connector supports two save modes, which work reliably only if `UNIQUE` or `NODE KEY` constraints are defined in Neo4j for the given properties (see the constraint example after this list):

* `SaveMode.ErrorIfExists`: this builds a `CREATE` query
* `SaveMode.Overwrite`: this builds a `MERGE` query

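For example, to support `SaveMode.Overwrite` on `:Person` nodes keyed by `name` and `surname`, a node key constraint could be created first. The Cypher below uses Neo4j 4.x syntax and can be run from cypher-shell or Browser (connection details are placeholders):

[source,shell]
----
cypher-shell -a bolt://localhost:7687 -u neo4j -p password \
  "CREATE CONSTRAINT person_keys ON (p:Person) ASSERT (p.name, p.surname) IS NODE KEY"
----
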
==== Options

The DataSource Writer has several options for connecting to and persisting data into Neo4j.

.Most Commonly Needed Configuration Settings
|===
|Setting Name |Description |Default Value |Required

|`labels`
|Colon-separated list of the labels to attach to the node
|_(none)_
|No

|`batch.size`
|The number of rows sent to Neo4j in a single batch
|5000
|No

|`node.keys`
|Comma-separated list of properties considered as node keys when using `SaveMode.Overwrite`
|_(none)_
|No

|`transaction.codes.fail`
|Comma-separated list of Neo4j error codes that cause the transaction to fail
|_(none)_
|No

|===

==== How the Spark Connector persists the data

[NOTE]
The Neo4j Spark Connector writes in batches in order to speed up the ingestion process, so if the process fails at some point, all previously written batches remain persisted.

===== Nodes

If you use the `labels` option, the Spark Connector will persist the entire Dataset as nodes. Depending on the `SaveMode`, it will either `CREATE` or `MERGE` nodes (in the latter case using the `node.keys` properties).

The nodes are sent to Neo4j in batches of rows, as defined by the `batch.size` property, and under the hood an `UNWIND` operation is performed over each batch.

For example, given the following script:

[source,scala]
----
import org.apache.spark.sql.{SaveMode, SparkSession}

import scala.util.Random

val sparkSession = SparkSession.builder().getOrCreate()
import sparkSession.implicits._

case class Point3d(`type`: String = "point-3d",
                   srid: Int,
                   x: Double,
                   y: Double,
                   z: Double)

case class Person(name: String, surname: String, age: Int, livesIn: Point3d)

val total = 10
val rand = Random
val ds = (1 to total)
  .map(i => Person("Andrea " + i, "Santurbano " + i, rand.nextInt(100),
    Point3d(srid = 4979, x = 12.5811776, y = 41.9579492, z = 1.3))).toDS()

ds.write
  .format("org.neo4j.spark.DataSource")
  .mode(SaveMode.ErrorIfExists)
  .option("url", "bolt://localhost:7687")
  .option("labels", ":Person:Customer")
  .save()
----

under the hood the Spark Connector will perform the following Cypher query:

[source,cypher]
----
UNWIND $events AS event
CREATE (n:`Person`:`Customer`) SET n += event.properties
----

For the same script as above, except with this write section:

[source,scala]
----
ds.write
  .format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Overwrite)
  .option("url", "bolt://localhost:7687")
  .option("labels", ":Person:Customer")
  .option("node.keys", "name,surname")
  .save()
----

under the hood the Spark Connector will perform the following Cypher query:

[source,cypher]
----
UNWIND $events AS event
MERGE (n:`Person`:`Customer` {name: event.keys.name, surname: event.keys.surname})
SET n += event.properties
----

If a column value is a `Map<String, Value>` (where `Value` can be any supported
https://neo4j.com/docs/cypher-manual/current/syntax/values/[Neo4j type]), the Connector will automatically
try to flatten it. So if you have the following Dataset:

|===
|id |name |lives_in

|1
|Andrea Santurbano
|{address: 'Times Square, 1', city: 'NY', state: 'NY'}

|2
|Davide Fantuzzi
|{address: 'Statue of Liberty, 10', city: 'NY', state: 'NY'}

|===

under the hood the Spark Connector will flatten the data in this way:

|===
|id |name |`lives_in.address` |`lives_in.city` |`lives_in.state`

|1
|Andrea Santurbano
|Times Square, 1
|NY
|NY

|2
|Davide Fantuzzi
|Statue of Liberty, 10
|NY
|NY

|===

===== Query

If you use the `query` option, the Spark Connector will persist the entire Dataset by using the provided query.
The rows are sent to Neo4j in batches of size `batch.size`, and under the hood an `UNWIND` operation is performed over each batch.

So, given the following simple Spark program:

[source,scala]
----
ds.write
  .format("org.neo4j.spark.DataSource")
  .option("url", "bolt://localhost:7687")
  .option("query", "CREATE (n:Person{fullName: event.name + event.surname})")
  .save()
----

under the hood the Spark Connector will perform the following Cypher query:

[source,cypher]
----
UNWIND $events AS event
CREATE (n:Person{fullName: event.name + event.surname})
----

where `event` represents each Dataset row.

[#restore]
# Restoring Neo4j Containers

[NOTE]
**This approach assumes you have Google Cloud credentials and wish to store your backups
on Google Cloud Storage**. If this is not the case, you will need to adjust the restore
script for your desired cloud storage method, but the approach will work for any backup location.

[NOTE]
**This approach works only for Neo4j 4.0+**. The tools and the
DBMS itself changed quite a lot between 3.5 and 4.0, and the approach
here will likely not work for older databases without substantial
modification.

## Approach

The restore container is used as an `initContainer` in the main cluster. Prior to
a node in the Neo4j cluster starting, the restore container copies down the backup
set and restores it into place. When the initContainer terminates, the regular
Neo4j docker instance starts, and picks up where the backup left off.

This container is primarily tested against the backup .tar.gz archives produced by
the `backup` container in this same code repository, and we recommend you use that approach. If you tar/gz your own backups using a different method, be careful to
inspect the `restore.sh` script, because it makes certain assumptions about
the directory structure of archived backups in order to restore properly.

### Create a service key secret to access cloud storage

First, create a Kubernetes secret that contains the content of your account service key. This key must have permissions to access the bucket and backup set that you're trying to restore.

```shell
MY_SERVICE_ACCOUNT_KEY=$HOME/.google/my-service-key.json
kubectl create secret generic neo4j-service-key \
   --from-file=credentials.json=$MY_SERVICE_ACCOUNT_KEY
```

The restore process takes this Kubernetes secret
(named `neo4j-service-key`) and mounts it as a file
inside of the restore container (`/auth/credentials.json`). That
file is then used to authenticate the storage client that
downloads the backup set from cloud storage.

In `values.yaml`, configure the secret you set here like so:

```yaml
restoreSecret: neo4j-service-key
```

This allows the core and read replica nodes to access that service key
as a volume. That volume being present within the containers is necessary for the
next step, and will be mounted as `/auth/credentials.json` inside the container.

If this service key secret is not in place, the auth information cannot be mounted as
a volume in the initContainer, and your pods may get stuck/hung at the "ContainerCreating" phase.

### Configure the initContainer for Core and Read Replica Nodes

Refer to the single instance restore deploy scenario to see how the initContainers are configured.

What you will need to customize and ensure:

* Ensure you have created the appropriate secret and set its name
* Ensure that the volume mount to `/auth` matches the secret name you created above
* Ensure that your `BUCKET` and `GOOGLE_APPLICATION_CREDENTIALS` are set correctly given the way you created your secret

The example scenario above creates the initContainer just for core nodes. It is strongly recommended that you do the same for `readReplica.initContainers` if you are using read replicas. If you restore only to core nodes and not to read replicas, the core nodes will replicate the data to the read replicas when they start. This works fine, but may result in longer startup times and much more bandwidth. A sketch of such a configuration follows.
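
As a sketch (the restore image name matches the one built from this repository's `tools` directory; the volume names and the exact shape of the `core.initContainers` key should be checked against the linked deployment scenario):

```shell
cat > restore-values.yaml <<'EOF'
restoreSecret: neo4j-service-key
core:
  initContainers:
    - name: restore-from-backup
      image: gcr.io/neo4j-helm/restore:latest    # tag should match your chart version
      volumeMounts:
        - name: datadir                          # assumed data volume name
          mountPath: /data
        - name: creds                            # assumed secret volume name
          mountPath: /auth
      env:
        - name: GOOGLE_APPLICATION_CREDENTIALS
          value: /auth/credentials.json
        - name: BUCKET
          value: gs://my-backup-bucket
        - name: DATABASE
          value: neo4j,system
        - name: TIMESTAMP
          value: latest
        - name: FORCE_OVERWRITE
          value: "true"
EOF
```
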

## Restore Environment Variables for the Init Container

### Required

- `GOOGLE_APPLICATION_CREDENTIALS` - path to a file with a JSON service account key (see credentials above). Defaults to `/auth/credentials.json`.
- `BUCKET` - the storage bucket where backups are located, e.g. `gs://bucketname`. This parameter may include a relative path (`gs://bucketname/mycluster`).
- `DATABASE` - comma-separated list of databases to restore, e.g. `neo4j,system`.
- `TIMESTAMP` - defaults to "latest". See the backup container's documentation on the latest pointer. You may set this to a particular timestamp to restore that exact moment in time; the timestamp must match the filename in storage. So if you want to restore the backup set at `neo4j-2020-06-16-12:32:57.tar.gz`, then the `TIMESTAMP` would be `2020-06-16-12:32:57`.

### Optional

- `PURGE_ON_COMPLETE` (defaults to true). If this is set to the value "true", the restore process will remove the restore artifacts from disk. With any other value, they will be left in place, which is useful for debugging restores, to see what was copied down from cloud storage and how it was expanded.
- `FORCE_OVERWRITE` - if this is the value "true", then the restore process will overwrite and destroy any existing data that is on the volume. Take care when using this in combination with persistent volumes. The default is false; if data already exists on the drive, the restore operation will fail but preserve your data. **You must set this to true if you want restore to work on top of an existing database**.

## Warnings

A common way to deploy Neo4j is to restore from the last backup when a container initializes. This is good for a cluster, because it minimizes how much catch-up
is needed when a node is launched. Any difference between the last backup and the rest of the
cluster is provided via catch-up.

[NOTE]
For single nodes, take extreme care here.

If a node crashes, and you automatically restore from
backup, and force-overwrite what was previously on the disk, you will lose any data that the
database captured between when the last backup was taken and when the crash happened. As a
result, for single-node instances of Neo4j you should either perform restores manually when you
need them, or keep a very regular backup schedule to minimize this data loss. If data
loss is under no circumstances acceptable, do not automate restores for single-node deploys.

## Running the Restore

With the initContainer in place and properly configured, simply deploy a new cluster
using the regular approach. The restore will happen prior to start, and when the
cluster comes live, it will be populated with the data.

## Limitations

- If you want usernames, passwords, and permissions to be restored, you must include
a restore of the `system` graph.
- The container has not yet been tested with incremental backups.
- For the time being, Google storage is the only implemented cloud storage option,
but adapting this approach to S3 or other storage should be fairly straightforward with modifications to `restore.sh`.

# Rolling Upgrades

[NOTE]
This document expands on the Neo4j Operations Manual entry
https://neo4j.com/docs/operations-manual/current/upgrade/causal-cluster/[Upgrade a Causal Cluster] with information about approaches to rolling upgrades in Kubernetes.

## Before you Begin

1. Read all of that documentation before attempting rolling upgrades. Not all relevant concepts are described in this document; familiarity with the other pages is assumed.
2. Perform a test upgrade on a production-like environment to get information on the duration of the downtime, if any, that may be necessary.

## When is this needed?

* When you have a Neo4j Causal Cluster (standalone does not apply)
* When you need to upgrade to a new minor or patch version of Neo4j
* When you must keep the cluster online with both read and write capabilities throughout the upgrade process

## What This Approach Doesn't Cover

Moving between major versions of Neo4j (for example, 3.5 and 4.0)
requires more planning, due to these factors:

* Substantial configuration changes needed on the pods between 3.5 and 4.0
* Major changes in product features which impact what clients can rely upon
* Changes in the helm charts used, and their structure; if you're using an old
3.5 helm chart, the differences between what you're using and this repo may be substantial
* The need for a store upgrade operation to change the format of Neo4j data on disk

Neo4j 4.0 is not backwards compatible with 3.5; additional planning and an offline upgrade are recommended.

If you are in the situation of migrating from Neo4j 3.5 to 4.0, please consult
https://neo4j.com/docs/migration-guide/current/[the Neo4j Migration Guide].

If this won't work for your constraints, please check "Alternatives to Rolling Upgrades"
at the very bottom of this document.

## High-Level Approach

1. Take a backup.
2. Scale the core StatefulSet up, to maintain high availability.
3. Choose and apply your UpdateStrategy.
4. Patch the StatefulSet to apply the new Neo4j version.
5. Monitor the process.
6. (Optional/If applicable) Apply the above process to the read replica StatefulSet as well.
7. On success, scale back down to the original size.

We will now describe each step, how to do it, and why.

## Take a Backup

Before doing any major system maintenance operation, it is crucial to have an up-to-date backup, ensuring that if anything goes wrong, there is a point in time to return to for the database's state.

In addition, all operations should be tested on a staging or production-like environment as a "dry run" before attempting them on application-critical systems. Performing backups is covered in the user guide in this repository.

## Scale the Core StatefulSet Up

If you normally have 3 core members in your StatefulSet, they are providing a valuable
https://neo4j.com/docs/operations-manual/current/clustering/introduction/#causal-clustering-introduction-operational[high availability purpose, and a quorum].

In a rolling upgrade operation, we are going to take each server *down* in its turn. While one is stopping/restarting, we are (temporarily) weakening the HA characteristics of the
cluster, reducing its ability to serve queries. To mitigate this, before doing
a rolling upgrade we scale the cluster *up*, from say 3 cores to 5. We then roll
changes through, so that at any given moment 4 of 5 cores are available.

Given a cluster deployment named "mygraph", you can scale it to 5 cores like so:

```shell
kubectl scale statefulsets mygraph-neo4j-core --replicas=5
```

This should immediately schedule 2 new pods with the same configuration (and the *old* version of Neo4j) to start up and join the cluster. It is recommended that you scale by 2, not by 1,
to ensure that the number of cores in the cluster is always odd.

[NOTE]
**Remember**: when new members join the cluster, they
need to pull current transactional state before the cluster is stable. Having members restore from a recent
backup first is strongly recommended, to minimize the load of the
https://neo4j.com/docs/operations-manual/current/clustering-advanced/lifecycle/#causal-clustering-catchup-protocol[catch-up process].

If you do not restore from backups on each pod, it will still work, but the new members may take
substantial time to catch up, particularly if you have a lot of data or
transactional history.

Consult https://kubernetes.io/docs/tasks/run-application/scale-stateful-set/#scaling-statefulsets[scaling StatefulSets] in the Kubernetes documentation for more information.

## Choose and Apply your Update Strategy

For more details, consult the Kubernetes documentation on https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies[update strategies for StatefulSets].

In the next steps, we're going to tell the StatefulSet to change the version of Neo4j that it is
running. Before we do, we need to give it a strategy for how the rolling upgrade should proceed.
You have two key options, `RollingUpdate` and `OnDelete`. This document assumes the
`RollingUpdate` strategy, in which Kubernetes does the following:

* Starting from the end (the highest core number), it shuts a pod down and restarts it. Because
we're going to patch the version, when that pod restarts, it will come up with the new version.
* Kubernetes waits for the pod to reach the Ready state.
* The rolling update proceeds with the next lowest index.
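
Setting the strategy is a one-line patch (strategic merge), for example:

```shell
kubectl patch statefulset mygraph-neo4j-core \
  -p '{"spec":{"updateStrategy":{"type":"RollingUpdate"}}}'
```
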

### Criticality of Readiness / Liveness Checks

Kubernetes doesn't know much about Neo4j; it needs to be provided a definition of what it
means for Neo4j to be "ready". If the new pod isn't ready before the next one rolls, you
could get into a situation where the cluster doesn't have time to stabilize before losing
more members.

The readiness checks described in the user manual cover this purpose. Review them for
adequacy and appropriateness for your deployment, and revise if necessary *before* performing
a rolling upgrade.

## Patch the StatefulSet

Up until now, everything we have done was preparation; this step actually effects the
change. If you run this, then subject to the `UpdateStrategy` you specified above, Kubernetes
will start killing and restarting pods.

```shell
NEW_VERSION=neo4j:4.1.0-enterprise
kubectl patch statefulset mygraph-neo4j-core --type='json' \
   -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value":"'$NEW_VERSION'"}]'
```

## Monitor the Process

What we're looking for:

* Only one pod at a time stopping/restarting
* A consistent ability to maintain a connection to the cluster, with both read and write ability throughout
* The `debug.log` file and container output, to make sure the process is proceeding correctly
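
Two commands that help here (watching pod churn, and blocking until the roll completes):

```shell
# Watch pods restart one at a time
kubectl get pods -w

# Wait until the StatefulSet has fully rolled out the new version
kubectl rollout status statefulset/mygraph-neo4j-core
```
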

## (Optional) Apply the Process to the Read Replicas

Read replicas are handled in a second StatefulSet. The process of applying an update
strategy, patching the StatefulSet, and monitoring is the same.

**Recommended:** make sure you have a fully migrated and healthy core member set
before working with read replicas. Do not roll both sets at once.

## Scale Back Down

If everything completed correctly, you should end up with a 5-member cluster running
the new version, 100% online. After success, the extra members are no longer needed,
and you may scale back down to the original size of the cluster.

```shell
kubectl scale statefulsets mygraph-neo4j-core --replicas=3
```

## Alternatives to Rolling Upgrades

If you can tolerate a period of write unavailability while maintaining full read availability,
Kubernetes provides a secondary option to a rolling upgrade. This document focuses on
rolling upgrades, but as a sketch of the main alternative:

1. Configure your current cluster to be backed by a single DNS record (mycluster.company.com).
2. Take a backup of your current cluster.
3. Launch a second cluster running the new version of Neo4j (mycluster-updated). Restore
this cluster from the last backup of `mycluster`.
4. Hot-swap the DNS record to point to the second cluster (mycluster-updated).
5. Shut down the original cluster (mycluster).

[NOTE]
If you adopt this approach, you will need a maintenance window in which you are
not accepting writes, as of the point of backup. The period of write unavailability runs
between step 2 and the readiness of DNS in step 4. If writes come in during this time
period to the original cluster, they will be missing from the updated cluster.

This approach maintains read availability throughout, and it is reasonably safe: if
the new cluster fails to migrate properly or there is a data issue, this does not compromise
the availability of the running production system.

= Tooling

[abstract]
How to use Neo4j Tooling in conjunction with Kubernetes

## Neo4j Browser

[NOTE]
In order to use Neo4j Browser you must follow the xref::externalexposure.adoc[external exposure instructions], which provide a walk-through of how to make your cluster available on the Internet.

Neo4j Browser is available on port 7474 of any of the hostnames described above. However, because of the network environment that the cluster is in, hosts in the Neo4j cluster advertise themselves with private internal DNS names that are not resolvable from outside of the cluster.

Additionally, for secure communications, configuration of HTTPS and SSL certificates is recommended.

## Cypher Shell

Upon deploying the helm chart, you will be given a command that can be used to connect to the cluster. It schedules a new Neo4j pod called "cypher-shell" and invokes that command to connect; see NOTES.txt for an example.
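
The generated command is along these lines; the service DNS name depends on your release name and namespace, so treat this as a sketch rather than the exact output:

```shell
kubectl run -it --rm cypher-shell \
  --image=neo4j:4.1.0-enterprise \
  --restart=Never \
  --env NEO4J_ACCEPT_LICENSE_AGREEMENT=yes -- \
  cypher-shell -a neo4j://mygraph-neo4j.default.svc.cluster.local \
  -u neo4j -p mySecretPassword
```
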

Once you have a basic Bolt client and cypher-shell capability, please consult the standard Neo4j documentation on the many other usage options available.

## Neo4j-Admin Import

Matt Cockayne has published a blog post about how to use Kubernetes initContainers to run `neo4j-admin import` and https://phpboyscout.uk/pre-populating-neo4j-using-kubernetes-init-containers-and-neo4j-admin-import/[pre-load data from CSV, rather than from backup sets].

More generally, the technique he uses is the one recommended for any and all other Neo4j shell utilities. Rather than building a custom Docker image, it is recommended that you run a shell script inside of an initContainer to do whatever shell operations are necessary to prepare the data volume prior to the Neo4j container's start.

## Plugins

The Neo4j Docker container can take an extra environment variable, `NEO4JLABS_PLUGINS`, that pre-installs the most common plugins. The helm chart in turn has a `plugins` parameter that lets you specify this value.

The value must be a valid JSON array of plugin names. https://github.com/neo4j/docker-neo4j/blob/master/neo4jlabs-plugins.json[The list of valid names can be found here]; it includes APOC, Graph Data Science, Neo4j Streams (Kafka integration), and others.

The way this mechanism works is that each plugin publishes a versions.json file that allows the Neo4j Docker container to determine at runtime which version of the plugin JAR to download and put in place.

APOC is included by default; comment out the `plugins` parameter or set it to the empty string to disable APOC. By adding other plugin names to the default shown below, you can use multiple plugins.

```yaml
plugins: "[\"apoc\"]"
```
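
For example, to add Graph Data Science alongside APOC (the `graph-data-science` identifier is an assumption; verify it against the linked neo4jlabs-plugins.json):

```shell
cat > plugin-values.yaml <<'EOF'
plugins: "[\"apoc\", \"graph-data-science\"]"
EOF

helm install mygraph RELEASE_URL \
  --set acceptLicenseAgreement=yes \
  -f plugin-values.yaml
```
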

[NOTE]
Take care to use proper syntax and escaping: YAML can interpret JSON, and here we are aiming to specify a string containing JSON.

An example configuration has been provided in the deployment scenarios folder that shows installation of a standalone instance using Neo4j's Graph Data Science plugin.

Other/custom plugins still require the use of initContainers to download and install the plugin at runtime.

[#troubleshooting]
= Neo4j-Helm Troubleshooting

== Are you starting with empty disks?

The most common issue associated with the helm chart is persistent volume claim reuse. For example, if you deploy `mygraph`, delete this instance, and then redeploy a new, different `mygraph`, it will not get clean empty PVCs; it will reuse the old PVCs from the previous deployment.

[NOTE]
**When you uninstall a helm distribution, it does not remove persistent volume claims.**

Make sure the disks you're starting with are empty, to avoid file permission and other issues.

== Have you checked debug.log?

Neo4j writes a `debug.log` file, stored in the logs directory. This is the place
to check when you run into trouble. Inspect this file and look for `Exception` messages; often
the product logs will tell you what the issue is.
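
For example, for a release named `mygraph`:

```shell
# Show the most recent exceptions recorded on a core pod
kubectl exec mygraph-neo4j-core-0 -- \
  grep -i exception /data/logs/debug.log | tail -n 20
```
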

== Something Else?

The https://community.neo4j.com/c/neo4j-graph-platform/cloud/76[Neo4j Community Site] is a great place
to ask for help.

‎doc/package-lock.json

(+2,289 lines; generated file, not rendered)

‎doc/package.json


```json
{
  "name": "@neo4j/docs",
  "version": "1.0.0",
  "description": "Neo4j Docs builder",
  "main": "index.js",
  "scripts": {
    "start": "node server.js",
    "build": "npm run build:developer && npm run build:labs && npm run build:labs-docs && npm run build:docs",
    "build:docs": "antora --fetch --stacktrace docs.yml"
  },
  "license": "ISC",
  "dependencies": {
    "@antora/cli": "^2.3.3",
    "@antora/site-generator-default": "^2.3.3"
  },
  "devDependencies": {
    "express": "^4.17.1"
  }
}
```

‎doc/server.js


```js
// Minimal Express server for previewing the generated Antora site locally
const express = require('express')
const app = express()
app.use(express.static('./build/site'))
app.get('/', (req, res) => res.redirect('/neo4j-helm/1.0.0'))
app.listen(8000, () => console.log('📘 http://localhost:8000'))
```

‎tools/Makefile


```diff
@@ -2,6 +2,7 @@ default: docker_build
 REGISTRY = gcr.io/neo4j-helm
 TEST_IMAGE ?= $(REGISTRY)/tester
 BACKUP_IMAGE ?= $(REGISTRY)/backup
+BUILD_IMAGE ?= $(REGISTRY)/build
 RESTORE_IMAGE ?= $(REGISTRY)/restore
 DOCKER_TAG ?= `cat ../Chart.yaml | grep version | sed 's/.*: //'`

@@ -12,11 +13,12 @@ docker_build:
 		-t $(TEST_IMAGE):$(DOCKER_TAG) -f test/Dockerfile .

 	docker build -t $(BACKUP_IMAGE):$(DOCKER_TAG) -f backup/Dockerfile .
-
+	docker build -t $(BUILD_IMAGE):latest -f build/Dockerfile .
 	docker build -t $(RESTORE_IMAGE):$(DOCKER_TAG) -f restore/Dockerfile .

 docker_push:
 	# Push to DockerHub
 	docker push $(TEST_IMAGE):$(DOCKER_TAG)
 	docker push $(BACKUP_IMAGE):$(DOCKER_TAG)
-	docker push $(RESTORE_IMAGE):$(DOCKER_TAG)
+	docker push $(RESTORE_IMAGE):$(DOCKER_TAG)
+	docker push $(BUILD_IMAGE):latest
```

‎tools/build/Dockerfile

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
FROM debian:stretch
2+
3+
# Tools needed
4+
# gcloud
5+
# helm
6+
# kubectl
7+
# node
8+
# npm
9+
10+
# Secure software install; required first in order to be able to process keys, packages, etc.
11+
RUN apt-get update && apt-get install -y apt-transport-https ca-certificates curl gnupg2 software-properties-common
12+
13+
# Google Cloud stuff
14+
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
15+
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
16+
17+
# Docker stuff
18+
RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add -
19+
RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian $(lsb_release -cs) stable"
20+
21+
# Will run apt-get update for us.
22+
RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
23+
24+
RUN apt-get install -y google-cloud-sdk wget make gettext-base jq nodejs npm
25+
26+
# Install helm
27+
RUN curl -LO https://get.helm.sh/helm-v3.2.1-linux-amd64.tar.gz
28+
RUN tar zxvf helm-v3.2.1-linux-amd64.tar.gz
29+
RUN mv linux-amd64/helm /usr/bin
30+
RUN /usr/bin/helm version
31+
32+
# Kubectl
33+
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
34+
RUN chmod +x kubectl
35+
RUN mv kubectl /usr/bin
36+
RUN /usr/bin/kubectl --help
37+

‎tools/build/README-build.md


This directory contains a Dockerfile used to build the container that runs in the
CI pipeline. As such, it needs Kubernetes, Google, and Docker tools to interact
with the repo itself.
