Skip to content

Commit

Permalink
feat: add datalake service (#8184)
Browse files Browse the repository at this point in the history
Signed-off-by: Alexander Onnikov <[email protected]>
  • Loading branch information
aonnikov authored Mar 10, 2025
1 parent 06abf0f commit 92273f5
Show file tree
Hide file tree
Showing 34 changed files with 2,421 additions and 3 deletions.
2 changes: 1 addition & 1 deletion common/config/rush/command-line.json
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@
"summary": "Build docker with platform",
"description": "use to build all docker containers required for platform",
"safeForSimultaneousRushProcesses": true,
"shellCommand": "rush docker:build -p 20 --to @hcengineering/pod-server --to @hcengineering/pod-front --to @hcengineering/prod --to @hcengineering/pod-account --to @hcengineering/pod-workspace --to @hcengineering/pod-collaborator --to @hcengineering/tool --to @hcengineering/pod-print --to @hcengineering/pod-sign --to @hcengineering/pod-analytics-collector --to @hcengineering/rekoni-service --to @hcengineering/pod-ai-bot --to @hcengineering/import-tool --to @hcengineering/pod-stats --to @hcengineering/pod-fulltext --to @hcengineering/pod-love --to @hcengineering/green --to @hcengineering/pod-mail"
"shellCommand": "rush docker:build -p 20 --to @hcengineering/pod-server --to @hcengineering/pod-front --to @hcengineering/prod --to @hcengineering/pod-account --to @hcengineering/pod-workspace --to @hcengineering/pod-collaborator --to @hcengineering/tool --to @hcengineering/pod-print --to @hcengineering/pod-sign --to @hcengineering/pod-analytics-collector --to @hcengineering/rekoni-service --to @hcengineering/pod-ai-bot --to @hcengineering/import-tool --to @hcengineering/pod-stats --to @hcengineering/pod-fulltext --to @hcengineering/pod-love --to @hcengineering/green --to @hcengineering/pod-mail --to @hcengineering/pod-datalake"
},
{
"commandKind": "global",
Expand Down
52 changes: 52 additions & 0 deletions common/config/rush/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions rush.json
Original file line number Diff line number Diff line change
Expand Up @@ -2118,6 +2118,11 @@
"projectFolder": "dev/doc-import-tool",
"shouldPublish": false
},
{
"packageName": "@hcengineering/pod-datalake",
"projectFolder": "services/datalake/pod-datalake",
"shouldPublish": false
},
{
"packageName": "@hcengineering/pod-love",
"projectFolder": "services/love",
Expand Down
4 changes: 2 additions & 2 deletions server/datalake/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ export class DatalakeClient {
// R2

async getR2UploadParams (ctx: MeasureContext, workspace: WorkspaceUuid): Promise<R2UploadParams> {
const path = `/upload/r2/${workspace}`
const path = `/upload/s3/${workspace}`
const url = concatLink(this.endpoint, path)

const response = await fetchSafe(ctx, url, { headers: { ...this.headers } })
Expand All @@ -388,7 +388,7 @@ export class DatalakeClient {
filename: string
}
): Promise<void> {
const path = `/upload/r2/${workspace}/${encodeURIComponent(objectName)}`
const path = `/upload/s3/${workspace}/${encodeURIComponent(objectName)}`
const url = concatLink(this.endpoint, path)

await fetchSafe(ctx, url, {
Expand Down
7 changes: 7 additions & 0 deletions services/datalake/pod-datalake/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// ESLint configuration for this package.
// Extends the default profile shipped with @hcengineering/platform-rig and
// points the parser at this package's own tsconfig.json.
module.exports = {
extends: ['./node_modules/@hcengineering/platform-rig/profiles/default/eslint.config.json'],
parserOptions: {
// Resolve `project` relative to the directory that contains this file
tsconfigRootDir: __dirname,
project: './tsconfig.json'
}
}
4 changes: 4 additions & 0 deletions services/datalake/pod-datalake/.npmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*
!/lib/**
!CHANGELOG.md
/lib/**/__tests__/
9 changes: 9 additions & 0 deletions services/datalake/pod-datalake/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Runtime image for the datalake service.
FROM hardcoreeng/base:v20250113a
WORKDIR /usr/src/app

# sharp is excluded from the bundle (the bundle script passes --external=sharp),
# so it has to be installed into the image. Install scripts are explicitly
# enabled for this install. The version is pinned inside the ~0.32.0 range
# declared in package.json.
RUN npm install --ignore-scripts=false --verbose sharp@0.32.6 --unsafe-perm

# The bundled service and its external source map
COPY bundle/bundle.js ./
COPY bundle/bundle.js.map ./

CMD [ "dumb-init", "node", "bundle.js" ]
4 changes: 4 additions & 0 deletions services/datalake/pod-datalake/config/rig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"$schema": "https://developer.microsoft.com/json-schemas/rig-package/rig.schema.json",
"rigPackageName": "@hcengineering/platform-rig"
}
7 changes: 7 additions & 0 deletions services/datalake/pod-datalake/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Jest configuration for this package: TypeScript tests are run through the
// ts-jest preset in a Node test environment.
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
// Matches files named `spec`/`test` or ending in `.spec`/`.test`, with a js, ts, jsx or tsx extension
testMatch: ['**/?(*.)+(spec|test).[jt]s?(x)'],
// Only look for tests under ./src
roots: ["./src"],
coverageReporters: ["text-summary", "html"]
}
76 changes: 76 additions & 0 deletions services/datalake/pod-datalake/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"name": "@hcengineering/pod-datalake",
"version": "0.6.0",
"main": "lib/index.js",
"svelte": "src/index.ts",
"types": "types/index.d.ts",
"files": [
"lib/**/*",
"types/**/*",
"tsconfig.json"
],
"author": "Hardcore Engineering Inc.",
"scripts": {
"build": "compile",
"build:watch": "compile",
"test": "jest --passWithNoTests --silent",
"_phase:bundle": "rushx bundle",
"_phase:docker-build": "rushx docker:build",
"_phase:docker-staging": "rushx docker:staging",
"bundle": "node ../../../common/scripts/esbuild.js --keep-names=true --sourcemap=external --external=sharp",
"docker:build": "../../../common/scripts/docker_build.sh hardcoreeng/datalake",
"docker:tbuild": "docker build -t hardcoreeng/datalake . --platform=linux/amd64 && ../../../common/scripts/docker_tag_push.sh hardcoreeng/datalake",
"docker:abuild": "docker build -t hardcoreeng/datalake . --platform=linux/arm64 && ../../../common/scripts/docker_tag_push.sh hardcoreeng/datalake",
"docker:staging": "../../../common/scripts/docker_tag.sh hardcoreeng/datalake staging",
"docker:push": "../../../common/scripts/docker_tag.sh hardcoreeng/datalake",
"run-local": "cross-env ts-node src/index.ts",
"format": "format src",
"_phase:build": "compile transpile src",
"_phase:test": "jest --passWithNoTests --silent",
"_phase:format": "format src",
"_phase:validate": "compile validate"
},
"devDependencies": {
"@hcengineering/platform-rig": "^0.6.0",
"@tsconfig/node16": "^1.0.4",
"@types/cors": "^2.8.12",
"@types/express": "^4.17.13",
"@types/express-fileupload": "^1.1.7",
"@types/node": "~20.11.16",
"@types/ws": "^8.5.11",
"@typescript-eslint/eslint-plugin": "^6.11.0",
"@typescript-eslint/parser": "^6.11.0",
"esbuild": "^0.24.2",
"eslint": "^8.54.0",
"eslint-config-standard-with-typescript": "^40.0.0",
"eslint-plugin-import": "^2.26.0",
"eslint-plugin-n": "^15.4.0",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^6.1.1",
"jest": "^29.7.0",
"ts-jest": "^29.1.1",
"@types/jest": "^29.5.5",
"prettier": "^3.1.0",
"ts-node": "^10.8.0",
"typescript": "^5.3.3",
"@types/sharp": "~0.32.0"
},
"dependencies": {
"@hcengineering/server-token": "^0.6.11",
"@hcengineering/server-core": "^0.6.1",
"@hcengineering/server-client": "^0.6.0",
"@hcengineering/core": "^0.6.32",
"@hcengineering/platform": "^0.6.11",
"@hcengineering/account-client": "^0.6.0",
"cors": "^2.8.5",
"dotenv": "~16.0.0",
"express": "^4.21.2",
"express-fileupload": "^1.5.1",
"postgres": "^3.4.5",
"sharp": "~0.32.0",
"@aws-sdk/client-s3": "^3.738.0",
"@aws-sdk/s3-request-presigner": "^3.738.0",
"@aws-sdk/lib-storage": "^3.738.0",
"@smithy/node-http-handler": "^4.0.2"
}
}
90 changes: 90 additions & 0 deletions services/datalake/pod-datalake/src/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//

/**
 * Settings of a single storage bucket, as produced by parsing one
 * `bucket,location|url` entry of the buckets configuration string.
 */
export interface BucketConfig {
// Bucket name: the part before the comma in `bucket,location`
bucket: string
// Location label: the part after the comma in `bucket,location`
location: string
// Endpoint derived from the configured URL, without its query string
endpoint: string
// Value of the `accessKey` query parameter of the configured URL (empty string when absent)
accessKey: string
// Value of the `secretKey` query parameter of the configured URL (empty string when absent)
secretKey: string
// Value of the `region` query parameter; the parser falls back to 'auto' when it is absent
region?: string
}

/**
 * Service configuration resolved from environment variables.
 */
export interface Config {
// From PORT; defaults to 4030 when PORT is not set
Port: number
// From SECRET (required)
Secret: string
// From ACCOUNTS_URL (required)
AccountsUrl: string
// From DB_URL (required)
DbUrl: string
// Parsed from BUCKETS, a `;`-separated list of bucket definitions; empty when BUCKETS is not set
Buckets: BucketConfig[]
}

/**
 * Parses a numeric setting from its textual form.
 *
 * @param str - raw value, e.g. the content of an environment variable
 * @returns the parsed number, or `undefined` when the value is missing, blank
 * or not a finite number, so that callers can apply a default with `??`
 */
const parseNumber = (str: string | undefined): number | undefined => {
  // Number('') is 0, which would silently hide a blank setting
  if (str === undefined || str.trim() === '') {
    return undefined
  }

  const value = Number(str)
  // Number('abc') is NaN, and `NaN ?? fallback` keeps NaN
  return Number.isFinite(value) ? value : undefined
}

/**
 * Parses a `;`-separated list of bucket definitions.
 *
 * @param str - raw configuration string, or `undefined` when none is provided
 * @returns one parsed entry per `;`-separated segment; an empty list when
 * `str` is `undefined`
 */
function parseBucketsConfig (str: string | undefined): BucketConfig[] {
  return str === undefined ? [] : str.split(';').map(parseBucketConfig)
}

/**
 * Parses a single bucket definition of the form `bucket,location|url`.
 *
 * The URL provides the storage endpoint; its query string carries the
 * `accessKey`, `secretKey` and optional `region` parameters.
 *
 * @param str - one bucket definition
 * @returns the parsed bucket configuration; a missing `accessKey` or
 * `secretKey` becomes an empty string and a missing `region` becomes `'auto'`
 * @throws Error when the `|` or `,` separator is missing; `new URL` itself
 * throws when the URL part cannot be parsed
 */
function parseBucketConfig (str: string): BucketConfig {
  if (str === undefined) {
    throw new Error('Invalid bucket config')
  }

  const [name, url] = str.split('|')
  if (name === undefined || url === undefined) {
    throw new Error('Invalid bucket config')
  }

  const [bucket, location] = name.split(',')
  if (bucket === undefined || location === undefined) {
    throw new Error('Invalid bucket config')
  }

  const uri = new URL(url)
  // `host` keeps an explicit port (e.g. `minio:9000`), whereas `hostname` drops it
  const endpoint = uri.protocol + '//' + uri.host + uri.pathname

  return {
    bucket,
    location,
    endpoint,
    accessKey: uri.searchParams.get('accessKey') ?? '',
    secretKey: uri.searchParams.get('secretKey') ?? '',
    region: uri.searchParams.get('region') ?? 'auto'
  }
}

// Service configuration, resolved once from environment variables when the
// module is loaded. Loading fails if any attribute ends up undefined.
const config: Config = (() => {
  const { PORT, SECRET, ACCOUNTS_URL, DB_URL, BUCKETS } = process.env

  const resolved: Partial<Config> = {
    Port: parseNumber(PORT) ?? 4030,
    Secret: SECRET,
    AccountsUrl: ACCOUNTS_URL,
    DbUrl: DB_URL,
    Buckets: parseBucketsConfig(BUCKETS)
  }

  // Collect the names of all attributes that could not be resolved
  const missing: Array<keyof Config> = []
  for (const key of Object.keys(resolved) as Array<keyof Config>) {
    if (resolved[key] === undefined) {
      missing.push(key)
    }
  }

  if (missing.length > 0) {
    throw Error(`Missing config for attributes: ${missing.join(', ')}`)
  }

  return resolved as Config
})()

export default config
19 changes: 19 additions & 0 deletions services/datalake/pod-datalake/src/const.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Lifetime advertised through the Cache-Control `max-age` directive: 24 hours, in seconds.
export const expires = 24 * 60 * 60
// Cache-Control header value built from `expires`.
export const cacheControl = `public,max-age=${expires}`

// 1 MiB, in bytes.
// NOTE(review): presumably a size threshold related to content hashing — confirm against usage.
export const hashLimit = 1024 * 1024
Loading

0 comments on commit 92273f5

Please sign in to comment.