Skip to content

Commit a4a3be6

Browse files
Merge pull request #120 from learningOrchestra/feature-data-scientist-pipeline
Feature data scientist pipeline
2 parents ce38d66 + 6edcead commit a4a3be6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+3771
-986
lines changed

docker-compose.yml

Lines changed: 87 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,35 @@ services:
9999
volumes:
100100
- "images:/var/lib/registry"
101101

102-
visualizer:
103-
image: dockersamples/visualizer:latest
102+
agent:
103+
image: portainer/agent:linux-amd64-2.1.0-alpine
104104
volumes:
105-
- "/var/run/docker.sock:/var/run/docker.sock"
105+
- /var/run/docker.sock:/var/run/docker.sock
106+
- /var/lib/docker/volumes:/var/lib/docker/volumes
107+
networks:
108+
- portainer
109+
deploy:
110+
mode: global
111+
placement:
112+
constraints:
113+
- "node.platform.os==linux"
114+
115+
portainer:
116+
image: portainer/portainer-ce:2.1.1-alpine
117+
command: -H tcp://tasks.agent:9001 --tlsskipverify
106118
ports:
107-
- "8000:8080"
108-
deploy: *default-deploy-manager
119+
- "9000:9000"
120+
- "8000:8000"
121+
volumes:
122+
- portainer:/data
123+
networks:
124+
- portainer
125+
deploy:
126+
mode: replicated
127+
replicas: 1
128+
placement:
129+
constraints:
130+
- "node.role==manager"
109131

110132
sparkmaster:
111133
build: microservices/spark_image
@@ -201,68 +223,96 @@ services:
201223
- database
202224
environment: *default-service-database-env
203225

204-
tsne:
205-
build: microservices/tsne_image
206-
image: 127.0.0.1:5050/tsne
226+
databasexecutor:
227+
build: microservices/database_executor_image
228+
image: 127.0.0.1:5050/database_executor
207229
ports:
208-
- "5005:5005"
209-
- "41200:41200"
210-
extra_hosts:
211-
- "tsne:0.0.0.0"
230+
- "5006:5006"
212231
depends_on:
213232
- databaseprimary
214233
- images
215-
- sparkmaster
216-
- sparkworker
217234
deploy: *default-deploy-manager
218235
volumes:
219-
- "tsne:/images"
236+
- "database_executor:/explore"
237+
- "database_executor:/transform"
238+
- "default_model:/models"
239+
- "binary_executor:/binaries"
220240
networks:
221241
- database
222-
- spark
223242
environment: *default-service-database-env
224243

225-
pca:
226-
build: microservices/pca_image
227-
image: 127.0.0.1:5050/pca
244+
gatewayapi:
245+
image: devopsfaith/krakend:1.2.0
246+
volumes:
247+
- "./microservices/krakend:/etc/krakend"
228248
ports:
229-
- "5006:5006"
230-
- "41300:41300"
231-
extra_hosts:
232-
- "pca:0.0.0.0"
249+
- "80:8080"
250+
- "8090:8090"
251+
deploy: *default-deploy-manager
252+
networks:
253+
- database
254+
- spark
255+
256+
defaultmodel:
257+
build: microservices/default_model_image
258+
image: 127.0.0.1:5050/default_model
259+
ports:
260+
- "5007:5007"
233261
depends_on:
234262
- databaseprimary
235263
- images
236-
- sparkmaster
237-
- sparkworker
238264
deploy: *default-deploy-manager
239-
volumes:
240-
- "pca:/images"
241265
networks:
242266
- database
243-
- spark
244267
environment: *default-service-database-env
245-
246-
gatewayapi:
247-
image: devopsfaith/krakend:1.2.0
248268
volumes:
249-
- "./microservices/krakend:/etc/krakend"
269+
- "default_model:/models"
270+
271+
binaryexecutor:
272+
build: microservices/binary_executor_image
273+
image: 127.0.0.1:5050/binary_executor
250274
ports:
251-
- "80:8080"
252-
- "8090:8090"
275+
- "5008:5008"
276+
depends_on:
277+
- databaseprimary
278+
- images
253279
deploy: *default-deploy-manager
254280
networks:
255281
- database
256-
- spark
282+
environment: *default-service-database-env
283+
volumes:
284+
- "default_model:/models"
285+
- "binary_executor:/binaries"
286+
- "database_executor:/transform"
257287

288+
codexecutor:
289+
build: microservices/code_executor_image
290+
image: 127.0.0.1:5050/code_executor
291+
ports:
292+
- "5009:5009"
293+
depends_on:
294+
- databaseprimary
295+
- images
296+
deploy: *default-deploy-manager
297+
volumes:
298+
- "database_executor:/explore"
299+
- "database_executor:/transform"
300+
- "default_model:/models"
301+
- "binary_executor:/binaries"
302+
networks:
303+
- database
304+
environment: *default-service-database-env
258305

259306
networks:
260307
database:
261308
spark:
309+
portainer:
262310

263311
volumes:
264312
images:
265313
database:
266314
database_api:
267-
tsne:
268-
pca:
315+
database_executor:
316+
default_model:
317+
binary_executor:
318+
portainer:
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
FROM python:3.7-slim
2+
3+
WORKDIR /usr/src/binary_executor
4+
COPY . /usr/src/binary_executor
5+
RUN pip install -r requirements.txt
6+
7+
ENV MICROSERVICE_IP "0.0.0.0"
8+
ENV MICROSERVICE_PORT 5008
9+
ENV BINARY_VOLUME_PATH "/binaries"
10+
ENV MODELS_VOLUME_PATH "/models"
11+
ENV TRANSFORM_VOLUME_PATH "/transform"
12+
13+
CMD ["python", "server.py"]
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import importlib
2+
from concurrent.futures import ThreadPoolExecutor
3+
from utils import Database, Data, Metadata, ObjectStorage
4+
from constants import Constants
5+
6+
7+
class Parameters:
8+
__DATASET_KEY_CHARACTER = "$"
9+
__DATASET_WITH_OBJECT_KEY_CHARACTER = "."
10+
__REMOVE_KEY_CHARACTER = ""
11+
12+
def __init__(self, database: Database, data: Data):
13+
self.__database_connector = database
14+
self.__data = data
15+
16+
def treat(self, method_parameters: dict) -> dict:
17+
parameters = method_parameters.copy()
18+
19+
for name, value in parameters.items():
20+
if self.__is_dataset(value):
21+
dataset_name = self.__get_dataset_name_from_value(
22+
value)
23+
if self.__has_dot_in_dataset_name(value):
24+
object_name = self.__get_name_after_dot_from_value(value)
25+
26+
parameters[name] = self.__data.get_object_from_dataset(
27+
dataset_name, object_name)
28+
29+
else:
30+
parameters[name] = self.__data.get_dataset_content(
31+
dataset_name)
32+
33+
return parameters
34+
35+
def __is_dataset(self, value: str) -> bool:
36+
return self.__DATASET_KEY_CHARACTER in value
37+
38+
def __get_dataset_name_from_value(self, value: str) -> str:
39+
dataset_name = value.replace(self.__DATASET_KEY_CHARACTER,
40+
self.__REMOVE_KEY_CHARACTER)
41+
return dataset_name.split(self.__DATASET_WITH_OBJECT_KEY_CHARACTER)[
42+
Constants.FIRST_ARGUMENT]
43+
44+
def __has_dot_in_dataset_name(self, dataset_name: str) -> bool:
45+
return self.__DATASET_WITH_OBJECT_KEY_CHARACTER in dataset_name
46+
47+
def __get_name_after_dot_from_value(self, value: str) -> str:
48+
return value.split(
49+
self.__DATASET_WITH_OBJECT_KEY_CHARACTER)[Constants.SECOND_ARGUMENT]
50+
51+
52+
class Execution:
53+
__DATASET_KEY_CHARACTER = "$"
54+
__REMOVE_KEY_CHARACTER = ""
55+
56+
def __init__(self,
57+
database_connector: Database,
58+
executor_name: str,
59+
executor_service_type: str,
60+
parent_name: str,
61+
parent_name_service_type: str,
62+
metadata_creator: Metadata,
63+
class_method: str,
64+
parameters_handler: Parameters,
65+
storage: ObjectStorage,
66+
):
67+
self.__metadata_creator = metadata_creator
68+
self.__thread_pool = ThreadPoolExecutor()
69+
self.__database_connector = database_connector
70+
self.__storage = storage
71+
self.__parameters_handler = parameters_handler
72+
self.executor_name = executor_name
73+
self.parent_name = parent_name
74+
self.class_method = class_method
75+
self.executor_service_type = executor_service_type
76+
self.parent_name_service_type = parent_name_service_type
77+
78+
def create(self,
79+
module_path: str,
80+
class_name: str,
81+
method_parameters: dict,
82+
description: str) -> None:
83+
84+
self.__metadata_creator.create_file(self.parent_name,
85+
self.executor_name,
86+
module_path,
87+
class_name,
88+
self.class_method,
89+
self.executor_service_type)
90+
91+
self.__thread_pool.submit(self.__pipeline,
92+
module_path,
93+
method_parameters,
94+
description)
95+
96+
def update(self,
97+
module_path: str,
98+
method_parameters: dict,
99+
description: str) -> None:
100+
self.__metadata_creator.update_finished_flag(self.executor_name, False)
101+
102+
self.__thread_pool.submit(self.__pipeline,
103+
module_path,
104+
method_parameters,
105+
description)
106+
107+
def __pipeline(self,
108+
module_path: str,
109+
method_parameters: dict,
110+
description: str) -> None:
111+
try:
112+
importlib.import_module(module_path)
113+
model_instance = self.__storage.read(self.parent_name,
114+
self.parent_name_service_type)
115+
method_result = self.__execute_a_object_method(model_instance,
116+
self.class_method,
117+
method_parameters)
118+
self.__storage.save(method_result, self.executor_name,
119+
self.executor_service_type)
120+
self.__metadata_creator.update_finished_flag(self.executor_name,
121+
flag=True)
122+
123+
except Exception as exception:
124+
self.__metadata_creator.create_execution_document(
125+
self.executor_name,
126+
description,
127+
method_parameters,
128+
repr(exception))
129+
return None
130+
131+
self.__metadata_creator.create_execution_document(self.executor_name,
132+
description,
133+
method_parameters,
134+
)
135+
136+
def __execute_a_object_method(self, class_instance: object, method: str,
137+
parameters: dict) -> object:
138+
model_method = getattr(class_instance, method)
139+
140+
treated_parameters = self.__parameters_handler.treat(parameters)
141+
return model_method(**treated_parameters)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
class Constants:
2+
MODULE_PATH_FIELD_NAME = "modulePath"
3+
CLASS_FIELD_NAME = "class"
4+
PARENT_NAME_FIELD_NAME = "parentName"
5+
NAME_FIELD_NAME = "name"
6+
FINISHED_FIELD_NAME = "finished"
7+
DESCRIPTION_FIELD_NAME = "description"
8+
METHOD_FIELD_NAME = "method"
9+
METHOD_PARAMETERS_FIELD_NAME = "methodParameters"
10+
TYPE_FIELD_NAME = "type"
11+
EXCEPTION_FIELD_NAME = "exception"
12+
13+
MODELS_VOLUME_PATH = "MODELS_VOLUME_PATH"
14+
BINARY_VOLUME_PATH = "BINARY_VOLUME_PATH"
15+
TRANSFORM_VOLUME_PATH = "TRANSFORM_VOLUME_PATH"
16+
17+
DELETED_MESSAGE = "deleted file"
18+
19+
HTTP_STATUS_CODE_SUCCESS = 200
20+
HTTP_STATUS_CODE_SUCCESS_CREATED = 201
21+
HTTP_STATUS_CODE_CONFLICT = 409
22+
HTTP_STATUS_CODE_NOT_ACCEPTABLE = 406
23+
HTTP_STATUS_CODE_NOT_FOUND = 404
24+
GET_METHOD_NAME = "GET"
25+
26+
DATABASE_URL = "DATABASE_URL"
27+
DATABASE_PORT = "DATABASE_PORT"
28+
DATABASE_NAME = "DATABASE_NAME"
29+
DATABASE_REPLICA_SET = "DATABASE_REPLICA_SET"
30+
31+
ID_FIELD_NAME = "_id"
32+
METADATA_DOCUMENT_ID = 0
33+
34+
MESSAGE_RESULT = "result"
35+
36+
MICROSERVICE_URI_SWITCHER = {
37+
"tune": "/api/learningOrchestra/v1/tune/",
38+
"train": "/api/learningOrchestra/v1/train/",
39+
"evaluate": "/api/learningOrchestra/v1/evaluate/",
40+
"predict": "/api/learningOrchestra/v1/predict/"
41+
}
42+
43+
DEFAULT_MODEL_TYPE = "defaultModel"
44+
TUNE_TYPE = "tune"
45+
TRAIN_TYPE = "train"
46+
EVALUATE_TYPE = "evaluate"
47+
PREDICT_TYPE = "predict"
48+
TRANSFORM_TYPE = "transform"
49+
PYTHON_TRANSFORM_TYPE = "pythonTransform"
50+
51+
MICROSERVICE_URI_PATH = "/binaryExecutor"
52+
MICROSERVICE_URI_GET_PARAMS = "?query={}&limit=20&skip=0"
53+
54+
FIRST_ARGUMENT = 0
55+
SECOND_ARGUMENT = 1
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pymongo==3.10.1
2+
flask==1.1.2
3+
datetime==4.3
4+
pytz==2020.1
5+
scikit-learn
6+
pandas==1.2.0

0 commit comments

Comments
 (0)