diff --git a/clipper_admin/clipper_admin/clipper_admin.py b/clipper_admin/clipper_admin/clipper_admin.py index 63c17ea39..1089d113e 100644 --- a/clipper_admin/clipper_admin/clipper_admin.py +++ b/clipper_admin/clipper_admin/clipper_admin.py @@ -525,7 +525,9 @@ def build_model(self, context_file.seek(0) image = "{cluster}-{name}:{version}".format( cluster=self.cm.cluster_identifier, name=name, version=version) - + if container_registry is not None: + image = "{reg}/{image}".format( + reg=container_registry, image=image) docker_client = docker.from_env() self.logger.info( "Building model Docker image with model data from {}".format( @@ -536,19 +538,12 @@ def build_model(self, if 'stream' in b and b['stream'] != '\n': #log build steps only self.logger.info(b['stream'].rstrip()) - tagged_image = image - if container_registry is not None: - tagged_image = "{reg}/{image}".format(reg=container_registry, image=image) - else: - self.logger.info('Container registry is not set') - docker_client.images.tag(image, tagged_image) - self.logger.info("Pushing model Docker image to {}".format(image)) @retry((docker.errors.APIError, TimeoutError, Timeout), tries=5, logger=self.logger) def _push_model(): - for line in docker_client.images.push(repository=tagged_image, stream=True): + for line in docker_client.images.push(repository=image, stream=True): self.logger.debug(line) _push_model() diff --git a/clipper_admin/clipper_admin/nomad/mgmt_deployment.py b/clipper_admin/clipper_admin/nomad/mgmt_deployment.py index 3ddc05dc0..c83e12ea5 100644 --- a/clipper_admin/clipper_admin/nomad/mgmt_deployment.py +++ b/clipper_admin/clipper_admin/nomad/mgmt_deployment.py @@ -1,8 +1,6 @@ -from .utils import nomad_job_prefix +from .utils import nomad_job_prefix, mgmt_job_prefix, mgmt_check import os -def mgmt_job_prefix(cluster_name): - return '{}-mgmt'.format(nomad_job_prefix(cluster_name)) """ Nomad payload to deploy a new mgmt """ def mgmt_deployment(job_id, datacenters, cluster_name, image, redis_ip, redis_port, num_replicas): @@ -41,7 +39,7 @@ def mgmt_deployment(job_id, datacenters, cluster_name, image, redis_ip, redis_po }, 'Services': [ { - 'Name': '{}-mgmt'.format(nomad_job_prefix(cluster_name)), + 'Name': mgmt_check(cluster_name), 'Tags': ['machine-learning', 'model', 'clipper', 'mgmt'], 'PortLabel': 'http', 'Checks': [ diff --git a/clipper_admin/clipper_admin/nomad/model_deployment.py b/clipper_admin/clipper_admin/nomad/model_deployment.py index 667f7116b..a4e079c1e 100644 --- a/clipper_admin/clipper_admin/nomad/model_deployment.py +++ b/clipper_admin/clipper_admin/nomad/model_deployment.py @@ -1,14 +1,4 @@ - -from .utils import nomad_job_prefix - -def model_job_prefix(cluster_name): - return '{}-model'.format(nomad_job_prefix(cluster_name)) - -def generate_model_job_name(cluster_name, model_name, model_version): - return '{}-{}-{}'.format(model_job_prefix(cluster_name), model_name, model_version) - -def model_check_name(cluster_name, name, version): - return '{}-model-{}-{}'.format(nomad_job_prefix(cluster_name), name, version) +from .utils import nomad_job_prefix, model_job_prefix, generate_model_job_name, model_check_name """ Nomad payload to deploy a new model """ def model_deployment( diff --git a/clipper_admin/clipper_admin/nomad/nomad_container_manager.py b/clipper_admin/clipper_admin/nomad/nomad_container_manager.py index 7a3dc6516..806d25ff7 100644 --- a/clipper_admin/clipper_admin/nomad/nomad_container_manager.py +++ b/clipper_admin/clipper_admin/nomad/nomad_container_manager.py @@ -15,6 +15,8 @@ from .query_frontend_deployment import query_frontend_deployment from .utils import nomad_job_prefix, query_frontend_job_prefix, query_frontend_service_check, query_frontend_rpc_check +from .utils import mgmt_job_prefix, mgmt_check +from .utils import redis_job_prefix, redis_check from dns.resolver import NXDOMAIN @@ -147,7 +149,7 @@ def start_clipper(self, def _start_redis(self, sleep_time=5): # If an existing Redis service isn't provided, start one if self.redis_ip is None: - job_id = 'redis' + job_id = redis_job_prefix(self.cluster_name) self.nomad.job.register_job(job_id, redis_deployment(job_id, self.datacenters, self.cluster_name)) @@ -155,7 +157,8 @@ def _start_redis(self, sleep_time=5): wait_count = 0 redis_ip = None redis_port = None - check_name = '{}-redis'.format(nomad_job_prefix(self.cluster_name)) + check_name = redis_check(self.cluster_name) + while redis_ip is None: time.sleep(3) wait_count += 3 @@ -167,17 +170,32 @@ def _start_redis(self, sleep_time=5): self.logger.info('Redis is at {}:{}'.format(redis_ip, redis_port)) except NXDOMAIN as err: self.logger.warning('DNS query failed: {}'.format(err)) + self.redis_ip = redis_ip self.redis_port = redis_port def _start_mgmt(self, mgmt_image, num_replicas): - job_id = 'clipper-mgmt' - self.nomad.job.register_job(job_id, mgmt_deployment(job_id, self.datacenters, self.cluster_name, mgmt_image, self.redis_ip, self.redis_port, num_replicas)) + job_id = mgmt_job_prefix(self.cluster_name), + self.nomad.job.register_job( + job_id, + mgmt_deployment( + job_id, + self.datacenters, + self.cluster_name, + mgmt_image, + self.redis_ip, + self.redis_port, + num_replicas + ) + ) wait_count = 0 + mgmt_ip = None mgmt_port = None - check_name = '{}-mgmt'.format(nomad_job_prefix(self.cluster_name)) + + check_name = mgmt_check(self.cluster_name) + while mgmt_ip is None: time.sleep(3) wait_count += 3 @@ -195,7 +213,7 @@ def _start_mgmt(self, mgmt_image, num_replicas): def _start_query(self, query_image, frontend_exporter_image, cache_size, qf_http_thread_pool_size, qf_http_timeout_request, qf_http_timeout_content, num_replicas): - job_id = 'clipper-query-frontend' + job_id = query_frontend_job_prefix(self.cluster_name) self.nomad.job.register_job( job_id, query_frontend_deployment( @@ -217,6 +235,7 @@ def _start_query(self, query_image, frontend_exporter_image, cache_size, wait_count = 0 query_ip = None query_port = None + while query_ip is None: time.sleep(3) wait_count += 3 @@ -232,9 +251,7 @@ def _start_query(self, query_image, frontend_exporter_image, cache_size, self.query_port = query_port def _resolve_query_ip(self): - check_name = 'check-service-query-frontend' - query_ip, query_port = self.dns.resolveSRV(check_name) - return (query_ip, query_port) + return self._resolve_query_frontend_service() """ This function queries the DNS server with a SRV request to get ip and port for the query frontend service (REST API) @@ -259,9 +276,7 @@ def connect(self): pass def deploy_model(self, name, version, input_type, image, num_replicas=1): - check_name = model_check_name(self.cluster_name, name, version) - job_id = '{}-{}-{}'.format(model_job_prefix(self.cluster_name), name, version) - + job_id = generate_model_job_name(self.cluster_name, name, version) if self.load_balancer != None: query_frontend_ip = self.load_balancer.ip @@ -349,13 +364,12 @@ def stop_all(self, graceful=True): def get_admin_addr(self): - check_name = '{}-mgmt'.format(nomad_job_prefix(self.cluster_name)) + check_name = mgmt_check(self.cluster_name) mgmt_ip, mgmt_port = self.dns.resolveSRV(check_name) return '{}:{}'.format(mgmt_ip, mgmt_port) def get_query_addr(self): - - check_name = 'check-service-query-frontend' + check_name = query_frontend_service_check(self.cluster_name) try: query_ip, query_port= self.dns.resolveSRV(check_name) self.query_ip = query_ip diff --git a/clipper_admin/clipper_admin/nomad/redis_deployment.py b/clipper_admin/clipper_admin/nomad/redis_deployment.py index fe1e78334..52cd5fd76 100644 --- a/clipper_admin/clipper_admin/nomad/redis_deployment.py +++ b/clipper_admin/clipper_admin/nomad/redis_deployment.py @@ -1,4 +1,4 @@ -from .utils import nomad_job_prefix +from .utils import nomad_job_prefix, redis_job_prefix, redis_check """ Nomad payload to deploy Redis """ def redis_deployment(job_id, datacenters, cluster_name): @@ -20,7 +20,7 @@ def redis_deployment(job_id, datacenters, cluster_name): }, 'Tasks': [ { - 'Name': '{}-redis'.format(nomad_job_prefix(cluster_name)), + 'Name': redis_job_prefix(cluster_name), 'Driver': 'docker', 'Config': { 'image': 'redis:alpine', @@ -39,7 +39,7 @@ def redis_deployment(job_id, datacenters, cluster_name): }, 'Services': [ { - 'Name': '{}-redis'.format(nomad_job_prefix(cluster_name)), + 'Name': redis_check(cluster_name), 'Tags': ['global', 'cache'], 'PortLabel': 'db', 'Checks': [ diff --git a/clipper_admin/clipper_admin/nomad/utils.py b/clipper_admin/clipper_admin/nomad/utils.py index 79b29ef5f..b3c12446e 100644 --- a/clipper_admin/clipper_admin/nomad/utils.py +++ b/clipper_admin/clipper_admin/nomad/utils.py @@ -2,6 +2,15 @@ def nomad_job_prefix(cluster_name): return 'clipper-{}'.format(cluster_name) + +""" + redis +""" +def redis_job_prefix(cluster_name): + return '{}-redis'.format(nomad_job_prefix(cluster_name)) +def redis_check(cluster_name): + return '{}-db'.format(mgmt_check(cluster_name)) + """ query frontend """ @@ -11,3 +20,23 @@ def query_frontend_service_check(cluster_name): return '{}-service'.format(query_frontend_job_prefix(cluster_name)) def query_frontend_rpc_check(cluster_name): return '{}-rpc'.format(query_frontend_job_prefix(cluster_name)) + +""" + mgmt +""" +def mgmt_job_prefix(cluster_name): + return '{}-mgmt'.format(nomad_job_prefix(cluster_name)) +def mgmt_check(cluster_name): + return '{}-http'.format(mgmt_check(cluster_name)) + +""" + model +""" +def model_job_prefix(cluster_name): + return '{}-model'.format(nomad_job_prefix(cluster_name)) + +def generate_model_job_name(cluster_name, model_name, model_version): + return '{}-{}-{}'.format(model_job_prefix(cluster_name), model_name, model_version) + +def model_check_name(cluster_name, model_name, model_version): + return '{}-model-{}-{}'.format(nomad_job_prefix(cluster_name), model_name, model_version)