From 5c9b4153f560e44f053000bcecd3752fbebc2604 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Tue, 6 Feb 2024 15:07:29 +0000 Subject: [PATCH] Fixes upload loophole (#517) * fix: Uploads now require proposal membership * fix: Minor log tweak * fix: Jobn execution now checks project membership * fix: Revert project check in Squonk - should be handled higher up * fix: Job execution now requires project membership * build: Fix build (local precommit problem) * ci: Update GitHub action versions * fix: Improved target experiment file handling * fix: Internal function rename * fix: Better control of security cache (new default of 2 minutes) --------- Co-authored-by: Alan Christie --- .github/workflows/build-dev.yaml | 4 +- .github/workflows/build-production.yaml | 4 +- .github/workflows/build-staging.yaml | 2 +- api/security.py | 62 ++++++++++++-------- fragalysis/settings.py | 26 ++++++--- viewer/squonk2_agent.py | 4 +- viewer/target_loader.py | 46 ++++++++------- viewer/views.py | 75 ++++++++++++++++++------- 8 files changed, 146 insertions(+), 77 deletions(-) diff --git a/.github/workflows/build-dev.yaml b/.github/workflows/build-dev.yaml index abdb5a89..689f42f5 100644 --- a/.github/workflows/build-dev.yaml +++ b/.github/workflows/build-dev.yaml @@ -81,7 +81,7 @@ jobs: pip install --requirement build-requirements.txt pre-commit run --all-files - name: Docker build - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . tags: ${{ steps.vars.outputs.BE_NAMESPACE }}/fragalysis-backend:${{ env.GITHUB_REF_SLUG }} @@ -96,7 +96,7 @@ jobs: BE_IMAGE_TAG: ${{ env.GITHUB_REF_SLUG }} - name: Login to DockerHub if: steps.vars.outputs.push == 'true' - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/build-production.yaml b/.github/workflows/build-production.yaml index 4a31d40b..b49d06df 100644 --- a/.github/workflows/build-production.yaml +++ b/.github/workflows/build-production.yaml @@ -128,7 +128,7 @@ jobs: pip install --requirement build-requirements.txt pre-commit run --all-files - name: Build - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . tags: | @@ -145,7 +145,7 @@ jobs: BE_IMAGE_TAG: ${{ steps.vars.outputs.tag }} - name: Login to DockerHub if: steps.vars.outputs.push == 'true' - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/build-staging.yaml b/.github/workflows/build-staging.yaml index fbbe4b31..35b70ff0 100644 --- a/.github/workflows/build-staging.yaml +++ b/.github/workflows/build-staging.yaml @@ -150,7 +150,7 @@ jobs: pip install --requirement build-requirements.txt pre-commit run --all-files - name: Docker build - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . tags: ${{ steps.vars.outputs.BE_NAMESPACE }}/fragalysis-backend:${{ steps.vars.outputs.tag }} diff --git a/api/security.py b/api/security.py index fdd8e184..a46f2029 100644 --- a/api/security.py +++ b/api/security.py @@ -19,9 +19,11 @@ logger: logging.Logger = logging.getLogger(__name__) +# A list of cached security results. +# Results use the key 'RESULTS' and the collection time uses the key 'TIMESTAMP'. USER_LIST_DICT: Dict[str, Any] = {} - -connector: str = os.environ.get('SECURITY_CONNECTOR', 'ispyb') +# Period to cache user lists in seconds +USER_LIST_CACHE_SECONDS: int = settings.SECURITY_CONNECTOR_CACHE_MINUTES * 60 # example test: # from rest_framework.test import APIRequestFactory @@ -109,9 +111,9 @@ def get_conn() -> Optional[Connector]: def get_configured_connector() -> Optional[Union[Connector, SSHConnector]]: - if connector == 'ispyb': + if settings.SECURITY_CONNECTOR == 'ispyb': return get_conn() - elif connector == 'ssh_ispyb': + elif settings.SECURITY_CONNECTOR == 'ssh_ispyb': return get_remote_conn() return None @@ -122,9 +124,9 @@ def ping_configured_connector() -> bool: a connection can be made. """ conn: Optional[Union[Connector, SSHConnector]] = None - if connector == 'ispyb': + if settings.SECURITY_CONNECTOR == 'ispyb': conn = get_conn() - elif connector == 'ssh_ispyb': + elif settings.SECURITY_CONNECTOR == 'ssh_ispyb': conn = get_remote_conn() if conn is not None: conn.stop() @@ -175,17 +177,18 @@ def get_proposals_for_user_from_django(self, user): return prop_ids def needs_updating(self, user): - """Returns true of the data collected for a user is out of date. - It's simple, we just record the last collected timestamp and consider it - 'out of date' (i.e. more than an hour old). - """ - update_window = 3600 - if user.username not in USER_LIST_DICT: - USER_LIST_DICT[user.username] = {"RESULTS": [], "TIMESTAMP": 0} + """Returns true of the data collected for a user is out of date.""" current_time = time.time() - if current_time - USER_LIST_DICT[user.username]["TIMESTAMP"] > update_window: + if user.username not in USER_LIST_DICT: + USER_LIST_DICT[user.username] = {"RESULTS": [], "TIMESTAMP": current_time} + if ( + current_time - USER_LIST_DICT[user.username]["TIMESTAMP"] + >= USER_LIST_CACHE_SECONDS + ): + # Clear the cache (using the current time as the new timestamp) USER_LIST_DICT[user.username]["TIMESTAMP"] = current_time return True + # Cached results are still valid... return False def run_query_with_connector(self, conn, user): @@ -283,13 +286,26 @@ def get_proposals_for_user_from_ispyb(self, user): ) return cached_prop_ids - def get_proposals_for_user(self, user): - """Returns a list of proposals (public and private) that the user has access to.""" + def get_proposals_for_user(self, user, restrict_to_membership=False): + """Returns a list of proposals that the user has access to. + + If 'restrict_to_membership' is set only those proposals/visits where the user + is a member of the visit will be returned. Otherwise the 'public' + proposals/visits will also be returned. Typically 'restrict_to_membership' is + used for uploads/changes - this allows us to implement logic that (say) + only permits explicit members of public proposals to add/load data for that + project (restrict_to_membership=True), but everyone can 'see' public data + (restrict_to_membership=False). + """ assert user proposals = [] ispyb_user = os.environ.get("ISPYB_USER") - logger.debug("ispyb_user=%s", ispyb_user) + logger.debug( + "ispyb_user=%s restrict_to_membership=%s", + ispyb_user, + restrict_to_membership, + ) if ispyb_user: if user.is_authenticated: logger.info("Getting proposals from ISPyB...") @@ -301,11 +317,13 @@ def get_proposals_for_user(self, user): logger.info("Getting proposals from Django...") proposals = self.get_proposals_for_user_from_django(user) - # Now add in Target Access Strings that everyone has access to - # (unless we already have them) - for open_proposal in self.get_open_proposals(): - if open_proposal not in proposals: - proposals.append(open_proposal) + # We have all the proposals where the user has membership. + if not restrict_to_membership: + # Here we're not restricting proposals to those where the user is a member, + # so we add those projects/proposals that everyone has access to. + for open_proposal in self.get_open_proposals(): + if open_proposal not in proposals: + proposals.append(open_proposal) return proposals diff --git a/fragalysis/settings.py b/fragalysis/settings.py index c7dc124e..02409d1e 100644 --- a/fragalysis/settings.py +++ b/fragalysis/settings.py @@ -11,6 +11,7 @@ """ import os +import sys from datetime import timedelta import sentry_sdk @@ -32,13 +33,6 @@ PROPOSAL_SUPPORTED = True PROPOSAL_REQUIRED = True -# Authentication check when uploading files. -# This can be switched off to simplify development testing if required. -# Should always be True on production. -AUTHENTICATE_UPLOAD = True -if os.environ.get("AUTHENTICATE_UPLOAD") == 'False': - AUTHENTICATE_UPLOAD = False - # AnonymousUser should be the first record inserted into the auth_user table. ANONYMOUS_USER = 1 @@ -230,6 +224,15 @@ OIDC_STORE_ACCESS_TOKEN = True OIDC_STORE_ID_TOKEN = True +# Security/access control connector. +# Currently one of 'ispyb' or 'ssh_ispyb'. +SECURITY_CONNECTOR = os.environ.get('SECURITY_CONNECTOR', 'ispyb').lower() +# Number of minutes to cache security information for a user. +# Set to '0' to disable caching. +SECURITY_CONNECTOR_CACHE_MINUTES = int( + os.environ.get('SECURITY_CONNECTOR_CACHE_MINUTES', '2') +) + # SessionRefresh configuration. # There's only one item - the token expiry period, with a default of 15 minutes. # The default is 15 minutes if you don't set this value. @@ -243,6 +246,14 @@ # see api.utils for the 'deployment_mode_is_production()' function. DEPLOYMENT_MODE = os.environ.get("DEPLOYMENT_MODE", "production").upper() +# Authentication check when uploading files. +# This can be switched off to simplify development testing if required. +# It's asserted as True for 'production' mode. +AUTHENTICATE_UPLOAD = True +if os.environ.get("AUTHENTICATE_UPLOAD") == 'False': + assert DEPLOYMENT_MODE != "PRODUCTION" + AUTHENTICATE_UPLOAD = False + ROOT_URLCONF = "fragalysis.urls" STATIC_ROOT = os.path.join(PROJECT_ROOT, "static") @@ -442,6 +453,7 @@ 'console': { 'level': 'DEBUG', 'class': 'logging.StreamHandler', + 'stream': sys.stdout, 'formatter': 'simple', }, 'rotating': { diff --git a/viewer/squonk2_agent.py b/viewer/squonk2_agent.py index 6549a9a8..3d4f7936 100644 --- a/viewer/squonk2_agent.py +++ b/viewer/squonk2_agent.py @@ -746,11 +746,11 @@ def _verify_access(self, c_params: CommonParams) -> Squonk2AgentRv: target_access_string = self._get_target_access_string(access_id) assert target_access_string proposal_list: List[str] = self.__ispyb_safe_query_set.get_proposals_for_user( - user + user, restrict_to_membership=True ) if not target_access_string in proposal_list: msg = ( - f'The user ({user.username}) cannot access "{target_access_string}"' + f'The user ({user.username}) cannot modify "{target_access_string}"' f' (access_id={access_id}). Only {proposal_list})' ) _LOGGER.warning(msg) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 465d18ad..3c7a5660 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -188,7 +188,7 @@ def _validate_bundle_against_mode(config_yaml: Dict[str, Any]) -> Optional[str]: # Initial concern - the loader's git information. # It must not be 'dirty' and must have a valid 'tag'. xca_git_info_key = "xca_git_info" - base_error_msg = "Stack is in PRODUCTION mode - and " + base_error_msg = "Stack is in PRODUCTION mode - and" try: xca_git_info = config_yaml[xca_git_info_key] except KeyError: @@ -1826,11 +1826,17 @@ def load_target( archive.extractall(target_loader.raw_data) msg = f"Data extraction complete: {data_bundle}" logger.info("%s%s", target_loader.report.task_id, msg) - except FileNotFoundError as exc: - msg = f"{data_bundle} file does not exist" - logger.exception("%s%s", target_loader.report.task_id, msg) - target_loader.experiment_upload.message = exc.args[0] - raise FileNotFoundError(msg) from exc + except Exception as exc: + # Handle _any_ underlying problem with the file. + logger.error('Got an exception opening the file: %s', str(exc)) + target_loader.report.log( + logging.ERROR, + f"Decompression of '{bundle_filename}' has failed. Is it a Target Experiment file?", + ) + target_loader.report.final( + f"Failed to decompress '{target_loader.data_bundle}'", success=False + ) + return target_loader.report.log(logging.INFO, f"Decompressed '{bundle_filename}'") @@ -1849,23 +1855,23 @@ def load_target( # Any problem with the underlying data is transmitted in the report. logger.debug(exc, exc_info=True) target_loader.report.final( - f"Uploading {target_loader.data_bundle} failed", success=False + f"Failed to process '{target_loader.data_bundle}'", success=False ) return - else: - # Move the uploaded file to its final location - target_loader.abs_final_path.mkdir(parents=True) - target_loader.raw_data.rename(target_loader.abs_final_path) - Path(target_loader.bundle_path).rename( - target_loader.abs_final_path.parent.joinpath(target_loader.data_bundle) - ) + _move_and_save_target_experiment(target_loader) - set_directory_permissions(target_loader.abs_final_path, 0o755) - target_loader.report.final( - f"{target_loader.data_bundle} uploaded successfully" - ) - target_loader.experiment_upload.message = target_loader.report.json() +def _move_and_save_target_experiment(target_loader): + # Move the uploaded file to its final location + target_loader.abs_final_path.mkdir(parents=True) + target_loader.raw_data.rename(target_loader.abs_final_path) + Path(target_loader.bundle_path).rename( + target_loader.abs_final_path.parent.joinpath(target_loader.data_bundle) + ) + + set_directory_permissions(target_loader.abs_final_path, 0o755) - target_loader.experiment_upload.save() + target_loader.report.final(f"{target_loader.data_bundle} uploaded successfully") + target_loader.experiment_upload.message = target_loader.report.json() + target_loader.experiment_upload.save() diff --git a/viewer/views.py b/viewer/views.py index fa2ce66b..6112f7ba 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -380,32 +380,18 @@ def post(self, request): logger.info('User=%s', str(request.user)) # logger.info('Auth=%s', str(request.auth)) - # Only authenticated users can upload files - # - this can be switched off in settings.py. - user = self.request.user - if not user.is_authenticated and settings.AUTHENTICATE_UPLOAD: - context = {} - context['error_message'] = ( - 'Only authenticated users can upload files' - ' - please navigate to landing page and Login' - ) - return render(request, 'viewer/upload-cset.html', context) - form = CSetForm(request.POST, request.FILES) if form.is_valid(): # Get all the variables needed from the form. # The fields we use will be based on the 'submit_choice', # expected to be one of V (validate), U (upload) or D delete choice = request.POST['submit_choice'] - - # Generate run-time error if the required form fields - # are not set based on the choice made... - - # The 'sdf_file' anf 'target_name' are only required for upload/update + # The 'sdf_file' and 'target_name' are only required for upload/update sdf_file = request.FILES.get('sdf_file') target = request.POST.get('target_name') update_set = request.POST.get('update_set') + user = self.request.user logger.info( '+ UploadCSet POST user.id=%s choice="%s" target="%s" update_set="%s"', user.id, @@ -474,6 +460,32 @@ def post(self, request): ) return redirect('viewer:upload_cset') + # You cannot validate or upload a set + # unless the user is part of the Target's project (proposal) + # even if the target is 'open'. + if choice in ['V', 'U'] and settings.AUTHENTICATE_UPLOAD: + assert target + context = {} + # The target must be part of a proposal that the user is a member of. + target_record = models.Target.objects.filter(title=target).first() + if not target_record: + context['error_message'] = f'Unknown Target ({target})' + return render(request, 'viewer/upload-cset.html', context) + # What proposals is the user a member of? + ispyb_safe_query_set = ISpyBSafeQuerySet() + user_proposals = ispyb_safe_query_set.get_proposals_for_user( + user, restrict_to_membership=True + ) + user_is_member = any( + target_project.title in user_proposals + for target_project in target_record.project_id.all() + ) + if not user_is_member: + context[ + 'error_message' + ] = f"You cannot load compound sets for '{target}'. You are not a member of any of its Proposals" + return render(request, 'viewer/upload-cset.html', context) + # Save uploaded sdf and zip to tmp storage tmp_pdb_file = None tmp_sdf_file = None @@ -1546,14 +1558,16 @@ def create(self, request, *args, **kwargs): if not user.is_authenticated: return redirect(settings.LOGIN_URL) else: - if target_access_string not in self.get_proposals_for_user(user): + if target_access_string not in self.get_proposals_for_user( + user, restrict_to_membership=True + ): return Response( { "target_access_string": [ - f"User {user} is not authorized to upload data to {target_access_string}" + f"You are not authorized to upload data to '{target_access_string}'" ] }, - status=status.HTTP_400_BAD_REQUEST, + status=status.HTTP_403_FORBIDDEN, ) # memo to self: cannot use TemporaryDirectory here because task @@ -2050,9 +2064,10 @@ def get(self, request): def post(self, request): logger.info('+ JobRequest.post') - # Only authenticated users can create squonk job requests. + # Only authenticated users can create squonk job requests + # (unless 'AUTHENTICATE_UPLOAD' is False in settings.py) user = self.request.user - if not user.is_authenticated: + if not user.is_authenticated and settings.AUTHENTICATE_UPLOAD: content: Dict[str, Any] = {'error': 'Only authenticated users can run jobs'} return Response(content, status=status.HTTP_403_FORBIDDEN) @@ -2074,6 +2089,24 @@ def post(self, request): logger.info('+ snapshot_id=%s', snapshot_id) logger.info('+ session_project_id=%s', session_project_id) + # Project must exist. + project: Optional[models.Project] = models.Project.objects.filter( + id=access_id + ).first() + if not project: + content = {'error': f'Access ID (Project) {access_id} does not exist'} + return Response(content, status=status.HTTP_404_NOT_FOUND) + # The user must be a member of the target access string. + # (when AUTHENTICATE_UPLOAD is set) + if settings.AUTHENTICATE_UPLOAD: + ispyb_safe_query_set = ISpyBSafeQuerySet() + user_proposals = ispyb_safe_query_set.get_proposals_for_user( + user, restrict_to_membership=True + ) + if project.title not in user_proposals: + content = {'error': f"You are not a member of '{project.title}'"} + return Response(content, status=status.HTTP_403_FORBIDDEN) + # Check the user can use this Squonk2 facility. # To do this we need to setup a couple of API parameter objects. # We don't (at this point) care about the Job spec or callback URL.