Skip to content

Commit

Permalink
Merge pull request brain-score#45 from brain-score/catalog_fix
Browse files Browse the repository at this point in the history
Updates packaging.py to account for catalogs of type None
  • Loading branch information
mike-ferguson authored Mar 18, 2024
2 parents 03aef71 + f56b4ad commit bb21883
Showing 1 changed file with 39 additions and 29 deletions.
68 changes: 39 additions & 29 deletions brainio/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,24 +164,32 @@ def package_stimulus_set(catalog_name, proto_stimulus_set, stimulus_set_identifi
proto_stimulus_set['filename'] = zip_filenames # keep record of zip (or later local) filenames
csv_sha1 = create_stimulus_csv(proto_stimulus_set, str(target_csv_path))
# upload both to S3
upload_to_s3(str(target_csv_path), bucket_name, target_s3_key=csv_file_name)
upload_to_s3(str(target_zip_path), bucket_name, target_s3_key=zip_file_name)
csv_object_properties = upload_to_s3(str(target_csv_path), bucket_name, target_s3_key=csv_file_name)
zip_object_properties = upload_to_s3(str(target_zip_path), bucket_name, target_s3_key=zip_file_name)
# link to csv and zip from same identifier. The csv however is the only one of the two rows with a class.
lookup.append(
catalog_identifier=catalog_name,
object_identifier=stimulus_set_identifier, cls='StimulusSet',
lookup_type=TYPE_STIMULUS_SET,
bucket_name=bucket_name, sha1=csv_sha1, s3_key=csv_file_name,
stimulus_set_identifier=None
)
lookup.append(
catalog_identifier=catalog_name,
object_identifier=stimulus_set_identifier, cls=None,
lookup_type=TYPE_STIMULUS_SET,
bucket_name=bucket_name, sha1=stimulus_zip_sha1, s3_key=zip_file_name,
stimulus_set_identifier=None
)
_logger.debug(f"stimulus set {stimulus_set_identifier} packaged")

if catalog_name is not None:
lookup.append(
catalog_identifier=catalog_name,
object_identifier=stimulus_set_identifier, cls='StimulusSet',
lookup_type=TYPE_STIMULUS_SET,
bucket_name=bucket_name, sha1=csv_sha1, s3_key=csv_file_name,
stimulus_set_identifier=None
)
lookup.append(
catalog_identifier=catalog_name,
object_identifier=stimulus_set_identifier, cls=None,
lookup_type=TYPE_STIMULUS_SET,
bucket_name=bucket_name, sha1=stimulus_zip_sha1, s3_key=zip_file_name,
stimulus_set_identifier=None
)
csv_version_id = csv_object_properties['VersionId'] if 'VersionId' in csv_object_properties else None
zip_version_id = zip_object_properties['VersionId'] if 'VersionId' in zip_object_properties else None
_logger.debug(f"stimulus set {stimulus_set_identifier} packaged:\n bucket={bucket_name}, csv_sha1={csv_sha1},"
f"zip_sha1={stimulus_zip_sha1}, csv_version_id={csv_version_id}, zip_version_id={zip_version_id}")
return {"identifier": stimulus_set_identifier, "bucket": bucket_name, "csv_sha1": csv_sha1,
"zip_sha1": stimulus_zip_sha1, "csv_version_id": csv_version_id, "zip_version_id": zip_version_id}



def write_netcdf(assembly, target_netcdf_file, append=False, group=None, compress=True):
Expand Down Expand Up @@ -247,8 +255,6 @@ def package_data_assembly(catalog_identifier, proto_data_assembly, assembly_iden
assembly = assembly_class(proto_data_assembly)
assembly.attrs['stimulus_set_identifier'] = stimulus_set_identifier
assembly.validate()
assert stimulus_set_identifier in list_stimulus_sets(), \
f"StimulusSet {stimulus_set_identifier} not found in packaged stimulus sets"

# identifiers
assembly_store_identifier = "assy_" + assembly_identifier.replace(".", "_")
Expand All @@ -263,13 +269,17 @@ def package_data_assembly(catalog_identifier, proto_data_assembly, assembly_iden
assert isinstance(ex, DataArray)
netcdf_kf_sha1 = write_netcdf(ex, target_netcdf_path, append=True, group=k)
object_properties = upload_to_s3(target_netcdf_path, bucket_name, s3_key)
lookup.append(
catalog_identifier=catalog_identifier,
object_identifier=assembly_identifier, stimulus_set_identifier=stimulus_set_identifier,
lookup_type=TYPE_ASSEMBLY,
bucket_name=bucket_name, sha1=netcdf_kf_sha1,
s3_key=s3_key, cls=assembly_class_name,
)
_logger.debug(f"assembly {assembly_identifier} packaged: sha1={netcdf_kf_sha1}" +
# log version_id if it's there (not all buckets are versioned)
(f", version_id={object_properties['VersionId']}" if 'VersionId' in object_properties else ""))

if catalog_identifier is not None:
lookup.append(
catalog_identifier=catalog_identifier,
object_identifier=assembly_identifier, stimulus_set_identifier=stimulus_set_identifier,
lookup_type=TYPE_ASSEMBLY,
bucket_name=bucket_name, sha1=netcdf_kf_sha1,
s3_key=s3_key, cls=assembly_class_name,
)
version_id = object_properties['VersionId'] if 'VersionId' in object_properties else None
_logger.debug(f"assembly {assembly_identifier} packaged:\n, version_id={version_id}, sha1={netcdf_kf_sha1}, "
f"bucket_name={bucket_name}, cls={assembly_class_name}")
return {"identifier": assembly_identifier, "version_id": version_id, "sha1": netcdf_kf_sha1,
"bucket": bucket_name, "cls": assembly_class_name}

0 comments on commit bb21883

Please sign in to comment.