Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions .gov/be_pr_body.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# @ai-tool: Copilot

## Summary

- Annotate Dataverse citation fields with a `source` string (e.g., `codemeta.json`, `CITATION.cff`) purely for UI display.
- No change to the submission payload or templates; consumers ignore unknown properties.
- Build and unit tests pass locally via `make`.

## AI Provenance (required for AI-assisted changes)

- Prompt: Add Source column in UI; backend should annotate metadata with provenance per field.
- Model: GitHub Copilot gpt-5
- Date: 2025-09-16T10:00:00Z
- Author: @ErykKul
- Role: deployer

## Compliance checklist

- [x] No secrets/PII
- [ ] Transparency notice updated (if user-facing)
- [x] Agent logging enabled (actions/decisions logged)
- [x] Kill-switch / feature flag present for AI features
- [x] No prohibited practices under EU AI Act
- [x] Human oversight retained (required if high-risk or agent mode)
Risk classification: limited
Personal data: no
DPIA: N/A
Automated decision-making: no
Agent mode used: yes
GPAI obligations: N/A
Vendor GPAI compliance reviewed: N/A
- [x] License/IP attestation
Attribution: N/A

### Change-type specifics

- Security review: N/A
- Backend/API changed:
- ASVS: N/A
- Log retention policy: N/A

## Tests & Risk

- [x] Unit/integration tests added/updated
- [x] Security scan passed
Rollback plan: Revert PR
Smoke test: N/A
- [x] Docs updated (if needed)
97 changes: 97 additions & 0 deletions image/app/common/get_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ func getMetadata(ctx context.Context, getMetadataRequest types.GetMetadataReques
return nil, err
}
md := types.MetadataStruct{Author: []types.Author{{AuthorName: jsonEscape(fmt.Sprintf("%v, %v", userObj.Data.LastName, userObj.Data.FirstName))}}}
// Collect provenance for each Dataverse citation field populated from repo files
sourceByField := map[string]string{}

nodemap := map[string]tree.Node{}
for _, v := range getMetadataRequest.CompareResult.Data {
Expand All @@ -107,6 +109,7 @@ func getMetadata(ctx context.Context, getMetadataRequest types.GetMetadataReques
if err != nil {
return nil, err
}
// No explicit source label for generic plugin metadata to keep UI focused on file-based provenance
md = mergeMetadata(moreMd, md)
}

Expand All @@ -116,6 +119,7 @@ func getMetadata(ctx context.Context, getMetadataRequest types.GetMetadataReques
if err != nil {
return nil, err
}
recordProvenance(sourceByField, "ro-crate.json", moreMd)
md = mergeMetadata(moreMd, md)
}

Expand All @@ -125,6 +129,7 @@ func getMetadata(ctx context.Context, getMetadataRequest types.GetMetadataReques
if err != nil {
return nil, err
}
recordProvenance(sourceByField, "codemeta.json", moreMd)
md = mergeMetadata(moreMd, md)
}

Expand All @@ -134,6 +139,7 @@ func getMetadata(ctx context.Context, getMetadataRequest types.GetMetadataReques
if err != nil {
return nil, err
}
recordProvenance(sourceByField, "CITATION.cff", moreMd)
md = mergeMetadata(moreMd, md)
}

Expand All @@ -149,9 +155,100 @@ func getMetadata(ctx context.Context, getMetadataRequest types.GetMetadataReques
if err != nil {
return nil, err
}
// Add provenance hints for UI (non-breaking for consumers that ignore unknown props)
annotateSources(res, sourceByField)
return res, nil
}

// recordProvenance notes in dst that source supplied every citation field
// that is populated in md. Keys are the field names used for lookup by
// annotateSources ("title", "keyword", "contributor", ...).
//
// For every field except "author" an existing entry in dst is overwritten,
// so the most recent call wins. The "author" entry is only written when dst
// has no "author" key yet, so the first call that carries authors wins.
func recordProvenance(dst map[string]string, source string, md types.MetadataStruct) {
	// Each row pairs a provenance key with "is this field populated in md?".
	overwritable := []struct {
		key       string
		populated bool
	}{
		{"title", md.Title != ""},
		{"alternativeTitle", len(md.AlternativeTitle) > 0},
		{"alternativeURL", len(md.AlternativeURL) > 0},
		{"otherId", len(md.OtherId) > 0},
		{"dsDescription", len(md.DsDescription) > 0},
		{"keyword", len(md.Keyword) > 0},
		{"contributor", len(md.ContributorName) > 0},
		{"grantNumber", len(md.GrantNumber) > 0},
		{"relatedMaterial", len(md.RelatedMaterialCitation) > 0},
	}
	for _, row := range overwritable {
		if row.populated {
			dst[row.key] = source
		}
	}

	// Authors are first-writer-wins: never replace an already recorded source.
	if len(md.Author) == 0 {
		return
	}
	if _, recorded := dst["author"]; !recorded {
		dst["author"] = source
	}
}

// annotateSources adds a "source" entry to each field object found under
// res["datasetVersion"]["metadataBlocks"]["citation"]["fields"] whose
// "typeName" has a non-empty entry in sourceByField. Fields whose "typeClass"
// is "compound" additionally get the same source propagated to their nested
// child objects via annotateCompoundFieldSources.
//
// The function mutates res in place and silently does nothing when any level
// of the expected structure is missing or has an unexpected type.
func annotateSources(res types.Metadata, sourceByField map[string]string) {
	// Walk datasetVersion -> metadataBlocks -> citation, bailing out as soon
	// as one level is absent or not a JSON object.
	node, ok := res["datasetVersion"].(map[string]interface{})
	for _, key := range []string{"metadataBlocks", "citation"} {
		if !ok {
			return
		}
		node, ok = node[key].(map[string]interface{})
	}
	if !ok {
		return
	}

	fieldList, ok := node["fields"].([]interface{})
	if !ok {
		return
	}

	for _, raw := range fieldList {
		field, isObject := raw.(map[string]interface{})
		if !isObject {
			continue
		}
		typeName, _ := field["typeName"].(string)
		// A missing key yields "", which is treated the same as "no source".
		label := sourceByField[typeName]
		if label == "" {
			continue
		}
		field["source"] = label
		// Compound fields also carry the label on each nested child object
		// so that leaf rows can show it.
		if class, _ := field["typeClass"].(string); class != "compound" {
			continue
		}
		annotateCompoundFieldSources(field, label)
	}
}

// annotateCompoundFieldSources writes src under the "source" key of every
// child object nested one level inside the entries of field["value"].
//
// field["value"] must be a list; each list entry that is an object is
// scanned, and every member of that entry that is itself an object gets
// "source" set. Anything of another type (non-list value, non-object entry,
// non-object member) is left untouched. The field object itself is not
// modified.
func annotateCompoundFieldSources(field map[string]interface{}, src string) {
	entries, isList := field["value"].([]interface{})
	if !isList {
		return
	}
	for i := range entries {
		members, isObject := entries[i].(map[string]interface{})
		if !isObject {
			continue
		}
		for name := range members {
			child, isChildObject := members[name].(map[string]interface{})
			if isChildObject {
				child["source"] = src
			}
		}
	}
}
func mergeMetadata(from, to types.MetadataStruct) types.MetadataStruct {
if from.Title != "" {
to.Title = jsonEscape(from.Title)
Expand Down