diff --git a/perma_web/api/tests/test_link_authorization.py b/perma_web/api/tests/test_link_authorization.py
index e162122d1..ff29a36a1 100644
--- a/perma_web/api/tests/test_link_authorization.py
+++ b/perma_web/api/tests/test_link_authorization.py
@@ -147,14 +147,10 @@ def test_should_allow_user_to_patch_with_file(self):
         # capture were properly associated with actual web archive files, which is always
         # the case outside of tests
         self.link.archive_timestamp = timezone.now() + timedelta(1)
-        self.link.warc_size = 1
         self.link.wacz_size = 1
         self.link.save()
 
-        # This link has a warc and a wacz
         self.link.refresh_from_db()
-        self.assertTrue(self.link.warc_size)
-        self.assertTrue(self.link.wacz_size)
 
         old_primary_capture = self.link.primary_capture
 
@@ -167,11 +163,12 @@ def test_should_allow_user_to_patch_with_file(self):
                           data={'file':file_content})
 
         self.assertTrue(Capture.objects.filter(link_id=self.link.pk, role='primary').exclude(pk=old_primary_capture.pk).exists())
+        self.assertTrue(Capture.objects.filter(link_id=self.link.pk, role='provenance_summary').exists())
 
-        # This link now only has a warc, but not a wacz
         self.link.refresh_from_db()
-        self.assertTrue(self.link.warc_size)
-        self.assertFalse(self.link.wacz_size)
+
+        self.assertTrue(self.link.wacz_size)
+        self.assertNotEqual(self.link.wacz_size, 1)
 
     def test_should_reject_patch_with_file_for_out_of_window_link(self):
diff --git a/perma_web/api/tests/test_link_resource.py b/perma_web/api/tests/test_link_resource.py
index e08313ec0..6c2c4e386 100644
--- a/perma_web/api/tests/test_link_resource.py
+++ b/perma_web/api/tests/test_link_resource.py
@@ -94,7 +94,7 @@ def setUp(self):
             'private_reason',
         ]
 
-    def assertRecordsInArchive(self, link, upload=False, expected_records=None, check_screenshot=False, check_provenance_summary=False, filetype='wacz'):
+    def assertRecordsInArchive(self, link, upload=False, expected_records=None, check_screenshot=False, check_provenance_summary=False):
 
         def find_recording_in_warc(index, capture_url, content_type):
             warc_content_type = "application/http; msgtype=response"
@@ -129,8 +129,7 @@ def find_attachment_in_warc(index, capture_url):
         self.assertTrue(link.primary_capture.content_type, "Capture is missing a content type.")
 
         # create an index of the warc
-        extract = filetype == 'wacz'
-        with link.get_warc(extract) as warc_file:
+        with link.get_warc() as warc_file:
             index = index_warc_file(warc_file)
 
         # see if the index reports the content is in the warc
@@ -655,7 +654,7 @@ def test_should_create_archive_from_pdf_file(self):
                                      user=self.org_user)
 
         link = Link.objects.get(guid=obj['guid'])
-        self.assertRecordsInArchive(link, upload=True, filetype='warc')
+        self.assertRecordsInArchive(link, upload=True)
         self.assertEqual(link.primary_capture.user_upload, True)
 
     def test_should_create_archive_from_jpg_file(self):
@@ -666,7 +665,7 @@
                                      user=self.org_user)
 
         link = Link.objects.get(guid=obj['guid'])
-        self.assertRecordsInArchive(link, upload=True, filetype='warc')
+        self.assertRecordsInArchive(link, upload=True)
         self.assertEqual(link.primary_capture.user_upload, True)
 
     def test_should_reject_jpg_file_with_invalid_url(self):
@@ -687,7 +686,7 @@ def test_should_should_create_archive_from_jpg_file_with_nonloading_url(self):
 
         link = Link.objects.get(guid=obj['guid'])
         self.assertEqual(link.submitted_url, 'http://asdf.asdf')
-        self.assertRecordsInArchive(link, upload=True, filetype='warc')
+        self.assertRecordsInArchive(link, upload=True)
         self.assertEqual(link.primary_capture.user_upload, True)
 
     def test_should_reject_invalid_file(self):
diff --git a/perma_web/api/views.py b/perma_web/api/views.py
index 38c602bf1..ef25f701d 100644
--- a/perma_web/api/views.py
+++ b/perma_web/api/views.py
@@ -620,7 +620,7 @@ def patch(self, request, guid, format=None):
             link.mark_capturejob_superseded()
 
             # write new warc and capture
-            link.write_uploaded_file(uploaded_file, cache_break=True)
+            link.write_uploaded_file(uploaded_file)
 
         # update internet archive if privacy changes
         if 'is_private' in data and was_private != bool(data.get("is_private")) and link.is_permanent():
diff --git a/perma_web/perma/models.py b/perma_web/perma/models.py
index 29e041b43..651a1bf21 100755
--- a/perma_web/perma/models.py
+++ b/perma_web/perma/models.py
@@ -49,13 +49,12 @@
     first_day_of_next_month,
     pp_date_from_post,
     prep_for_perma_payments,
-    preserve_perma_warc,
+    preserve_perma_wacz,
     process_perma_payments_transmission,
     protocol,
     remove_control_characters,
     today_next_year,
     tz_datetime,
-    write_resource_record_from_asset,
 )
 
 logger = logging.getLogger(__name__)
@@ -1974,9 +1973,9 @@ def get_pages_jsonl(self):
         )
         return "\n".join([json.dumps(row) for row in jsonl_rows])
 
-    def write_uploaded_file(self, uploaded_file, cache_break=False):
+    def write_uploaded_file(self, uploaded_file):
         """
-        Given a file uploaded by a user, create a Capture record and warc.
+        Given a file uploaded by a user, create a Capture record and WACZ.
         """
         from api.utils import get_mime_type, mime_type_lookup # local import to avoid circular import
 
@@ -1985,27 +1984,44 @@ def write_uploaded_file(self, uploaded_file):
         file_name = f'upload.{mime_type_lookup[mime_type]["new_extension"]}'
         warc_url = f"file:///{self.guid}/{file_name}"
 
-        # append a random number to warc_url if we're replacing a file, to avoid browser cache
-        if cache_break:
-            r = random.SystemRandom()
-            warc_url += f"?version={str(r.random()).replace('.', '')}"
-
-        capture = Capture(link=self,
-                          role='primary',
-                          status='success',
-                          record_type='resource',
-                          user_upload='True',
-                          content_type=mime_type,
-                          url=warc_url)
-        warc_size = [] # pass a mutable container to the context manager, so that it can populate it with the size of the finished warc
-        with preserve_perma_warc(self.guid, self.creation_timestamp, self.warc_storage_file(), warc_size) as warc:
-            uploaded_file.file.seek(0)
-            write_resource_record_from_asset(uploaded_file.file.read(), warc_url, mime_type, warc)
+        upload_capture = Capture(
+            link=self,
+            role='primary',
+            status='success',
+            record_type='resource',
+            user_upload=True,
+            content_type=mime_type,
+            url=warc_url
+        )
+
+        provenance_capture = Capture(
+            link=self,
+            role='provenance_summary',
+            status='success',
+            record_type='resource',
+            user_upload=False,
+            content_type='text/html',
+            url='file:///provenance-summary.html'
+        )
+
+        # make the WACZ
+        self.wacz_size = preserve_perma_wacz(
+            uploaded_file,
+            warc_url,
+            mime_type,
+            self.guid,
+            self.submitted_url,
+            self.submitted_title,
+            self.creation_timestamp,
+            self.wacz_storage_file()
+        )
+        self.warc_size = 0  # no WARC is stored for uploads anymore, so zero out any stale size
+
         self.captured_by_software = 'upload'
         self.captured_by_browser = None
-        self.warc_size = warc_size[0]
-        self.save(update_fields=['captured_by_software', 'captured_by_browser', 'warc_size'])
-        capture.save()
+        self.save(update_fields=['captured_by_software', 'captured_by_browser', 'warc_size', 'wacz_size'])
+        upload_capture.save()
+        provenance_capture.save()
 
     def safe_delete_warc(self):
         old_name = self.warc_storage_file()
diff --git a/perma_web/perma/settings/deployments/settings_common.py b/perma_web/perma/settings/deployments/settings_common.py
index f53054022..6e2aff991 100644
--- a/perma_web/perma/settings/deployments/settings_common.py
+++ b/perma_web/perma/settings/deployments/settings_common.py
@@ -625,6 +625,10 @@
 # Before deployment, we suppress the addition of new capture jobs when this file is present
 DEPLOYMENT_SENTINEL = '/tmp/perma-deployment-pending'
 
+# for inclusion in datapackage.json for user uploads; to be replaced with a
+# short commit hash in deployments
+PERMA_VERSION = 'dev'
+
 # Which settings should be available in all Django templates,
 # without needing to explicitly pass them via the view?
 TEMPLATE_VISIBLE_SETTINGS = (
diff --git a/perma_web/perma/templates/provenance-summary.html b/perma_web/perma/templates/provenance-summary.html
new file mode 100644
index 000000000..e3d529f92
--- /dev/null
+++ b/perma_web/perma/templates/provenance-summary.html
@@ -0,0 +1,103 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Provenance summary for user upload for {{url}}</title>
+</head>
+<body>
+  <div>
+    <h1>Provenance Summary</h1>
+    <p>
+      The data present in this capture, with MIME type {{ mime_type }}, were uploaded by a Perma user at {{ now }} to replace a failed or unsatisfactory capture of {{ url }} at {{ creation_timestamp }}.
+    </p>
+  </div>
+</body>
+</html>
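
A note for reviewers: the sketch below renders the same paragraph with the context keys that preserve_perma_wacz() passes to this template (url, mime_type, now, creation_timestamp). It is self-contained rather than Perma's actual code path — it configures a bare template engine by hand and uses made-up values — whereas the real code goes through loader.get_template("provenance-summary.html") under the application settings.

import django
from django.conf import settings
from django.template import engines

# stand up a bare template engine; in Perma the application settings provide this
settings.configure(TEMPLATES=[{"BACKEND": "django.template.backends.django.DjangoTemplates"}])
django.setup()

# same context keys as preserve_perma_wacz(); these values are hypothetical
context = {
    "url": "https://example.com/page",
    "mime_type": "application/pdf",
    "now": "2024-05-01T12:00:00Z",
    "creation_timestamp": "2024-04-30T23:59:59Z",
}

# one-line stand-in for the template body above
template = engines["django"].from_string(
    "The data present in this capture, with MIME type {{ mime_type }}, "
    "were uploaded by a Perma user at {{ now }} to replace a failed or "
    "unsatisfactory capture of {{ url }} at {{ creation_timestamp }}."
)
print(template.render(context))
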
diff --git a/perma_web/perma/utils.py b/perma_web/perma/utils.py
index ba31a4255..64979c83c 100644
--- a/perma_web/perma/utils.py
+++ b/perma_web/perma/utils.py
@@ -1,20 +1,24 @@
 from collections import OrderedDict
-from contextlib import contextmanager
+from contextlib import contextmanager, redirect_stdout
 import csv
 from datetime import datetime, timedelta
 from datetime import timezone as tz
 from functools import reduce, wraps
 import hashlib
+import io
 import itertools
 import json
 import logging
 import operator
 import os
+import shutil
 import string
 import tempfile
 from typing import Literal, TypeVar
+import uuid
 import unicodedata
 from wsgiref.util import FileWrapper
+import zipfile
 
 from dateutil.relativedelta import relativedelta
 from django.conf import settings
@@ -33,6 +37,7 @@
     JsonResponse,
     StreamingHttpResponse,
 )
+from django.template import loader
 from django.urls import reverse
 from django.utils import timezone
 from django.views.decorators.debug import sensitive_variables
@@ -43,6 +48,7 @@
 import surt
 import tempdir
 from ua_parser import user_agent_parser
+from warcio.indexer import Indexer
 from warcio.warcwriter import BufferWARCWriter
 
 from perma.exceptions import (
@@ -498,26 +504,215 @@ def decrypt_from_perma_payments(ciphertext, encoder=encoding.Base64Encoder):
     return box.decrypt(ciphertext, encoder=encoder)
 
 #
-# warc writing
+# wacz writing
 #
 
-@contextmanager
-def preserve_perma_warc(guid, timestamp, destination, warc_size):
+def now():
+    """Returns the current UTC time as a 6-tuple (year, month, day, hour, minute, second), the form zipfile.ZipInfo expects for date_time"""
+    return tuple(datetime.utcnow().timetuple()[:6])
+
+
+def parse_warc(warc_file, warc_url):
+    """ Gets the record length, block digest, and byte offset within the WARC for the uploaded file and the provenance summary """
+    targets = [warc_url, "file:///provenance-summary.html"]
+    response = {target: {key: None for key in ["length", "digest", "offset"]} for target in targets}
+    f = io.StringIO()
+    with redirect_stdout(f):
+        indexer = Indexer(fields=["offset", "length", "warc-target-uri", "warc-block-digest"], inputs=[warc_file], output='-')
+        indexer.process_all()
+    out = f.getvalue()
+    index = [json.loads(o) for o in out.split("\n") if o]
+    for entry in index:
+        if "warc-target-uri" in entry:
+            for target in targets:
+                if entry["warc-target-uri"] == target:
+                    response[target]["length"] = entry["length"]
+                    response[target]["offset"] = entry["offset"]
+                    response[target]["digest"] = entry["warc-block-digest"]
+    return response
+
+
+def sha256(input_file, buf_size=65536):
+    """ Returns the SHA256 hexdigest of a file, read in buf_size chunks """
+    sha256 = hashlib.sha256()
+    with open(input_file, 'rb') as f:
+        while True:
+            data = f.read(buf_size)
+            if not data:
+                break
+            sha256.update(data)
+    return sha256.hexdigest()
+
+
+def preserve_perma_wacz(uploaded_file, warc_url, mime_type, guid, url, title, creation_timestamp, wacz_destination):
     """
-    Context manager for opening a perma warc, ready to receive warc records.
-    Safely closes and saves the file to storage when context is exited.
+    Creates and stores a Perma WACZ for a user upload, returning the WACZ size.
+    This necessarily creates a WARC, but we no longer save the WARC itself.
""" - # mode set to 'ab+' as a workaround for https://bugs.python.org/issue25341 - out = tempfile.TemporaryFile('ab+') - write_perma_warc_header(out, guid, timestamp) - try: - yield out - finally: - out.flush() - warc_size.append(out.tell()) - out.seek(0) - storages[settings.WARC_STORAGE].store_file(out, destination, overwrite=True) - out.close() + timestamp = datetime.utcnow() + ts_string = timestamp.isoformat() + "Z" + # Link's creation_timestamp has "+00:00" at the end, so + creation_ts_string = creation_timestamp.isoformat().partition("+")[0] + "Z" + + with tempfile.TemporaryDirectory() as tmpdir: + # prepare WARC... + warc_file = f"{tmpdir}/data.warc.gz" + warc = open(warc_file, 'ab+') + write_perma_warc_header(warc, guid, timestamp) + + uploaded_file.file.seek(0) + write_resource_record_from_asset(uploaded_file.file.read(), warc_url, mime_type, ts_string, warc) + + # create provenance summary and add it to the WARC + provenance = loader.get_template("provenance-summary.html") + context = { + "url": url, + "now": ts_string, + "mime_type": mime_type, + "creation_timestamp": creation_ts_string + } + write_resource_record_from_asset( + provenance.render(context).encode(), + "file:///provenance-summary.html", + "text/html", + ts_string, + warc + ) + warc.close() + + # ...set up pages.jsonl... + pages = [ + {"format": "json-pages-1.0", "id": "pages", "title": "All Pages"}, + { + "id": f"{uuid.uuid4()}", + "url": warc_url, + "title": f"User-uploaded file replacing failed capture of {url}", + "ts": ts_string + }, + { + "id": f"{uuid.uuid4()}", + "url": "file:///provenance-summary.html", + "title": "Provenance Summary", + "ts": ts_string + } + ] + pages_bytes = "\n".join([json.dumps(page) for page in pages]).encode("utf-8") + + # ...CDXJ index... + targets = ["file:///provenance-summary.html", warc_url] + selected_warc_headers = parse_warc(warc_file, warc_url) + cdxj = { + target: json.dumps({ + "url": target, + "mime": "text/html" if target.endswith(".html") else mime_type, + "status": 200, + "digest": selected_warc_headers[target]["digest"], + "length": selected_warc_headers[target]["length"], + "offset": selected_warc_headers[target]["offset"], + "filename":"data.warc.gz" + }).replace(" ", "") for target in targets + } + ts = timestamp.strftime("%Y%m%d%H%M%S") + index = "\n".join( + [ + f"{target} {ts} {cdxj[target]}" + for target in targets + ] + ) + + # ...datapackage... + datapackage = { + "profile": "data-package", + "wacz_version": "1.1.1", + "title": title, + "description": f"User upload for {url}", + "mainPageURL": warc_url, + "created": ts_string, + "software": f"Perma.cc {settings.PERMA_VERSION}", + "resources": [ + { + "name": "pages.jsonl", + "path": "pages/pages.jsonl", + "hash": "sha256:" + hashlib.sha256(pages_bytes).hexdigest(), + "bytes": len(pages_bytes) + }, + { + "name": "index.cdx", + "path": "indexes/index.cdx", + "hash": "sha256:" + hashlib.sha256(index.encode()).hexdigest(), + "bytes": len(index) + }, + { + "name": "data.warc.gz", + "path": "archive/data.warc.gz", + "hash": "sha256:" + sha256(warc_file), + "bytes": os.stat(warc_file).st_size + } + ], + } + # ...and datapackage digest + datapackage_digest = { + "path": "datapackage.json", + "hash": hashlib.sha256(json.dumps(datapackage).encode()).hexdigest() + } + + # Now we can create the WACZ file... 
+ wacz_file = f"{tmpdir}/{guid}.wacz" + wacz = zipfile.ZipFile(wacz_file, "w") + + # add index + index_file = zipfile.ZipInfo("indexes/index.cdx", now()) + index_file.compress_type = zipfile.ZIP_DEFLATED + wacz.writestr(index_file, index.encode("utf-8")) + + # add pages.jsonl + pages_file = zipfile.ZipInfo("pages/pages.jsonl", now()) + pages_file.compress_type = zipfile.ZIP_DEFLATED + wacz.writestr(pages_file, pages_bytes) + + # add WARC file + archive_file = zipfile.ZipInfo.from_file( + warc_file, "archive/data.warc.gz" + ) + with wacz.open(archive_file, "w") as out_fh: + with open(warc_file, "rb") as in_fh: + shutil.copyfileobj(in_fh, out_fh) + + # add datapackage + datapackage_file = zipfile.ZipInfo("datapackage.json", now()) + datapackage_file.compress_type = zipfile.ZIP_DEFLATED + wacz.writestr( + datapackage_file, + json.dumps(datapackage).encode("utf-8") + ) + + # and datapackage digest + datapackage_digest_file = zipfile.ZipInfo( + "datapackage-digest.json", now() + ) + datapackage_digest_file.compress_type = zipfile.ZIP_DEFLATED + wacz.writestr( + datapackage_digest_file, + json.dumps(datapackage_digest).encode("utf-8") + ) + + # and close the file + wacz.close() + + # now store it + with open(wacz_file, "rb") as f: + storages[settings.WACZ_STORAGE].store_file(f, wacz_destination, overwrite=True) + + wacz_size = os.stat(wacz_file).st_size + + # (no need to clean up, because the context manager will do it) + + # ...and return the size + return wacz_size + +# +# warc writing +# def write_perma_warc_header(out_file, guid, timestamp): # build warcinfo header @@ -528,7 +723,7 @@ def write_perma_warc_header(out_file, guid, timestamp): ] warcinfo_fields = [ b'operator: Perma.cc', - b'format: WARC File Format 1.0', + b'format: WARC file version 1.0', bytes(f'Perma-GUID: {guid}', 'utf-8') ] data = b'\r\n'.join(warcinfo_fields) + b'\r\n' @@ -553,7 +748,7 @@ def make_detailed_warcinfo(filename, guid, coll_title, coll_desc, rec_title, pag writer = BufferWARCWriter(gzip=True) params = OrderedDict([('operator', 'Perma.cc download'), ('Perma-GUID', guid), - ('format', 'WARC File Format 1.0'), + ('format', 'WARC file version 1.0'), ('json-metadata', json.dumps(coll_metadata))]) record = writer.create_warcinfo_record(filename, params) @@ -568,18 +763,17 @@ def make_detailed_warcinfo(filename, guid, coll_title, coll_desc, rec_title, pag return writer.get_contents() -def write_resource_record_from_asset(data, url, content_type, out_file, extra_headers=None): +def write_resource_record_from_asset(data, url, content_type, warc_date, out_file, extra_headers=None): """ Constructs a single WARC resource record from an asset (screenshot, uploaded file, etc.) and writes to out_file. 
""" - warc_date = warctools.warc.warc_datetime_str(timezone.now()).replace(b'+00:00Z', b'Z') headers = [ (warctools.WarcRecord.TYPE, warctools.WarcRecord.RESOURCE), (warctools.WarcRecord.ID, warctools.WarcRecord.random_warc_uuid()), - (warctools.WarcRecord.DATE, warc_date), + (warctools.WarcRecord.DATE, bytes(warc_date, 'utf-8')), (warctools.WarcRecord.URL, bytes(url, 'utf-8')), - (warctools.WarcRecord.BLOCK_DIGEST, bytes(f'sha1:{hashlib.sha1(data).hexdigest()}', 'utf-8')) + (warctools.WarcRecord.BLOCK_DIGEST, bytes(f'sha256:{hashlib.sha256(data).hexdigest()}', 'utf-8')) ] if extra_headers: headers.extend(extra_headers) diff --git a/perma_web/requirements.txt b/perma_web/requirements.txt index bb2840f2f..77058783b 100644 --- a/perma_web/requirements.txt +++ b/perma_web/requirements.txt @@ -419,9 +419,9 @@ hypothesis==6.98.17 \ --hash=sha256:313f64b9f9f95e12c8b5342466bef7f352d2608afeeb434817c039602b45f0c4 \ --hash=sha256:bbd227000cc21a9686a00867f031479c3812d8ab076e4af1c813f6b3a50c98f5 # via -r requirements.in -idna==3.7 \ - --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ - --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 # via # requests # tldextract