|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +#pragma once |
| 21 | + |
| 22 | +/// \file iceberg/update/expire_snapshots.h |
| 23 | +/// API for removing old snapshots from a table |
| 24 | + |
| 25 | +#include <cstdint> |
| 26 | +#include <functional> |
| 27 | +#include <memory> |
| 28 | +#include <optional> |
| 29 | +#include <string_view> |
| 30 | +#include <vector> |
| 31 | + |
| 32 | +#include "iceberg/iceberg_export.h" |
| 33 | +#include "iceberg/pending_update.h" |
| 34 | +#include "iceberg/type_fwd.h" |
| 35 | + |
| 36 | +namespace iceberg { |
| 37 | + |
| 38 | +/// \brief Level of file cleanup performed when snapshots are expired |
| 39 | +/// |
| 40 | +/// Selects which categories of files are deleted during snapshot expiration; passed to ExpireSnapshots::SetCleanupLevel(). |
| 41 | +enum class CleanupLevel { |
| 42 | + /// Skip all file cleanup; only snapshot metadata is removed |
| 43 | + kNone, |
| 44 | + /// Clean up only metadata files (manifests, manifest lists, statistics); |
| 45 | + /// data files are retained |
| 46 | + kMetadataOnly, |
| 47 | + /// Clean up both metadata and data files (the default used by ExpireSnapshots) |
| 48 | + kAll, |
| 49 | +}; |
| 50 | + |
| 51 | +/// \brief API for removing old snapshots from a table |
| 52 | +/// |
| 53 | +/// ExpireSnapshots accumulates snapshot deletions and commits the new snapshot |
| 54 | +/// list to the table. This API does not allow deleting the current snapshot. |
| 55 | +/// |
| 56 | +/// When committing, changes are applied to the latest table metadata. Commit |
| 57 | +/// conflicts are resolved by applying the changes to the new latest metadata |
| 58 | +/// and reattempting the commit. |
| 59 | +/// |
| 60 | +/// Manifest files that are no longer used by valid snapshots will be deleted. |
| 61 | +/// Data files that were deleted by snapshots that are expired will be deleted. |
| 62 | +/// DeleteWith() supplies an alternative deletion method; SetCleanupLevel() selects which files are cleaned up. |
| 63 | +/// |
| 64 | +/// Apply() returns the list of snapshots that would be removed, without committing (preview mode). |
| 65 | +/// |
| 66 | +/// Example usage: |
| 67 | +/// \code |
| 68 | +/// table.ExpireSnapshots() |
| 69 | +/// .ExpireOlderThan(timestamp_millis) |
| 70 | +/// .RetainLast(5) |
| 71 | +/// .Commit(); |
| 72 | +/// \endcode |
| 73 | +class ICEBERG_EXPORT ExpireSnapshots |
| 74 | + : public PendingUpdateTyped<std::vector<std::shared_ptr<Snapshot>>> { |
| 75 | + public: |
| 76 | + /// \brief Construct an ExpireSnapshots operation for the given table |
| 77 | + /// |
| 78 | + /// \param table The table to expire snapshots from (raw pointer; presumably non-owning and required to outlive this object - TODO confirm) |
| 79 | + explicit ExpireSnapshots(Table* table); |
| 80 | + ~ExpireSnapshots() override = default; |
| 81 | + |
| 82 | + /// \brief Expire a specific snapshot identified by id |
| 83 | + /// |
| 84 | + /// Marks a specific snapshot for removal. This method can be called multiple |
| 85 | + /// times to expire multiple snapshots. Snapshots marked by this method will |
| 86 | + /// be expired even if they would be retained by RetainLast(). |
| 87 | + /// |
| 88 | + /// \param snapshot_id ID of the snapshot to expire |
| 89 | + /// \return Reference to this for method chaining |
| 90 | + ExpireSnapshots& ExpireSnapshotId(int64_t snapshot_id); |
| 91 | + |
| 92 | + /// \brief Expire all snapshots older than the given timestamp |
| 93 | + /// |
| 94 | + /// Sets a timestamp threshold: all snapshots created before this time will |
| 95 | + /// be expired (unless retained by RetainLast()). |
| 96 | + /// |
| 97 | + /// \param timestamp_millis Expiration threshold, in milliseconds since the epoch |
| 98 | + /// \return Reference to this for method chaining |
| 99 | + ExpireSnapshots& ExpireOlderThan(int64_t timestamp_millis); |
| 100 | + |
| 101 | + /// \brief Retain the most recent ancestors of the current snapshot |
| 102 | + /// |
| 103 | + /// If a snapshot would be expired because it is older than the expiration |
| 104 | + /// timestamp, but is one of the num_snapshots most recent ancestors of the |
| 105 | + /// current state, it will be retained. This will not prevent snapshots |
| 106 | + /// explicitly identified by ExpireSnapshotId() from expiring. |
| 107 | + /// |
| 108 | + /// This may keep more than num_snapshots ancestors if snapshots are added |
| 109 | + /// concurrently. This may keep fewer than num_snapshots ancestors if the |
| 110 | + /// current table state does not have that many. |
| 111 | + /// |
| 112 | + /// \param num_snapshots The number of most recent ancestor snapshots to retain (NOTE(review): handling of non-positive values is not specified in this header) |
| 113 | + /// \return Reference to this for method chaining |
| 114 | + ExpireSnapshots& RetainLast(int num_snapshots); |
| 115 | + |
| 116 | + /// \brief Set a custom file deletion callback |
| 117 | + /// |
| 118 | + /// Passes an alternative delete implementation that will be used for |
| 119 | + /// manifests and data files. If this method is not called, unnecessary |
| 120 | + /// manifests and data files will still be deleted using the default method. |
| 121 | + /// |
| 122 | + /// Manifest files that are no longer used by valid snapshots will be deleted. |
| 123 | + /// Data files that were deleted by snapshots that are expired will be deleted. |
| 124 | + /// |
| 125 | + /// \param delete_func Callback that will be called for each file to delete (the argument is presumably the file's path/location - confirm) |
| 126 | + /// \return Reference to this for method chaining |
| 127 | + ExpireSnapshots& DeleteWith(std::function<void(std::string_view)> delete_func); |
| 128 | + |
| 129 | + /// \brief Configure the cleanup level for expired files |
| 130 | + /// |
| 131 | + /// This method provides fine-grained control over which files are cleaned up |
| 132 | + /// during snapshot expiration. |
| 133 | + /// |
| 134 | + /// Use CleanupLevel::kMetadataOnly when data files are shared across tables or |
| 135 | + /// when using procedures like add-files that may reference the same data files. |
| 136 | + /// |
| 137 | + /// Use CleanupLevel::kNone when data and metadata files may be more efficiently |
| 138 | + /// removed using a distributed framework through the actions API. |
| 139 | + /// |
| 140 | + /// \param level The cleanup level to use for expired snapshots |
| 141 | + /// \return Reference to this for method chaining |
| 142 | + ExpireSnapshots& SetCleanupLevel(CleanupLevel level); |
| 143 | + |
| 144 | + /// \brief Apply the pending changes and return the uncommitted result |
| 145 | + /// |
| 146 | + /// This does not result in a permanent update. |
| 147 | + /// |
| 148 | + /// \return The list of snapshots that would be expired, or an error: |
| 149 | + /// - ValidationFailed: if pending changes cannot be applied |
| 150 | + Result<std::vector<std::shared_ptr<Snapshot>>> Apply() override; |
| 151 | + |
| 152 | + /// \brief Apply and commit the pending changes to the table |
| 153 | + /// |
| 154 | + /// Changes are committed by calling the underlying table's commit operation. |
| 155 | + /// |
| 156 | + /// Once the commit is successful, the updated table will be refreshed. |
| 157 | + /// |
| 158 | + /// \return Status::OK if the commit was successful, or an error: |
| 159 | + /// - ValidationFailed: if update cannot be applied to current metadata |
| 160 | + /// - CommitFailed: if update cannot be committed due to conflicts |
| 161 | + Status Commit() override; |
| 162 | + |
| 163 | + // Non-copyable. NOTE(review): the user-declared copy operations and destructor suppress the implicit move operations, so this type is not movable as declared - confirm intent. |
| 164 | + ExpireSnapshots(const ExpireSnapshots&) = delete; |
| 165 | + ExpireSnapshots& operator=(const ExpireSnapshots&) = delete; |
| 166 | + |
| 167 | + private: |
| 168 | + Table* table_; |
| 169 | + std::vector<int64_t> snapshot_ids_to_expire_; |
| 170 | + std::optional<int64_t> expire_older_than_ms_; |
| 171 | + std::optional<int> retain_last_; |
| 172 | + std::optional<std::function<void(std::string_view)>> delete_func_; |
| 173 | + CleanupLevel cleanup_level_ = CleanupLevel::kAll; |
| 174 | +}; |
| 175 | + |
| 176 | +} // namespace iceberg |
0 commit comments