Skip to content

Commit 4054234

Browse files
[CAS] Add a new API in ObjectStore to import a CAS tree
1 parent b28ed49 commit 4054234

File tree

4 files changed

+119
-3
lines changed

4 files changed

+119
-3
lines changed

llvm/include/llvm/CAS/ObjectStore.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,10 @@ class ObjectStore {
309309
/// Validate the whole node tree.
310310
Error validateTree(ObjectRef Ref);
311311

312+
/// Import object from another CAS. This will import the full tree from the
313+
/// other CAS.
314+
Expected<ObjectRef> importObject(ObjectStore &Upstream, ObjectRef Other);
315+
312316
/// Print the ObjectStore internals for debugging purpose.
313317
virtual void print(raw_ostream &) const {}
314318
void dump() const;

llvm/lib/CAS/ObjectStore.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,87 @@ Error ObjectStore::validateTree(ObjectRef Root) {
217217
return Error::success();
218218
}
219219

220+
// Import the object \p Other, together with the objects it references, from
// \p Upstream into this store.
//
// Returns \p Other unchanged when \p Upstream is this store. Otherwise returns
// the reference produced by storing the root node here. Any error from
// `Upstream.load()` or from `store()` is returned to the caller.
Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream,
                                              ObjectRef Other) {
  // There is no work to do if importing from self.
  if (this == &Upstream)
    return Other;

  // FIXME: This replicates the logic in `OnDiskGraphDB::importFullTree`.
  // Copies the tree rooted at \p Other from upstream. A node is stored only
  // after every one of its references has been resolved in this store
  // (depth-first, children before parent), to protect against the process
  // dying during importing and leaving the database with an incomplete tree.
  //
  // NOTE(review): the comment this was derived from says a tree with missing
  // upstream nodes is copied with those nodes missing. Here a failing
  // `Upstream.load()` is returned as an error instead -- confirm which
  // behavior is intended.

  /// Visitation state for one upstream node. It only holds information from
  /// the upstream CAS: the loaded node, how many references it has, and the
  /// index of the next reference that still has to be resolved.
  struct UpstreamCursor {
    ObjectHandle Node;
    size_t NumRefs;
    size_t NextRef;
  };
  /// The node currently being visited (back) and all of its parents.
  SmallVector<UpstreamCursor, 16> CursorStack;
  /// PrimaryRefStack holds ObjectRefs of the current CAS, for nodes that were
  /// either just stored in this CAS or already existed in it.
  SmallVector<ObjectRef, 128> PrimaryRefStack;

  // Start visiting \p Node. Its references are read lazily, one at a time,
  // through the cursor's NextRef index.
  auto enqueueNode = [&](ObjectHandle Node) {
    CursorStack.push_back({Node, Upstream.getNumRefs(Node), 0});
  };

  auto UpstreamHandle = Upstream.load(Other);
  if (!UpstreamHandle)
    return UpstreamHandle.takeError();
  enqueueNode(*UpstreamHandle);

  while (!CursorStack.empty()) {
    UpstreamCursor &Cur = CursorStack.back();
    if (Cur.NextRef == Cur.NumRefs) {
      // All references are resolved: copy the node data into the primary
      // store. The top `Cur.NumRefs` entries of \p PrimaryRefStack are the
      // refs in this CAS for the current node's children, in reference order.
      assert(PrimaryRefStack.size() >= Cur.NumRefs);
      auto Refs =
          ArrayRef(PrimaryRefStack).slice(PrimaryRefStack.size() - Cur.NumRefs);
      auto NewNode = store(Refs, Upstream.getData(Cur.Node));
      if (!NewNode)
        return NewNode.takeError();

      // Replace the children's refs on the stack with the ref of the node
      // that was just stored. `Cur` must not be used after the pop.
      PrimaryRefStack.truncate(PrimaryRefStack.size() - Cur.NumRefs);
      CursorStack.pop_back();

      PrimaryRefStack.push_back(*NewNode);
      continue;
    }

    // Check if the next child exists already.
    // NOTE(review): a successful lookup is taken to mean nothing below this
    // child needs importing -- confirm that `getReference()` guarantees this.
    ObjectRef CurrentRef = Upstream.readRef(Cur.Node, Cur.NextRef++);
    auto Ref = getReference(Upstream.getID(CurrentRef));
    if (Ref) {
      // If exists already, just need to enqueue the primary node.
      PrimaryRefStack.push_back(*Ref);
      continue;
    }

    // Load child. Pushing onto CursorStack may invalidate `Cur`, which is not
    // used again in this iteration.
    auto UpstreamChild = Upstream.load(CurrentRef);
    if (LLVM_UNLIKELY(!UpstreamChild))
      return UpstreamChild.takeError();

    enqueueNode(*UpstreamChild);
  }

  // Only the ref of the imported root is left.
  assert(PrimaryRefStack.size() == 1);
  return PrimaryRefStack.front();
}
300+
220301
std::unique_ptr<MemoryBuffer>
221302
ObjectProxy::getMemoryBuffer(StringRef Name,
222303
bool RequiresNullTerminator) const {

llvm/test/tools/llvm-cas/ingest.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,7 @@ CHECK-ERROR: llvm-cas: get-cas-id: No such file or directory
3636
RUN: llvm-cas --cas %t/cas --ls-node-refs @%t/cas.id 2>&1 | FileCheck %s --check-prefix=CHECK-NODE-REFS
3737
CHECK-NODE-REFS: llvmcas://
3838
CHECK-NODE-REFS: llvmcas://
39+
40+
// Test importing the entire tree from the upstream CAS.
41+
RUN: llvm-cas --cas %t/new-cas --upstream-cas %t/cas --import-from-upstream @%t/cas.id
42+
RUN: llvm-cas --cas %t/new-cas --ls-tree-recursive @%t/cas.id | FileCheck %s

llvm/tools/llvm-cas/llvm-cas.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ static int getCASIDForFile(ObjectStore &CAS, const CASID &ID,
6161
ArrayRef<std::string> Path);
6262
static int import(ObjectStore &CAS, ObjectStore &UpstreamCAS,
6363
ArrayRef<std::string> Objects);
64+
static int importFromUpstream(ObjectStore &CAS, ObjectStore &UpstreamCAS,
65+
ArrayRef<std::string> Objects);
6466
static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
6567
ArrayRef<std::string> Objects);
6668
static int getCacheResult(ObjectStore &CAS, ActionCache &AC, const CASID &ID);
@@ -115,6 +117,7 @@ int main(int Argc, char **Argv) {
115117
MergeTrees,
116118
GetCASIDForFile,
117119
Import,
120+
ImportFromUpstream,
118121
PutCacheKey,
119122
GetCacheResult,
120123
CheckLockFiles,
@@ -142,6 +145,8 @@ int main(int Argc, char **Argv) {
142145
clEnumValN(MergeTrees, "merge", "merge paths/cas-ids"),
143146
clEnumValN(GetCASIDForFile, "get-cas-id", "get cas id for file"),
144147
clEnumValN(Import, "import", "import objects from another CAS"),
148+
clEnumValN(ImportFromUpstream, "import-from-upstream",
149+
"import object from upstream CAS"),
145150
clEnumValN(PutCacheKey, "put-cache-key",
146151
"set a value for a cache key"),
147152
clEnumValN(GetCacheResult, "get-cache-result",
@@ -237,11 +242,15 @@ int main(int Argc, char **Argv) {
237242
ExitOnErr(createStringError(inconvertibleErrorCode(),
238243
"missing <object> to operate on"));
239244

240-
if (Command == Import) {
245+
if (Command == Import || Command == ImportFromUpstream) {
241246
if (!UpstreamCAS)
242247
ExitOnErr(createStringError(inconvertibleErrorCode(),
243248
"missing '-upstream-cas'"));
244-
return import(*CAS, *UpstreamCAS, Inputs);
249+
250+
if (Command == Import)
251+
return import(*CAS, *UpstreamCAS, Inputs);
252+
253+
return importFromUpstream(*UpstreamCAS, *CAS, Inputs);
245254
}
246255

247256
if (Command == PutCacheKey || Command == GetCacheResult) {
@@ -671,6 +680,24 @@ static int import(ObjectStore &CAS, ObjectStore &UpstreamCAS,
671680
return 0;
672681
}
673682

683+
// Handles the `import-from-upstream` command: for every ID string in
// \p Objects, look the object up in \p FromCAS, import it into \p ToCAS with
// `ObjectStore::importObject`, and print the ID it has in \p ToCAS on its own
// line. Parse and import failures are reported through ExitOnError. Returns 0
// once every object has been processed.
static int importFromUpstream(ObjectStore &FromCAS, ObjectStore &ToCAS,
                              ArrayRef<std::string> Objects) {
  ExitOnError ExitOnErr("llvm-cas: import-from-upstream: ");
  for (StringRef Name : Objects) {
    CASID SourceID = ExitOnErr(FromCAS.parseID(Name));

    if (auto SourceRef = FromCAS.getReference(SourceID)) {
      // The object is known to the source store; import it and report the
      // ID under which the destination store knows it.
      ObjectRef Imported = ExitOnErr(ToCAS.importObject(FromCAS, *SourceRef));
      llvm::outs() << ToCAS.getID(Imported).toString() << "\n";
      continue;
    }

    // The source store has no reference for the requested ID.
    ExitOnErr(createStringError(inconvertibleErrorCode(),
                                "input not found: " + SourceID.toString()));
    return 1;
  }
  return 0;
}
700+
674701
static int putCacheKey(ObjectStore &CAS, ActionCache &AC,
675702
ArrayRef<std::string> Objects) {
676703
ExitOnError ExitOnErr("llvm-cas: put-cache-key: ");
@@ -790,4 +817,4 @@ static int prune(cas::ObjectStore &CAS) {
790817
ExitOnError ExitOnErr("llvm-cas: prune: ");
791818
ExitOnErr(CAS.pruneStorageData());
792819
return 0;
793-
}
820+
}

0 commit comments

Comments
 (0)