Skip to content

Commit

Permalink
wip, use a manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
nzdev committed Dec 8, 2020
1 parent 4d119d5 commit 3f0ea94
Show file tree
Hide file tree
Showing 8 changed files with 461 additions and 34 deletions.
169 changes: 157 additions & 12 deletions src/Examine.AzureDirectory/AzureLuceneDirectory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text.Json;
using Azure;
using Azure.Storage.Blobs;
using Examine.LuceneEngine.Directories;
Expand Down Expand Up @@ -86,14 +87,14 @@ protected string NormalizeContainerRootFolder(string rootFolder)
{
if (string.IsNullOrEmpty(rootFolder))
return string.Empty;
rootFolder = rootFolder.Trim('/');
rootFolder = rootFolder + "/";
rootFolder = rootFolder.Trim('/');
rootFolder = rootFolder + "/";
return rootFolder;
}

protected virtual LockFactory GetLockFactory()
{
return IsReadOnly ? (LockFactory)new NoopLockFactory()
return IsReadOnly ? (LockFactory)new NoopLockFactory()
: new MultiIndexLockFactory(new AzureDirectorySimpleLockFactory(this), CacheDirectory.LockFactory);
}
protected virtual BlobClient GetBlobClient(string blobName)
Expand Down Expand Up @@ -132,7 +133,7 @@ public virtual void RebuildCache()
{
CacheDirectory.TouchFile(file);
var blob = GetBlobClient(RootFolder + file);
SyncFile(blob,file);
SyncFile(blob, file);
}
}

Expand Down Expand Up @@ -185,8 +186,8 @@ public override bool FileExists(string name)
// something isn't quite right, need to re-sync

Trace.WriteLine($"ERROR {e.ToString()} Exception thrown while checking file ({name}) exists for {RootFolder}");
SetDirty();
return BlobExists(name);
SetDirty();
return BlobExists(name);
}
}

Expand All @@ -213,7 +214,7 @@ public override long FileModified(string name)

//This is the data structure of how the default Lucene FSDirectory returns this value so we want
// to be consistent with how Lucene works
return (long) utcDate.Subtract(new DateTime(1970, 1, 1, 0, 0, 0)).TotalMilliseconds;
return (long)utcDate.Subtract(new DateTime(1970, 1, 1, 0, 0, 0)).TotalMilliseconds;
}

// TODO: Need to check lucene source, returning this value could be problematic
Expand All @@ -235,7 +236,7 @@ public override void TouchFile(string name)
CacheDirectory.TouchFile(name);
SetDirty();
}
protected void SyncFile(BlobClient _blob,string fileName)
protected void SyncFile(BlobClient _blob, string fileName)
{
Trace.WriteLine($"INFO Syncing file {fileName} for {RootFolder}");
// then we will get it fresh into local deflatedName
Expand Down Expand Up @@ -281,7 +282,7 @@ protected void SyncFile(BlobClient _blob,string fileName)
#endif
}
}

}
}
/// <summary>Removes an existing file in the directory. </summary>
Expand Down Expand Up @@ -440,7 +441,7 @@ public override IndexInput OpenInput(string name)

if (TryGetBlobFile(name, out var blob, out var err))
{
return _azureIndexInputFactory .GetIndexInput(this, blob);
return _azureIndexInputFactory.GetIndexInput(this, blob);
}
else
{
Expand Down Expand Up @@ -524,7 +525,7 @@ public virtual bool ShouldCompressFile(string path)
/// If _dirty is true and blob storage files are looked up, this will return those blob storage files, this is a performance gain so
/// we don't double query blob storage.
/// </returns>
public override string[] CheckDirty()
public string[] CheckDirty()
{
if (_dirty)
{
Expand Down Expand Up @@ -552,6 +553,18 @@ public override string[] CheckDirty()
return null;
}

/// <summary>
/// Checks dirty flag and sets the _inSync flag after querying the blob strorage vs local storage segment gen
/// </summary>
/// <returns>
/// If _dirty is true and blob storage files are looked up, this will return those blob storage files, this is a performance gain so
/// we don't double query blob storage.
/// </returns>
public override string[] CheckDirtyWithoutWriter()
{
return CheckDirty();
}

/// <summary>
/// Called when the index is out of sync with the master index
/// </summary>
Expand All @@ -560,7 +573,7 @@ protected virtual void HandleOutOfSync()
//Do nothing
}

private void SetDirty()
public override void SetDirty()
{
if (!_dirty)
{
Expand Down Expand Up @@ -602,5 +615,137 @@ private bool TryGetBlobFile(string name, out BlobClient blob, out RequestFailedE
return false;
}
}

#region Sync
/// <summary>Creates a new, empty file in the directory with the given name.
/// Returns a stream writing this file.
/// </summary>
public IndexOutput CreateOutput(string blobFileName, string name)
{
SetDirty();

//if we are readonly, then we don't modify anything
if (IsReadOnly)
{
return _noopIndexOutput;
}

var blob = _blobContainer.GetBlobClient(GenerateBlobName(blobFileName));
return _azureIndexOutputFactory.CreateIndexOutput(this, blob, name);
}

private string GenerateBlobName(string blobFileName)
{
return RootFolder + blobFileName;
}
private string CleanBlobName(string blobFileName)
{
return blobFileName.Replace(RootFolder, "");
}
protected override void CleanupRemoteFiles()
{
//TODO : Get all manifests on remote. Find older than x. Remove files older than x not referenced by a newer manifest, delete old manifest
List<ExamineDirectoryManifest> manifests = GetAllManifests();
var orderedManifests = manifests.OrderByDescending(x => x.Modified);
var retainedManifests = orderedManifests.Where((x, y) => y < 2 || !ManifestExpired(x));
var removableManifests = orderedManifests.Where(x => !retainedManifests.Contains(x));
var retainedFiles = removableManifests.SelectMany(x => x.Entries).Select(x => x.BlobFileName).ToDictionary(x => x, y => y);
foreach (var manifest in removableManifests)
{
foreach (var entry in manifest.Entries)
{
if (!retainedFiles.ContainsKey(entry.BlobFileName))
{
_blobContainer.DeleteBlobIfExists(GenerateBlobName(entry.BlobFileName));
}
}
_blobContainer.DeleteBlobIfExists(GenerateBlobName(GenerateManifestFileName(manifest)));
}
}

protected virtual bool ManifestExpired(ExamineDirectoryManifest manifest)
{
//Suggest tracking sync state
return manifest.Modified < DateTime.Now.AddHours(1).Ticks;
}
protected override void UploadToRemote(ExamineDirectoryManifest manifest)
{
foreach (var item in manifest.Entries)
{
if (item.OriginalManifestId == manifest.Id || !BlobExists(item.BlobFileName))
{
//New/Updated/missing file
CreateOutput(item.BlobFileName, item.LuceneFileName);
}
}

var jsonString = SerializeManifest(manifest);
using (var stream = new MemoryStream())
{
var sw = new StreamWriter(stream);
sw.Write(jsonString);
sw.Flush();
stream.Position = 0;
var client = _blobContainer.GetBlobClient(GenerateBlobName( GenerateManifestFileName(manifest)));
client.Upload(stream);
}
}

private string GenerateManifestFileName(ExamineDirectoryManifest manifest)
{
return MANIFEST_FILE_PREFIX + manifest.Id + MANIFEST_FILE_EXTENSION;
}

public const string MANIFEST_FILE_PREFIX = "cc-";
public const string MANIFEST_FILE_EXTENSION = ".manifest";
protected override ExamineDirectoryManifest GetMostRecentManifest()
{
List<ExamineDirectoryManifest> manifests = GetAllManifests();
return manifests.OrderByDescending(x => x.Modified).FirstOrDefault();
}

protected override List<ExamineDirectoryManifest> GetAllManifests()
{
List<ExamineDirectoryManifest> manifests = new List<ExamineDirectoryManifest>();
foreach (var blob in _blobContainer.GetBlobs(prefix: RootFolder + MANIFEST_FILE_PREFIX))
{
try
{
if (!blob.Name.EndsWith(MANIFEST_FILE_EXTENSION))
{
continue;
}
var client = _blobContainer.GetBlobClient(blob.Name);
using (var ms = new MemoryStream())
{
client.DownloadTo(ms);
ms.Seek(0, SeekOrigin.Begin);
using (StreamReader reader = new StreamReader(ms))
{
string text = reader.ReadToEnd();
var manifest = DeserializeManifest(text);
manifests.Add(manifest);
}
}
}
catch (Exception ex)
{
Trace.WriteLine($"ERROR Failed to download manifest {blob}. {ex.ToString()}");
}
}

return manifests;
}

public override string SerializeManifest(ExamineDirectoryManifest manifest)
{
return JsonSerializer.Serialize(manifest);
}
public override ExamineDirectoryManifest DeserializeManifest(string manifestText)
{
return JsonSerializer.Deserialize<ExamineDirectoryManifest>(manifestText);
}

#endregion
}
}
14 changes: 13 additions & 1 deletion src/Examine.AzureDirectory/AzureReadOnlyLuceneDirectory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.IO;
using System.Linq;
using Azure.Storage.Blobs;
using Examine.LuceneEngine;
using Examine.LuceneEngine.Directories;
using Lucene.Net.Store;
using Directory = Lucene.Net.Store.Directory;
Expand Down Expand Up @@ -70,6 +71,13 @@ private void CreateOrReadCache()
public void ResyncCache()
{
Trace.WriteLine($"INFO Rebuilding index cache {RootFolder}");
var manifest = this.GetMostRecentManifest();
if (manifest != null)
{
//no files
return;
//SyncManifestFiles(manifest);
}
try
{
Trace.WriteLine($"Clearing index cache {RootFolder}");
Expand Down Expand Up @@ -198,6 +206,10 @@ public override Lock MakeLock(string name)
{
return base.MakeLock(name);
}


public override void SyncManifest(ExamineIndexWriter writer)
{
//DO nothing. Read only
}
}
}
16 changes: 9 additions & 7 deletions src/Examine.Test/AzureDirectoryTests/ReadOnlyTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,24 +115,26 @@ public void Read_Only_Sync()
}),
new Task(() =>
{
for(var i = 0; i < 3; i++) // write index x 3
for(var a = 0; a < 3; a++) // write index x 3
{
ValueSet cloned = GetRandomValueSet();
writeIndex.IndexItem(cloned);
}
}),
new Task(() =>
{
//for(var i = 0; i < 3; i++)
//{
// var s = readSearcher.GetSearcher().CreateQuery().NativeQuery("test"); // force the reader to sync
// Thread.Sleep(50);
//}
for(var i = 0; i < 10; i++)
{
var s = readSearcher.GetSearcher().CreateQuery().NativeQuery("test"); // force the reader to sync
var sr = s.Execute();
var er = sr.Skip(0).ToList();
Thread.Sleep(50);
}
}),
new Task(() =>
{
ValueSet cloned = GetRandomValueSet();
// readIndex.IndexItem(cloned); // readonly index doesn't write
// readIndex.IndexItem(cloned); // readonly index doesn't write //don't do this
})
};
foreach (var t in tasks) t.Start();
Expand Down
2 changes: 2 additions & 0 deletions src/Examine/Examine.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
<Compile Include="..\SolutionInfo.cs">
<Link>Properties\SolutionInfo.cs</Link>
</Compile>
<Compile Include="LuceneEngine\Directories\ExamineDirectoryManifest.cs" />
<Compile Include="FieldDefinition.cs" />
<Compile Include="FieldDefinitionTypes.cs" />
<Compile Include="IExamineManager.cs" />
Expand Down Expand Up @@ -134,6 +135,7 @@
<Compile Include="LuceneEngine\Directories\TempEnvDirectoryFactory.cs" />
<Compile Include="LuceneEngine\CultureInvariantStandardAnalyzer.cs" />
<Compile Include="LuceneEngine\EmailAddressAnalyzer.cs" />
<Compile Include="LuceneEngine\ExamineIndexWriter.cs" />
<Compile Include="LuceneEngine\IFieldValueTypeFactory.cs" />
<Compile Include="LuceneEngine\Indexing\GenericAnalyzerFieldValueType.cs" />
<Compile Include="LuceneEngine\Indexing\IIndexRangeValueType.cs" />
Expand Down
Loading

0 comments on commit 3f0ea94

Please sign in to comment.