
Commit

* prefer I?ReadOnlyDictionary<,> over I?Dictionary<,>
* add field for auto prop `SplitEntities` @ RevisionWithSplitting.cs
* using primary ctor @ SaverChangeSet.cs
@ c#/crawler
n0099 committed Apr 1, 2024
1 parent 1bccc4e commit 252710b
Showing 12 changed files with 56 additions and 55 deletions.
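
The headline change prefers IReadOnlyDictionary<,> (and ReadOnlyDictionary<,>) over IDictionary<,> wherever a dictionary is only read. A minimal sketch of why this is a drop-in change for callers (the method and names below are illustrative, not code from this repository): Dictionary<,> implements both interfaces, so existing call sites keep compiling, while the narrower parameter type documents that the callee will not mutate its argument.

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

public static class Demo
{
    // Only reads the dictionary, so it asks for the read-only interface.
    private static string BuildQueryString(IReadOnlyDictionary<string, string> parameters) =>
        string.Join("&", parameters.Select(pair => $"{pair.Key}={Uri.EscapeDataString(pair.Value)}"));

    public static void Main()
    {
        // A plain Dictionary<,> satisfies IReadOnlyDictionary<,>, so the call site is unchanged.
        var postParam = new Dictionary<string, string> { { "kw", "test" }, { "pn", "1" } };
        Console.WriteLine(BuildQueryString(postParam)); // e.g. kw=test&pn=1
    }
}
```
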
10 changes: 6 additions & 4 deletions c#/crawler/src/Db/Revision/RevisionWithSplitting.cs
@@ -3,27 +3,29 @@ namespace tbm.Crawler.Db.Revision;
public abstract class RevisionWithSplitting<TBaseRevision> : IRevision
where TBaseRevision : class, IRevision
{
+    private readonly Dictionary<Type, TBaseRevision> _splitEntities = [];

public uint TakenAt { get; set; }
public ushort? NullFieldsBitMask { get; set; }
-    public IDictionary<Type, TBaseRevision> SplitEntities { get; } = new Dictionary<Type, TBaseRevision>();
+    public IReadOnlyDictionary<Type, TBaseRevision> SplitEntities => _splitEntities;

public virtual bool IsAllFieldsIsNullExceptSplit() => throw new NotSupportedException();

protected TValue? GetSplitEntityValue<TSplitEntity, TValue>
(Func<TSplitEntity, TValue?> valueSelector)
where TSplitEntity : class, TBaseRevision =>
-        SplitEntities.TryGetValue(typeof(TSplitEntity), out var entity)
+        _splitEntities.TryGetValue(typeof(TSplitEntity), out var entity)
? valueSelector((TSplitEntity)entity)
: default;

protected void SetSplitEntityValue<TSplitEntity, TValue>
(TValue? value, Action<TSplitEntity, TValue?> valueSetter, Func<TSplitEntity> entityFactory)
where TSplitEntity : class, TBaseRevision
{
-        if (SplitEntities.TryGetValue(typeof(TSplitEntity), out var entity))
+        if (_splitEntities.TryGetValue(typeof(TSplitEntity), out var entity))
valueSetter((TSplitEntity)entity, value);
else
-            SplitEntities[typeof(TSplitEntity)] = entityFactory();
+            _splitEntities[typeof(TSplitEntity)] = entityFactory();
}

public class ModelBuilderExtension(ModelBuilder builder, string baseTableName)
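
The RevisionWithSplitting change above replaces a getter-only IDictionary<,> auto-property with a private Dictionary<,> field exposed through an IReadOnlyDictionary<,> property. The distinction matters because a getter-only auto-property only prevents replacing the reference; the dictionary behind it stays writable from anywhere. Illustrative sketch (Before, After, and Demo are hypothetical types, not repository code):

```csharp
using System;
using System.Collections.Generic;

public class Before
{
    // Getter-only, yet any caller can still add, replace, or remove entries.
    public IDictionary<Type, string> SplitEntities { get; } = new Dictionary<Type, string>();
}

public class After
{
    private readonly Dictionary<Type, string> _splitEntities = new();

    // Read-only view over the private field; mutation stays inside the class.
    public IReadOnlyDictionary<Type, string> SplitEntities => _splitEntities;
}

public static class Demo
{
    public static void Main()
    {
        new Before().SplitEntities[typeof(int)] = "mutated from outside"; // compiles
        // new After().SplitEntities[typeof(int)] = "…"; // does not compile: get-only indexer
        Console.WriteLine(new After().SplitEntities.Count); // 0
    }
}
```
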
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/ClientRequester.cs
@@ -19,7 +19,7 @@ public class ClientRequester(
public async Task<JsonElement> RequestJson(
string url,
string clientVersion,
-        IDictionary<string, string> postParam,
+        IReadOnlyDictionary<string, string> postParam,
CancellationToken stoppingToken = default) =>
await Request(() => PostJson(url, postParam, clientVersion, stoppingToken), stream =>
{
@@ -84,7 +84,7 @@ private static async Task<T> Request<T>

private async Task<HttpResponseMessage> PostJson(
string url,
-        IDictionary<string, string> postParam,
+        IReadOnlyDictionary<string, string> postParam,
string clientVersion,
CancellationToken stoppingToken = default)
{
5 changes: 3 additions & 2 deletions c#/crawler/src/Tieba/Crawl/CrawlPost.cs
@@ -46,7 +46,8 @@ public async Task<SavedThreadsList> CrawlThreads
var threadsLatestReplyPostedAt = currentPageChangeSet.AllAfter
.Select(th => th.LatestReplyPostedAt).ToList();
minLatestReplyPostedAt = threadsLatestReplyPostedAt.Min();
-            if (crawlingPage == 1) _latestReplyPostedAtCheckpointCache[fid] = threadsLatestReplyPostedAt.Max();
+            if (crawlingPage == 1)
+                _latestReplyPostedAtCheckpointCache[fid] = threadsLatestReplyPostedAt.Max();
}
else
{ // retry this page
@@ -98,7 +99,7 @@ await Task.WhenAll(shouldCrawlParentPosts.Select(async tid =>
}

public async Task CrawlSubReplies(
-        IDictionary<Tid, SaverChangeSet<ReplyPost>> savedRepliesKeyByTid,
+        IReadOnlyDictionary<Tid, SaverChangeSet<ReplyPost>> savedRepliesKeyByTid,
Fid fid,
CancellationToken stoppingToken = default)
{
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs
@@ -122,7 +122,7 @@ protected virtual void ThrowIfEmptyUsersEmbedInPosts() { }
protected virtual void PostParseHook(
TResponse response,
CrawlRequestFlag flag,
-        IDictionary<PostId, TPost> parsedPostsInResponse) { }
+        IReadOnlyDictionary<PostId, TPost> parsedPostsInResponse) { }
protected virtual void BeforeCommitSaveHook(CrawlerDbContext db, UserSaver userSaver) { }
protected virtual void PostCommitSaveHook(
SaverChangeSet<TPost> savedPosts,
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs
@@ -24,7 +24,7 @@ public class SubReplyCrawlFacade(
protected override void PostParseHook(
SubReplyResponse response,
CrawlRequestFlag flag,
-        IDictionary<PostId, SubReplyPost> parsedPostsInResponse)
+        IReadOnlyDictionary<PostId, SubReplyPost> parsedPostsInResponse)
{
foreach (var sr in parsedPostsInResponse.Values)
{
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs
@@ -19,7 +19,7 @@ public class ThreadArchiveCrawlFacade(
protected override void PostParseHook(
ThreadResponse response,
CrawlRequestFlag flag,
-        IDictionary<PostId, ThreadPost> parsedPostsInResponse)
+        IReadOnlyDictionary<PostId, ThreadPost> parsedPostsInResponse)
    { // the second response carrying this flag is identical to the first, so just skip it
if (flag == CrawlRequestFlag.ThreadClientVersion602) return;
var data = response.Data;
34 changes: 17 additions & 17 deletions c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
@@ -42,6 +42,23 @@ protected override void BeforeCommitSaveHook(CrawlerDbContext db, UserSaver user
_ = db.Users.UpsertRange(newLatestRepliersExceptLocked).NoUpdate().Run();
}

+    protected override void PostParseHook(
+        ThreadResponse response,
+        CrawlRequestFlag flag,
+        IReadOnlyDictionary<PostId, ThreadPost> parsedPostsInResponse)
+    {
+        var data = response.Data;
+        if (flag == CrawlRequestFlag.ThreadClientVersion602) FillFromRequestingWith602(data.ThreadList);
+        if (flag != CrawlRequestFlag.None) return;
+        UserParser.Parse(data.UserList);
+        UserParser.ResetUsersIcon();
+        ParseLatestRepliers(data.ThreadList);
+
+        // remove livepost threads since their real parent forum may not match with current crawling fid
+        data.ThreadList.Where(th => th.LivePostType != "")
+            .ForEach(th => Posts.TryRemove((Tid)th.Tid, out _));
+    }

protected void ParseLatestRepliers(IEnumerable<Thread> threads) =>
threads.Select(th => th.LastReplyer ?? null) // LastReplyer will be null when LivePostType != ""
.OfType<TbClient.User>() // filter out nulls
@@ -67,21 +84,4 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid
// LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value
t.parsed.LatestReplierUid = t.inResponse.LastReplyer?.Uid;
});

-    protected override void PostParseHook(
-        ThreadResponse response,
-        CrawlRequestFlag flag,
-        IDictionary<PostId, ThreadPost> parsedPostsInResponse)
-    {
-        var data = response.Data;
-        if (flag == CrawlRequestFlag.ThreadClientVersion602) FillFromRequestingWith602(data.ThreadList);
-        if (flag != CrawlRequestFlag.None) return;
-        UserParser.Parse(data.UserList);
-        UserParser.ResetUsersIcon();
-        ParseLatestRepliers(data.ThreadList);
-
-        // remove livepost threads since their real parent forum may not match with current crawling fid
-        data.ThreadList.Where(th => th.LivePostType != "")
-            .ForEach(th => Posts.TryRemove((Tid)th.Tid, out _));
-    }
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/ThreadLateCrawlFacade.cs
@@ -8,7 +8,7 @@ public class ThreadLateCrawlFacade(
public delegate ThreadLateCrawlFacade New(Fid fid);

public async Task CrawlThenSave(
-        IDictionary<Tid, FailureCount> failureCountsKeyByTid,
+        IReadOnlyDictionary<Tid, FailureCount> failureCountsKeyByTid,
CancellationToken stoppingToken = default)
{
var threads = await Task.WhenAll(
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/IRevisionProperties.cs
@@ -2,10 +2,10 @@ namespace tbm.Crawler.Tieba.Crawl.Saver;

public interface IRevisionProperties
{
-    protected static IDictionary<Type, IDictionary<string, PropertyInfo>> Cache { get; } = GetPropsKeyByType(
+    protected static IReadOnlyDictionary<Type, IReadOnlyDictionary<string, PropertyInfo>> Cache { get; } = GetPropsKeyByType(
[typeof(ThreadRevision), typeof(ReplyRevision), typeof(SubReplyRevision), typeof(UserRevision)]);

-    private static IDictionary<Type, IDictionary<string, PropertyInfo>> GetPropsKeyByType(IEnumerable<Type> types) =>
+    private static IReadOnlyDictionary<Type, IReadOnlyDictionary<string, PropertyInfo>> GetPropsKeyByType(IEnumerable<Type> types) =>
types.ToDictionary(type => type, type =>
-            (IDictionary<string, PropertyInfo>)type.GetProperties().ToDictionary(prop => prop.Name));
+            (IReadOnlyDictionary<string, PropertyInfo>)type.GetProperties().ToDictionary(prop => prop.Name));
}
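
In IRevisionProperties above, the cast inside the value selector is what makes the nested read-only type line up: ToDictionary always builds Dictionary<,> instances, and generic dictionaries are not covariant, so the inner dictionary must be upcast to IReadOnlyDictionary<,> for the outer ToDictionary to produce the declared nested interface type. A standalone sketch under assumed names (PropertyCache and Build are not the crawler's):

```csharp
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;

public static class PropertyCache
{
    // Nested read-only cache: property metadata keyed by name, keyed by declaring type.
    public static IReadOnlyDictionary<Type, IReadOnlyDictionary<string, PropertyInfo>> Build(
        IEnumerable<Type> types) =>
        types.ToDictionary(
            type => type,
            type => (IReadOnlyDictionary<string, PropertyInfo>)type
                .GetProperties()
                .ToDictionary(prop => prop.Name)); // inner Dictionary upcast per entry

    public static void Main()
    {
        var cache = Build(new[] { typeof(Version), typeof(TimeSpan) });
        // Reuse the cached PropertyInfo instead of calling GetProperties() again.
        Console.WriteLine(cache[typeof(Version)]["Major"].PropertyType); // System.Int32
    }
}
```
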
35 changes: 16 additions & 19 deletions c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs
@@ -2,27 +2,24 @@

namespace tbm.Crawler.Tieba.Crawl.Saver;

-public class SaverChangeSet<TPost> where TPost : class, IPost
+public class SaverChangeSet<TPost>(
+    IReadOnlyCollection<TPost> existingBefore,
+    ICollection<TPost> existingAfterAndNewlyAdded,
+    Func<TPost, PostId> postIdSelector)
+    where TPost : class, IPost
{
-    public SaverChangeSet(
-        IReadOnlyCollection<TPost> existingBefore,
-        ICollection<TPost> existingAfterAndNewlyAdded,
-        Func<TPost, PostId> postIdSelector)
-    {
-        Existing = existingBefore
-            .OrderBy(postIdSelector)
-            .EquiZip(existingAfterAndNewlyAdded
+    public IReadOnlyCollection<(TPost Before, TPost After)> Existing { get; } = existingBefore
+        .OrderBy(postIdSelector)
+        .EquiZip(existingAfterAndNewlyAdded
.IntersectBy(existingBefore.Select(postIdSelector), postIdSelector)
.OrderBy(postIdSelector),
-                (before, after) => (before, after))
-            .ToList().AsReadOnly();
-        NewlyAdded = existingAfterAndNewlyAdded
-            .ExceptBy(existingBefore.Select(postIdSelector), postIdSelector)
-            .ToList().AsReadOnly();
-        AllAfter = existingAfterAndNewlyAdded.ToList().AsReadOnly();
-    }
+            (before, after) => (before, after))
+        .ToList().AsReadOnly();

-    public IReadOnlyCollection<(TPost Before, TPost After)> Existing { get; }
-    public IReadOnlyCollection<TPost> NewlyAdded { get; }
-    public IReadOnlyCollection<TPost> AllAfter { get; }
+    public IReadOnlyCollection<TPost> NewlyAdded { get; } = existingAfterAndNewlyAdded
+        .ExceptBy(existingBefore.Select(postIdSelector), postIdSelector)
+        .ToList().AsReadOnly();

+    public IReadOnlyCollection<TPost> AllAfter { get; } = existingAfterAndNewlyAdded
+        .ToList().AsReadOnly();
}
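
SaverChangeSet above moves its constructor body into property initializers by way of a C# 12 primary constructor. A sketch of the same style with a hypothetical type (ChangeSet<T> below is not SaverChangeSet): primary-constructor parameters that are consumed only in initializers are not captured as hidden fields, so the object keeps no reference to the input collections after construction.

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

public class ChangeSet<T>(IReadOnlyCollection<T> before, IReadOnlyCollection<T> after)
{
    // Each collection is computed once, eagerly, from the primary-constructor parameters.
    public IReadOnlyCollection<T> Added { get; } = after.Except(before).ToList().AsReadOnly();
    public IReadOnlyCollection<T> Removed { get; } = before.Except(after).ToList().AsReadOnly();
    public IReadOnlyCollection<T> AllAfter { get; } = after.ToList().AsReadOnly();
}

public static class Demo
{
    public static void Main()
    {
        var changes = new ChangeSet<int>(before: [1, 2, 3], after: [2, 3, 4]);
        Console.WriteLine(string.Join(",", changes.Added));   // 4
        Console.WriteLine(string.Join(",", changes.Removed)); // 1
    }
}
```

Keeping the projections in initializers preserves the original behaviour of computing all the derived collections once, at construction time.
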
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
@@ -5,7 +5,7 @@ public abstract class SaverWithRevision<TBaseRevision> : IRevisionProperties
{
protected delegate void RevisionUpsertDelegate(CrawlerDbContext db, IEnumerable<TBaseRevision> revision);

-    protected virtual IDictionary<Type, RevisionUpsertDelegate> RevisionUpsertDelegatesKeyBySplitEntityType =>
+    protected virtual IReadOnlyDictionary<Type, RevisionUpsertDelegate> RevisionUpsertDelegatesKeyBySplitEntityType =>
throw new NotSupportedException();

protected virtual NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) =>
7 changes: 4 additions & 3 deletions c#/imagePipeline/src/Consumer/HashConsumer.cs
@@ -1,3 +1,4 @@
+using System.Collections.ObjectModel;
using OpenCvSharp.ImgHash;
using Size = OpenCvSharp.Size;

@@ -6,19 +7,19 @@ namespace tbm.ImagePipeline.Consumer;
public sealed class HashConsumer : MatrixConsumer, IDisposable
{
private readonly FailedImageHandler _failedImageHandler;
-    private readonly Dictionary<ImgHashBase, Action<ImageHash, byte[]>> _imageHashSettersKeyByAlgorithm;
+    private readonly ReadOnlyDictionary<ImgHashBase, Action<ImageHash, byte[]>> _imageHashSettersKeyByAlgorithm;

[SuppressMessage("Correctness", "SS004:Implement Equals() and GetHashcode() methods for a type used in a collection.")]
public HashConsumer(FailedImageHandler failedImageHandler)
{
_failedImageHandler = failedImageHandler;
-        _imageHashSettersKeyByAlgorithm = new()
+        _imageHashSettersKeyByAlgorithm = new Dictionary<ImgHashBase, Action<ImageHash, byte[]>>
{
{PHash.Create(), (image, bytes) => image.PHash = BitConverter.ToUInt64(bytes)},
{AverageHash.Create(), (image, bytes) => image.AverageHash = BitConverter.ToUInt64(bytes)},
{BlockMeanHash.Create(), (image, bytes) => image.BlockMeanHash = bytes},
{MarrHildrethHash.Create(), (image, bytes) => image.MarrHildrethHash = bytes}
-        };
+        }.AsReadOnly();
}

public void Dispose() => _imageHashSettersKeyByAlgorithm.Keys.ForEach(hash => hash.Dispose());
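
HashConsumer above now builds the algorithm-to-setter map with an explicit Dictionary<,> initializer and wraps it via .AsReadOnly(), the .NET 7+ CollectionExtensions helper that returns a ReadOnlyDictionary<,> (hence the added using System.Collections.ObjectModel;) wrapping the original dictionary rather than copying it. A small sketch with illustrative keys and values, not the OpenCvSharp hash setup:

```csharp
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;

public static class Demo
{
    public static void Main()
    {
        ReadOnlyDictionary<string, Func<int, int>> ops = new Dictionary<string, Func<int, int>>
        {
            { "double", x => x * 2 },
            { "square", x => x * x }
        }.AsReadOnly(); // .NET 7+: wraps the dictionary; no copy is made

        Console.WriteLine(ops["square"](7)); // 49
        // ops.Add("negate", x => -x); // does not compile: ReadOnlyDictionary<,> exposes no Add
    }
}
```
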
