Skip to content

Commit 9b0ccec

Browse files
Thomas Stromberg
authored and committed
Add CreatedAt timestamp to each source
1 parent 63e4663 commit 9b0ccec

File tree

10 files changed

+127
-89
lines changed

10 files changed

+127
-89
lines changed

pkg/bluesky/bluesky.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,8 @@ func (c *Client) Fetch(ctx context.Context, urlStr string) (*profile.Profile, er
101101
// Fetch recent posts
102102
posts, lastActive := c.fetchPosts(ctx, handle, 50)
103103
p.Posts = posts
104-
if lastActive != "" {
105-
p.LastActive = lastActive
104+
if lastActive != "" && lastActive > p.UpdatedAt {
105+
p.UpdatedAt = lastActive
106106
}
107107

108108
return p, nil
@@ -131,7 +131,7 @@ func parseAPIResponse(data []byte, urlStr, handle string) (*profile.Profile, err
131131
}
132132

133133
if resp.CreatedAt != "" {
134-
p.Fields["joined"] = resp.CreatedAt
134+
p.CreatedAt = resp.CreatedAt
135135
}
136136

137137
// Extract hashtags from bio

pkg/codeberg/codeberg.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ func parseHTML(data []byte, urlStr, username string) *profile.Profile {
189189
// Pattern: Joined on 2023-04-06
190190
joinedPattern := regexp.MustCompile(`Joined\s+on\s+(\d{4}-\d{2}-\d{2})`)
191191
if m := joinedPattern.FindStringSubmatch(content); len(m) > 1 {
192-
prof.Fields["joined"] = m[1]
192+
prof.CreatedAt = m[1]
193193
}
194194

195195
// Extract follower/following counts

pkg/codeberg/codeberg_test.go

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,12 @@ func TestFetch_InvalidUsername(t *testing.T) {
169169

170170
func TestParseHTML(t *testing.T) {
171171
tests := []struct {
172-
name string
173-
html string
174-
username string
175-
wantName string
176-
wantPronouns string
177-
wantJoined string
172+
name string
173+
html string
174+
username string
175+
wantName string
176+
wantPronouns string
177+
wantCreatedAt string
178178
}{
179179
{
180180
name: "full profile with pronouns",
@@ -185,10 +185,10 @@ func TestParseHTML(t *testing.T) {
185185
<div>0 followers · 0 following</div>
186186
<div>Joined on 2023-04-06</div>
187187
</body></html>`,
188-
username: "johwhj",
189-
wantName: "Woohyun Joh",
190-
wantPronouns: "he/him",
191-
wantJoined: "2023-04-06",
188+
username: "johwhj",
189+
wantName: "Woohyun Joh",
190+
wantPronouns: "he/him",
191+
wantCreatedAt: "2023-04-06",
192192
},
193193
{
194194
name: "profile without pronouns",
@@ -198,9 +198,9 @@ func TestParseHTML(t *testing.T) {
198198
<span class="username">stephen-fox</span>
199199
<div>Joined on 2025-02-15</div>
200200
</body></html>`,
201-
username: "stephen-fox",
202-
wantName: "stephen-fox",
203-
wantJoined: "2025-02-15",
201+
username: "stephen-fox",
202+
wantName: "stephen-fox",
203+
wantCreatedAt: "2025-02-15",
204204
},
205205
{
206206
name: "organization profile",
@@ -224,8 +224,8 @@ func TestParseHTML(t *testing.T) {
224224
if tt.wantPronouns != "" && profile.Fields["pronouns"] != tt.wantPronouns {
225225
t.Errorf("Pronouns = %q, want %q", profile.Fields["pronouns"], tt.wantPronouns)
226226
}
227-
if tt.wantJoined != "" && profile.Fields["joined"] != tt.wantJoined {
228-
t.Errorf("Joined = %q, want %q", profile.Fields["joined"], tt.wantJoined)
227+
if tt.wantCreatedAt != "" && profile.CreatedAt != tt.wantCreatedAt {
228+
t.Errorf("Joined = %q, want %q", profile.CreatedAt, tt.wantCreatedAt)
229229
}
230230
})
231231
}

pkg/devto/devto.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ func (c *Client) Fetch(ctx context.Context, urlStr string) (*profile.Profile, er
9797
// Fetch recent articles via API
9898
posts, lastActive := c.fetchArticles(ctx, username, 50)
9999
p.Posts = posts
100-
if lastActive != "" {
101-
p.LastActive = lastActive
100+
if lastActive != "" && lastActive > p.UpdatedAt {
101+
p.UpdatedAt = lastActive
102102
}
103103

104104
return p, nil
@@ -145,8 +145,7 @@ func parseHTML(data []byte, urlStr, username string) *profile.Profile {
145145
// Extract joined date
146146
joinedPattern := regexp.MustCompile(`<time\s+datetime="([^"]+)"[^>]*>([^<]+)</time>`)
147147
if m := joinedPattern.FindStringSubmatch(content); len(m) > 2 {
148-
p.Fields["joined"] = strings.TrimSpace(m[2])
149-
p.Fields["joined_datetime"] = m[1]
148+
p.CreatedAt = m[1] // ISO datetime format
150149
}
151150

152151
// Extract work/employment - look for <p>Work</p> followed by value

pkg/generic/generic.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ func parseHTML(data []byte, urlStr string) *profile.Profile {
146146
p.Posts = posts
147147
p.Platform = "blog"
148148
if lastActive != "" {
149-
p.LastActive = lastActive
149+
p.UpdatedAt = lastActive
150150
} else if len(posts) > 0 && posts[0].URL != "" {
151-
p.LastActive = extractDateFromURL(posts[0].URL)
151+
p.UpdatedAt = extractDateFromURL(posts[0].URL)
152152
}
153153
}
154154

pkg/github/github.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,8 @@ func parseGraphQLResponse(data []byte, urlStr, _ string) (*profile.Profile, erro
335335
Company string `json:"company"`
336336
WebsiteURL string `json:"websiteUrl"`
337337
TwitterUser string `json:"twitterUsername"`
338+
CreatedAt string `json:"createdAt"`
339+
UpdatedAt string `json:"updatedAt"`
338340
SocialAccounts struct {
339341
Nodes []struct {
340342
URL string `json:"url"`
@@ -410,6 +412,14 @@ func parseGraphQLResponse(data []byte, urlStr, _ string) (*profile.Profile, erro
410412
}
411413
}
412414

415+
// Add account timestamps
416+
if user.CreatedAt != "" {
417+
prof.CreatedAt = user.CreatedAt
418+
}
419+
if user.UpdatedAt != "" {
420+
prof.UpdatedAt = user.UpdatedAt
421+
}
422+
413423
return prof, nil
414424
}
415425

@@ -450,10 +460,10 @@ func (c *Client) doAPIRequest(ctx context.Context, req *http.Request) ([]byte, e
450460
}
451461
defer func() { _ = resp.Body.Close() }() //nolint:errcheck // error ignored intentionally
452462

453-
// Parse rate limit headers (GitHub uses non-canonical casing, parse errors default to 0)
454-
//nolint:errcheck,canonicalheader // GitHub uses non-canonical header casing
463+
// Parse rate limit headers (parse errors default to 0)
464+
//nolint:errcheck,canonicalheader // GitHub uses non-canonical header casing, parse errors acceptable
455465
rateLimitRemain, _ := strconv.Atoi(resp.Header.Get("X-RateLimit-Remaining"))
456-
//nolint:errcheck,canonicalheader // GitHub uses non-canonical header casing
466+
//nolint:errcheck,canonicalheader // GitHub uses non-canonical header casing, parse errors acceptable
457467
rateLimitReset, _ := strconv.ParseInt(resp.Header.Get("X-RateLimit-Reset"), 10, 64)
458468
resetTime := time.Unix(rateLimitReset, 0)
459469

@@ -638,6 +648,8 @@ func parseJSON(data []byte, urlStr, _ string) (*profile.Profile, error) {
638648
AvatarURL string `json:"avatar_url"`
639649
HTMLURL string `json:"html_url"`
640650
Type string `json:"type"`
651+
CreatedAt string `json:"created_at"`
652+
UpdatedAt string `json:"updated_at"`
641653
}
642654

643655
if err := json.Unmarshal(data, &ghUser); err != nil {
@@ -722,6 +734,14 @@ func parseJSON(data []byte, urlStr, _ string) (*profile.Profile, error) {
722734
prof.Fields["type"] = ghUser.Type
723735
}
724736

737+
// Add account timestamps
738+
if ghUser.CreatedAt != "" {
739+
prof.CreatedAt = ghUser.CreatedAt
740+
}
741+
if ghUser.UpdatedAt != "" {
742+
prof.UpdatedAt = ghUser.UpdatedAt
743+
}
744+
725745
return prof, nil
726746
}
727747

pkg/mastodon/mastodon.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ func (c *Client) fetchViaAPI(ctx context.Context, host, username string) (*profi
155155
if accountID != "" {
156156
posts, lastActive := c.fetchStatuses(ctx, host, accountID, 50)
157157
p.Posts = posts
158-
if lastActive != "" {
159-
p.LastActive = lastActive
158+
if lastActive != "" && lastActive > p.UpdatedAt {
159+
p.UpdatedAt = lastActive
160160
}
161161
}
162162

@@ -210,6 +210,11 @@ func (*Client) parseAPIResponse(data []byte) (*profile.Profile, string, error) {
210210
// Filter out same-server Mastodon links
211211
p.SocialLinks = filterSameServerLinks(p.SocialLinks, p.URL)
212212

213+
// Add account creation date
214+
if acc.CreatedAt != "" {
215+
p.CreatedAt = acc.CreatedAt
216+
}
217+
213218
return p, acc.ID, nil
214219
}
215220

pkg/profile/profile.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,17 @@ type Profile struct {
4747
Error string `json:",omitempty"` // Error message if fetch failed (e.g., "login required")
4848

4949
// Core profile data
50-
Username string `json:",omitempty"` // Handle/username (without @ prefix)
51-
Name string `json:",omitempty"` // Display name
52-
Bio string `json:",omitempty"` // Profile bio/description
53-
Location string `json:",omitempty"` // Geographic location
54-
Website string `json:",omitempty"` // Personal website URL
50+
Username string `json:",omitempty"` // Handle/username (without @ prefix)
51+
Name string `json:",omitempty"` // Display name
52+
Bio string `json:",omitempty"` // Profile bio/description
53+
Location string `json:",omitempty"` // Geographic location
54+
Website string `json:",omitempty"` // Personal website URL
55+
CreatedAt string `json:",omitempty"` // Account creation date (ISO timestamp)
56+
UpdatedAt string `json:",omitempty"` // Most recent activity or profile update (ISO timestamp)
5557

5658
// Platform-specific fields
5759
Fields map[string]string `json:",omitempty"` // Additional platform-specific data (headline, employer, etc.)
5860

59-
// Activity timestamp
60-
LastActive string `json:",omitempty"` // ISO timestamp of last known activity (post, comment, etc.)
61-
6261
// For further crawling
6362
SocialLinks []string `json:",omitempty"` // Other social media URLs detected on the profile
6463

pkg/reddit/reddit.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func parseProfile(html, url, username string) (*profile.Profile, error) {
126126
// Extract cake day (account creation date)
127127
cakeDayPattern := regexp.MustCompile(`(?i)redditor since.*?(\d{4})`)
128128
if matches := cakeDayPattern.FindStringSubmatch(html); len(matches) > 1 {
129-
prof.Fields["joined_year"] = matches[1]
129+
prof.CreatedAt = matches[1] // Year only
130130
}
131131

132132
// Extract posts and comments with subreddit context

0 commit comments

Comments (0)