@@ -109,9 +109,18 @@ func New(ctx context.Context, opts ...Option) (*Client, error) {
109109 cfg .logger .InfoContext (ctx , "linkedin client created" , "cookie_count" , len (cookies ))
110110
111111 return & Client {
112- httpClient : & http.Client {Jar : jar , Timeout : 3 * time .Second },
113- cache : cfg .cache ,
114- logger : cfg .logger ,
112+ httpClient : & http.Client {
113+ Jar : jar ,
114+ Timeout : 3 * time .Second ,
115+ CheckRedirect : func (_ * http.Request , via []* http.Request ) error {
116+ if len (via ) >= 1 {
117+ return http .ErrUseLastResponse
118+ }
119+ return nil
120+ },
121+ },
122+ cache : cfg .cache ,
123+ logger : cfg .logger ,
115124 }, nil
116125}
117126
@@ -138,26 +147,39 @@ func (c *Client) Fetch(ctx context.Context, urlStr string) (*profile.Profile, er
138147 return nil , fmt .Errorf ("request failed: %w" , err )
139148 }
140149
141- prof , parseErr := parseProfile (body , urlStr )
142- if parseErr != nil {
143- // Log additional context for debugging
144- c .logger .DebugContext (ctx , "linkedin parse failed" ,
145- "url" , urlStr ,
146- "error" , parseErr ,
147- "response_size" , len (body ),
148- )
149- return prof , parseErr
150- }
151-
152- // Extract URN for API calls
150+ // Extract username from URL for API calls
153151 username := extractPublicID (urlStr )
154- memberURN := extractMemberURN (body )
152+
153+ // Try to extract the target profile's member URN from the HTML
154+ // IMPORTANT: The HTML contains URNs for both logged-in user and viewed profile
155+ // We must extract the URN for the TARGET profile, not the logged-in user
156+ memberURN := extractTargetMemberURN (body , username )
155157 c .logger .DebugContext (ctx , "extracted for API call" , "username" , username , "memberURN" , memberURN )
156158
157- // If no employer found from HTML parsing, try the Voyager API
158- c .logger .DebugContext (ctx , "checking employer" , "employer" , prof .Fields ["employer" ])
159+ // PRIMARY: Use Voyager API to get profile data (avoids logged-in user data contamination)
160+ // The HTML often contains the logged-in user's data mixed with viewed profile data
161+ var prof * profile.Profile
162+ if memberURN != "" {
163+ prof = c .fetchProfileFromAPI (ctx , memberURN , urlStr , username )
164+ }
165+
166+ // FALLBACK: Parse HTML only if API failed
167+ if prof == nil {
168+ var parseErr error
169+ prof , parseErr = parseProfile (body , urlStr )
170+ if parseErr != nil {
171+ c .logger .DebugContext (ctx , "linkedin parse failed" ,
172+ "url" , urlStr ,
173+ "error" , parseErr ,
174+ "response_size" , len (body ),
175+ )
176+ return prof , parseErr
177+ }
178+ }
179+
180+ // Ensure we have experience data
159181 if prof .Fields ["employer" ] == "" || prof .Fields ["title" ] == "" {
160- if username != "" || memberURN != "" {
182+ if memberURN != "" {
161183 exp := c .fetchExperienceFromAPI (ctx , username , memberURN )
162184 if exp .employer != "" && prof .Fields ["employer" ] == "" {
163185 prof .Fields ["employer" ] = exp .employer
@@ -170,7 +192,7 @@ func (c *Client) Fetch(ctx context.Context, urlStr string) (*profile.Profile, er
170192 }
171193 }
172194
173- // If no location found from HTML parsing, try the Voyager API
195+ // Ensure we have location
174196 if prof .Location == "" && memberURN != "" {
175197 loc := c .fetchLocationFromAPI (ctx , memberURN )
176198 if loc != "" {
@@ -179,12 +201,231 @@ func (c *Client) Fetch(ctx context.Context, urlStr string) (*profile.Profile, er
179201 }
180202 }
181203
182- return prof , parseErr
204+ // Extract social links from HTML (API doesn't provide these)
205+ prof .SocialLinks = htmlutil .SocialLinks (string (body ))
206+ extractContactInfo (prof , string (body ))
207+ prof .SocialLinks = filterSamePlatformLinks (prof .SocialLinks )
208+
209+ return prof , nil
183210}
184211
185212// EnableDebug enables debug logging.
186213func (c * Client ) EnableDebug () { c .debug = true }
187214
215+ // fetchProfileFromAPI fetches the profile data from the LinkedIn Voyager API.
216+ // This is the primary method for getting profile data as it avoids logged-in user data contamination.
217+ // Uses the /identity/profiles/{publicIdentifier} endpoint which returns profile by username.
218+ func (c * Client ) fetchProfileFromAPI (ctx context.Context , _ , profileURL , username string ) * profile.Profile {
219+ if username == "" {
220+ c .logger .DebugContext (ctx , "no username for profile API call" )
221+ return nil
222+ }
223+
224+ if err := c .ensureSessionCookies (ctx ); err != nil {
225+ c .logger .DebugContext (ctx , "failed to get session cookies for profile" , "error" , err )
226+ return nil
227+ }
228+
229+ // Use the identity/profiles endpoint which takes publicIdentifier (username) directly
230+ // This avoids the problem of extracting wrong URN from HTML
231+ apiURL := fmt .Sprintf ("https://www.linkedin.com/voyager/api/identity/profiles/%s" , url .PathEscape (username ))
232+
233+ c .logger .DebugContext (ctx , "fetching profile from voyager api" , "url" , apiURL )
234+
235+ req , err := http .NewRequestWithContext (ctx , http .MethodGet , apiURL , http .NoBody )
236+ if err != nil {
237+ c .logger .DebugContext (ctx , "profile api request creation failed" , "error" , err )
238+ return nil
239+ }
240+
241+ setVoyagerHeaders (req , c .httpClient , c .logger )
242+ req .Header .Set ("Accept" , "application/vnd.linkedin.normalized+json+2.1" )
243+
244+ body , err := cache .FetchURL (ctx , c .cache , c .httpClient , req , c .logger )
245+ if err != nil {
246+ c .logger .DebugContext (ctx , "profile api request failed" , "error" , err )
247+ return nil
248+ }
249+
250+ c .logger .DebugContext (ctx , "profile api response" , "bodySize" , len (body ))
251+
252+ return extractProfileFromIdentityAPI (body , profileURL , username , c .logger )
253+ }
254+
255+ // extractProfileFromIdentityAPI extracts profile data from the /identity/profiles/ API response.
256+ // This endpoint returns profile data with fields like firstName, lastName, headline, geoLocationName.
257+ func extractProfileFromIdentityAPI (body []byte , profileURL , username string , logger * slog.Logger ) * profile.Profile {
258+ prof := & profile.Profile {
259+ Platform : platform ,
260+ URL : profileURL ,
261+ Authenticated : true ,
262+ Username : username ,
263+ Fields : make (map [string ]string ),
264+ }
265+
266+ // The identity/profiles API returns JSON with direct fields:
267+ // firstName, lastName, headline, geoLocationName, industryName, etc.
268+
269+ // Extract firstName and lastName
270+ firstName := extractJSONField (string (body ), "firstName" )
271+ lastName := extractJSONField (string (body ), "lastName" )
272+ if firstName != "" {
273+ prof .Name = unescapeJSON (firstName )
274+ if lastName != "" {
275+ prof .Name += " " + unescapeJSON (lastName )
276+ }
277+ logger .Debug ("extracted name from identity API" , "name" , prof .Name )
278+ }
279+
280+ // Extract headline (bio)
281+ if headline := extractJSONField (string (body ), "headline" ); headline != "" {
282+ prof .Bio = unescapeJSON (headline )
283+ logger .Debug ("extracted headline from identity API" , "headline" , prof .Bio )
284+ }
285+
286+ // Extract location
287+ if loc := extractJSONField (string (body ), "geoLocationName" ); loc != "" {
288+ prof .Location = unescapeJSON (loc )
289+ logger .Debug ("extracted location from identity API" , "location" , prof .Location )
290+ }
291+
292+ // Extract pronouns
293+ pronounRe := regexp .MustCompile (`"standardizedPronoun"\s*:\s*"(HE_HIM|SHE_HER|THEY_THEM)"` )
294+ if m := pronounRe .FindSubmatch (body ); len (m ) > 1 {
295+ pronouns := convertStandardizedPronoun (string (m [1 ]))
296+ if pronouns != "" {
297+ prof .Fields ["pronouns" ] = pronouns
298+ logger .Debug ("extracted pronouns from identity API" , "pronouns" , pronouns )
299+ }
300+ }
301+
302+ // If no name found, return nil to fall back to HTML parsing
303+ if prof .Name == "" {
304+ logger .Debug ("no name found in identity API response" )
305+ return nil
306+ }
307+
308+ return prof
309+ }
310+
311+ // extractProfileFromGraphQLResponse extracts profile data from the TOP_CARD GraphQL response.
312+ // This is a fallback method if the identity/profiles endpoint fails.
313+ func extractProfileFromGraphQLResponse (body []byte , profileURL , username string , logger * slog.Logger ) * profile.Profile {
314+ prof := & profile.Profile {
315+ Platform : platform ,
316+ URL : profileURL ,
317+ Authenticated : true ,
318+ Username : username ,
319+ Fields : make (map [string ]string ),
320+ }
321+
322+ // The TOP_CARD response contains the profile name and headline in "text" fields
323+ // Structure: elements containing titleV2 with text for name, subtitleV2 for headline
324+ // Look for patterns like: "titleV2":{"text":{"text":"Stephen Fox Jr."
325+
326+ // Extract name from titleV2
327+ titleRe := regexp .MustCompile (`"titleV2"\s*:\s*\{[^}]*"text"\s*:\s*\{[^}]*"text"\s*:\s*"([^"]+)"` )
328+ if m := titleRe .FindSubmatch (body ); len (m ) > 1 {
329+ prof .Name = strings .TrimSpace (string (m [1 ]))
330+ logger .Debug ("extracted name from titleV2" , "name" , prof .Name )
331+ } else {
332+ logger .Debug ("titleV2 pattern not found" )
333+ }
334+
335+ // Extract headline/bio from subtitleV2
336+ subtitleRe := regexp .MustCompile (`"subtitleV2"\s*:\s*\{[^}]*"text"\s*:\s*\{[^}]*"text"\s*:\s*"([^"]+)"` )
337+ if m := subtitleRe .FindSubmatch (body ); len (m ) > 1 {
338+ prof .Bio = strings .TrimSpace (string (m [1 ]))
339+ logger .Debug ("extracted bio from subtitleV2" , "bio" , prof .Bio )
340+ } else {
341+ logger .Debug ("subtitleV2 pattern not found" )
342+ }
343+
344+ // Extract location
345+ loc := extractLocationFromGraphQLResponse (body )
346+ if loc != "" {
347+ prof .Location = loc
348+ }
349+
350+ // Extract pronouns - look for standardizedPronoun
351+ pronounRe := regexp .MustCompile (`"standardizedPronoun"\s*:\s*"(HE_HIM|SHE_HER|THEY_THEM)"` )
352+ if m := pronounRe .FindSubmatch (body ); len (m ) > 1 {
353+ pronouns := convertStandardizedPronoun (string (m [1 ]))
354+ if pronouns != "" {
355+ prof .Fields ["pronouns" ] = pronouns
356+ }
357+ }
358+
359+ // If no name found, return nil to fall back to HTML parsing
360+ if prof .Name == "" {
361+ return nil
362+ }
363+
364+ return prof
365+ }
366+
// Static patterns used by extractTargetMemberURN, compiled once at package
// scope.
var (
	// targetCardURNRe captures the member ID inside "fsd_profileCard:(ACoA...,".
	targetCardURNRe = regexp.MustCompile(`fsd_profileCard:\((ACoA[A-Za-z0-9_-]+),`)
	// targetProfileURNRe captures the member ID inside "fsd_profile:ACoA...".
	targetProfileURNRe = regexp.MustCompile(`fsd_profile:(ACoA[A-Za-z0-9_-]+)`)
	// targetBareMemberIDRe matches any bare "ACoA..." member ID.
	targetBareMemberIDRe = regexp.MustCompile(`ACoA[A-Za-z0-9_-]+`)
)

// extractTargetMemberURN returns the member ID ("ACoA...") of the profile being
// viewed, as found in the page HTML in body, or "" when none is present.
//
// A page can contain IDs for both the logged-in user and the viewed profile,
// so the strategies below are tried in order and the first hit wins:
//
//  1. An fsd_profile ID in the object that follows a "publicIdentifier" entry
//     equal to targetUsername (skipped when targetUsername is empty).
//  2. The first fsd_profileCard:(ACoA...,SECTION) ID.
//  3. The fsd_profile ID that occurs most often. Ties go to the ID that
//     appears first in body, so the result is the same on every run.
//  4. The first bare ACoA... token anywhere in body.
//
// targetUsername is regexp-quoted before use. If the resulting pattern still
// fails to compile (for example because the username is not valid UTF-8),
// strategy 1 is skipped rather than panicking.
func extractTargetMemberURN(body []byte, targetUsername string) string {
	// Strategy 1: tie publicIdentifier to a nearby fsd_profile URN.
	// Example: "publicIdentifier":"stephen-fox-jr"... nearby "fsd_profile:ACoA..."
	if targetUsername != "" {
		pattern := `"publicIdentifier"\s*:\s*"` + regexp.QuoteMeta(targetUsername) +
			`"[^}]*}[^{]*\{[^}]*fsd_profile:(ACoA[A-Za-z0-9_-]+)`
		// Compile, not MustCompile: the pattern embeds caller-supplied text.
		if re, err := regexp.Compile(pattern); err == nil {
			if m := re.FindSubmatch(body); len(m) > 1 {
				return string(m[1])
			}
		}
	}

	// Strategy 2: the profile-card URN.
	if m := targetCardURNRe.FindSubmatch(body); len(m) > 1 {
		return string(m[1])
	}

	// Strategy 3: the most frequently occurring fsd_profile URN.
	if matches := targetProfileURNRe.FindAllSubmatch(body, -1); len(matches) > 0 {
		counts := make(map[string]int, len(matches))
		for _, m := range matches {
			counts[string(m[1])]++
		}

		// Walk matches in document order (not map order) so that ties are
		// resolved deterministically in favor of the earliest URN.
		best, bestCount := "", 0
		for _, m := range matches {
			if n := counts[string(m[1])]; n > bestCount {
				best, bestCount = string(m[1]), n
			}
		}
		return best
	}

	// Last resort: any ACoA token. Find returns nil on no match, which
	// converts to the empty string.
	return string(targetBareMemberIDRe.Find(body))
}
428+
188429// fetchExperienceFromAPI calls the LinkedIn Voyager API to get profile experience data.
189430func (c * Client ) fetchExperienceFromAPI (ctx context.Context , _ string , memberURN string ) experienceData {
190431 // First, make a request to LinkedIn to get session cookies (JSESSIONID)
0 commit comments