-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy patheasyapplybot.py
More file actions
1678 lines (1483 loc) · 75.5 KB
/
easyapplybot.py
File metadata and controls
1678 lines (1483 loc) · 75.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import datetime
import hashlib
import logging
import os
import pickle
import random
import re
import threading
import time
from typing import Callable, List, Optional
from urllib.parse import urlparse
from pydantic import BaseModel, model_validator
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
try:
import undetected_chromedriver as uc
except ImportError:
uc = None # type: ignore[assignment]
try:
import pyautogui
except ImportError:
pyautogui = None # type: ignore[assignment]
try:
import anthropic as _anthropic
except ImportError:
_anthropic = None # type: ignore[assignment]
try:
from google import genai as _genai
except ImportError:
_genai = None # type: ignore[assignment]
try:
import ollama as _ollama
except ImportError:
_ollama = None # type: ignore[assignment]
from fake_useragent import UserAgent
from openai import OpenAI
from dotenv import load_dotenv
try:
import requests as _requests
except ImportError:
_requests = None # type: ignore[assignment]
from settings import _inject_ai_env, load_settings
load_dotenv() # load .env regardless of entry point (hiringfunnel.py, run_profiles_batch.py, etc.)
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Custom exceptions
# ---------------------------------------------------------------------------
class DailyLimitReachedException(Exception):
    """Signals that LinkedIn's daily Easy Apply submission limit was detected on the page."""
class ConsecutiveFailuresException(Exception):
    """Signals that MAX_CONSECUTIVE_FAILURES applications in a row have failed."""
class H1BAPIUnavailableException(Exception):
    """Signals that the pypes H-1B check API could not be used during a visa-mode run.

    The run is meant to abort when this is raised: continuing without the
    check would mean applying to every company, which defeats the guarantee
    the visa filter exists to provide.
    """
# ---------------------------------------------------------------------------
# Pydantic model
# ---------------------------------------------------------------------------
class ProfileConfig(BaseModel):
    """Per-profile settings for one job seeker.

    Only ``email`` and ``password`` are required; every other field has a
    default. Legacy keys that now live in system settings are stripped from
    dict input before validation (see ``_migrate_legacy``).
    """

    # LinkedIn credentials.
    email: str
    password: str
    phone_number: str = ""
    # Search terms and search locations to combine.
    positions: List[str] = []
    locations: List[str] = []
    remote_only: bool = False
    profile_url: str = ""
    user_city: str = ""
    user_state: str = ""
    zip_code: str = ""
    years_experience: int = 0
    desired_salary: int = 0
    github_url: str = ""
    portfolio_url: str = ""
    job_boards: List[str] = ["linkedin"]
    # H-1B visa filter: when True the bot only applies to companies with a
    # documented H-1B sponsorship record (via GET /h1b/check on the pypes API).
    # PYPES_BASE_URL env var controls the API base (default: https://api.pypes.dev).
    requires_visa: bool = False
    # Per-profile whitelist: when non-empty, only apply to jobs whose title
    # contains at least one of these terms (case-insensitive substring match).
    # Empty list = disabled (apply to everything).
    whitelist_titles: List[str] = []

    @model_validator(mode='before')
    @classmethod
    def _migrate_legacy(cls, data):
        """Drop legacy keys from raw dict input before field validation.

        Non-dict input is returned untouched. Dict input is shallow-copied
        first so the caller's dictionary is never modified.
        """
        if not isinstance(data, dict):
            return data
        # Copy: a 'before' validator receives the caller's own object, and
        # popping keys from it would silently alter the caller's data.
        data = dict(data)
        # openai_api_key → ai_api_key (old field name)
        if 'openai_api_key' in data and 'ai_api_key' not in data:
            data['ai_api_key'] = data.pop('openai_api_key')
            data.setdefault('ai_provider', 'openai')
        # ai_provider + ai_api_key and blacklist + blacklist_titles moved to
        # system settings (settings.json), so they are removed from the profile.
        for legacy_key in ('ai_provider', 'ai_api_key', 'blacklist', 'blacklist_titles'):
            data.pop(legacy_key, None)
        return data
# ---------------------------------------------------------------------------
# Logger setup
# ---------------------------------------------------------------------------
def setup_logger() -> None:
    """Create ./logs, configure root logging and attach a console handler to ``log``.

    The console handler is added only when the module logger has no handlers
    yet, so calling this repeatedly does not stack handlers.
    """
    # exist_ok makes this a no-op when the directory is already there.
    os.makedirs(os.path.join('.', 'logs'), exist_ok=True)
    logging.basicConfig(
        format='%(asctime)s::%(name)s::%(levelname)s::%(message)s',
        datefmt='%d-%b-%y %H:%M:%S',
    )
    log.setLevel(logging.DEBUG)
    if log.handlers:
        return
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', '%H:%M:%S')
    )
    log.addHandler(console)
# ---------------------------------------------------------------------------
# Chrome driver factory
# ---------------------------------------------------------------------------
def _make_chrome_driver():
    """Build and return a configured Chrome WebDriver.

    Uses undetected-chromedriver when it is importable, otherwise plain
    Selenium with the automation switches removed. Headless mode is enabled
    when the HIRINGFUNNEL_HEADLESS environment variable equals "1". The page
    load timeout is set to 30 seconds.
    """
    ua = UserAgent()
    options = Options()
    for flag in (
        "--start-maximized",
        "--ignore-certificate-errors",
        '--no-sandbox',
        '--disable-dev-shm-usage',
        "--disable-extensions",
        "--disable-blink-features",
        f'--user-agent={ua.random}',
        '--disable-blink-features=AutomationControlled',
    ):
        options.add_argument(flag)

    # NOTE(review): indentation was lost in the source; all three flags are
    # assumed to be headless-only — confirm.
    if os.environ.get("HIRINGFUNNEL_HEADLESS") == "1":
        for flag in ("--headless=new", "--disable-gpu", "--window-size=1920,1080"):
            options.add_argument(flag)

    if uc is None:
        log.info("Starting Chrome via standard Selenium (undetected-chromedriver not installed)")
        options.add_experimental_option("useAutomationExtension", False)
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        driver = webdriver.Chrome(options=options)
        # Best effort: hide navigator.webdriver on every new document.
        # NOTE(review): assumed to apply to the plain-Selenium branch only — confirm.
        try:
            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
            })
        except Exception:
            pass
    else:
        # Detect installed Chrome major version to avoid driver/browser mismatch.
        # The binary path is the macOS install location; on failure the
        # version stays None and is passed through as such.
        chrome_ver = None
        try:
            import subprocess
            version_text = subprocess.check_output(
                ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", "--version"],
                stderr=subprocess.DEVNULL, text=True)
            chrome_ver = int(version_text.strip().split()[-1].split(".")[0])
            log.info(f"Starting Chrome via undetected-chromedriver (Chrome {chrome_ver})")
        except Exception:
            log.info("Starting Chrome via undetected-chromedriver")
        driver = uc.Chrome(options=options, version_main=chrome_ver)

    driver.set_page_load_timeout(30)
    return driver
# ---------------------------------------------------------------------------
# EasyApplyBot
# ---------------------------------------------------------------------------
class EasyApplyBot:
MAX_SEARCH_TIME = 20 * 60 * 60
MAX_CONSECUTIVE_FAILURES = 5
def __init__(self, config: ProfileConfig, on_event: Optional[Callable[[str, dict], None]] = None,
             blacklist: Optional[List[str]] = None, blacklist_titles: Optional[List[str]] = None) -> None:
    """Set up logging, copy profile data onto the bot and start the browser.

    Args:
        config: Profile settings for this run.
        on_event: Optional callback invoked as ``on_event(event_type, data)``.
        blacklist: Company-name fragments to skip (stored lowercased).
        blacklist_titles: Job-title fragments to skip (stored lowercased).
    """
    setup_logger()
    self._on_event = on_event
    log.info("Welcome to Easy Apply Bot")

    # Run state and counters.
    self._stop_event = threading.Event()
    self.applied_count = 0
    self.failed_count = 0
    self.total_seen = 0
    self.consecutive_fail_streak = 0
    self.checked_invalid = False

    # Profile data, pre-formatted as the strings used when filling forms.
    self.config = config
    self.phone_number = config.phone_number
    self.linkedin_profile_url = config.profile_url
    self.github_url = config.github_url
    self.portfolio_url = config.portfolio_url
    self.zip_code = config.zip_code
    self.user_state = config.user_state
    if config.user_city:
        self.location = f"{config.user_city}, {config.user_state}"
    else:
        self.location = ""
    if config.years_experience:
        self.years_of_experience = str(config.years_experience)
    else:
        self.years_of_experience = ""
    if config.desired_salary:
        self.desired_salary = str(config.desired_salary)
    else:
        self.desired_salary = ""

    # Filters are lowercased once so matching can be case-insensitive.
    self.blacklist = [company.lower() for company in (blacklist or [])]
    self.blacklist_titles = [title.lower() for title in (blacklist_titles or [])]
    self.whitelist_titles = [title.lower() for title in config.whitelist_titles]

    # H-1B visa filter state (only active when config.requires_visa=True).
    # _h1b_cache: per-session lookup cache keyed by lowercase company name,
    #   value (approved: bool, score: float, matched_name: str).
    # _h1b_stats: counters accumulated during the session for the summary.
    self._h1b_cache: dict = {}
    self._h1b_stats = {
        "checked": 0,
        "applied": 0,
        "skipped": 0,
        "scores": [],       # sponsor_score for each approved company applied to
        "top_matches": [],  # (matched_name, score) for end-of-run summary
    }

    # Selenium driver plus a shared 30-second explicit wait.
    self.browser = self._create_driver()
    self.wait = WebDriverWait(self.browser, 30)
def _emit(self, event_type: str, data: Optional[dict] = None) -> None:
    """Forward an event to the ``on_event`` callback, if one was supplied.

    A missing or empty ``data`` is delivered as ``{}``. Exceptions raised by
    the callback are logged at debug level and never propagate.
    """
    callback = self._on_event
    if callback is None:
        return
    try:
        callback(event_type, data or {})
    except Exception as e:
        log.debug(f"on_event callback error: {e}")
# -----------------------------------------------------------------------
# H-1B visa filter
# -----------------------------------------------------------------------
def _check_h1b_seeded(self) -> None:
    """Verify the H-1B employer table is populated before a visa-mode run.

    Calls GET ``{PYPES_BASE_URL}/h1b/health`` (base defaults to
    https://api.pypes.dev), sending CLIENT_SECRET as the ``X-Pypes-Secret``
    header when that variable is set.

    This guard prevents the silent failure where an empty table causes every
    company to return approved=False, skipping 100% of jobs with no warning.

    Raises:
        H1BAPIUnavailableException: if ``requests`` is not installed, the
            request fails or returns a non-2xx status, the response body
            cannot be read as a JSON object with a numeric employer_count,
            or employer_count is 0.
    """
    if _requests is None:
        raise H1BAPIUnavailableException("'requests' library is not installed — cannot use visa mode")
    base = os.environ.get("PYPES_BASE_URL", "https://api.pypes.dev")
    secret = os.environ.get("CLIENT_SECRET", "")
    headers = {"X-Pypes-Secret": secret} if secret else {}
    try:
        resp = _requests.get(f"{base}/h1b/health", headers=headers, timeout=5)
        resp.raise_for_status()
    except Exception as e:
        raise H1BAPIUnavailableException(
            f"[H-1B] Cannot reach pypes API at {base}: {e}\n"
            f"Check PYPES_BASE_URL and CLIENT_SECRET env vars."
        ) from e
    # Parse defensively: a 2xx response with a non-JSON body, a non-object
    # payload or a null/non-numeric count must surface as the documented
    # exception rather than an unrelated ValueError/AttributeError/TypeError.
    try:
        count = int(resp.json().get("employer_count", 0) or 0)
    except Exception as e:
        raise H1BAPIUnavailableException(
            f"[H-1B] Unexpected /h1b/health response from {base}: {e}"
        ) from e
    if count == 0:
        raise H1BAPIUnavailableException(
            "[H-1B] h1b_employers table is empty — run: bun run seed:h1b\n"
            "Without H-1B data, visa-mode would silently skip every company."
        )
    log.info(f"[H-1B] Employer table verified: {count:,} rows loaded")
def _check_h1b_sponsor(self, company: str) -> tuple:
    """Check whether a company has an H-1B sponsorship record via the pypes API.

    Results are cached for the session under the lowercased, stripped
    company name, so an employer that appears in several listings is only
    looked up once.

    Args:
        company: Company name as shown on the listing.

    Returns:
        ``(approved: bool, score: float, matched_name: str)``.

    Raises:
        H1BAPIUnavailableException: if ``requests`` is not installed, on
            connection errors, on non-2xx responses, or on an unreadable
            response body. The bot aborts on this exception — do NOT
            silently fall back to applying to all companies, as that
            defeats the visa-filter safety guarantee.
    """
    key = company.lower().strip()
    if key in self._h1b_cache:
        return self._h1b_cache[key]
    # Same guard as _check_h1b_seeded: without it a missing 'requests'
    # package fails with AttributeError while evaluating the except clause.
    if _requests is None:
        raise H1BAPIUnavailableException("'requests' library is not installed — cannot use visa mode")
    base = os.environ.get("PYPES_BASE_URL", "https://api.pypes.dev")
    secret = os.environ.get("CLIENT_SECRET", "")
    headers = {"X-Pypes-Secret": secret} if secret else {}
    try:
        resp = _requests.get(
            f"{base}/h1b/check",
            params={"company": company},
            headers=headers,
            timeout=5,
        )
        resp.raise_for_status()
        data = resp.json()
        result = (data.get("approved", False), data.get("score", 0.0), data.get("matched_name", ""))
    except _requests.exceptions.ConnectionError as e:
        raise H1BAPIUnavailableException(
            f"[H-1B] Cannot reach pypes API at {base}: {e}"
        ) from e
    except Exception as e:
        raise H1BAPIUnavailableException(
            f"[H-1B] API error for {company!r}: {e}"
        ) from e
    self._h1b_cache[key] = result
    return result
def _h1b_summary_lines(self) -> list:
    """Build the end-of-run H-1B visa filter summary, one string per line.

    The average score is shown only when at least one score was recorded,
    and the "top sponsors" section (up to five entries, highest score first)
    only when there is at least one match.
    """
    stats = self._h1b_stats
    scores = stats['scores']
    if scores:
        approved_line = f"Approved: {stats['applied']} (avg score: {sum(scores) / len(scores):.2f})"
    else:
        approved_line = f"Approved: {stats['applied']}"
    summary = [
        "",
        "=== H-1B Visa Filter ===",
        f"Checked: {stats['checked']} companies",
        approved_line,
        f"Skipped: {stats['skipped']} (no USCIS record or score < 0.4)",
    ]
    matches = stats["top_matches"]
    if matches:
        summary += ["", "Top sponsors applied to:"]
        # Negated score sorts descending while keeping ties in insertion order.
        ranked = sorted(matches, key=lambda entry: -entry[1])
        summary.extend(f" {name:<30} {score:.2f}" for name, score in ranked[:5])
    return summary
# -----------------------------------------------------------------------
def _check_daily_limit(self) -> bool:
    """Report whether LinkedIn's daily submission limit notice is on the page.

    Checks inline feedback messages for the phrase "limit daily submissions"
    and then looks for the Easy Apply limit modal. Any lookup error is
    treated as "no limit detected".
    """
    try:
        notices = self.browser.find_elements(By.CLASS_NAME, "artdeco-inline-feedback__message")
        for notice in notices:
            if "limit daily submissions" in notice.text.lower():
                return True
        # Modal dialog: "You reached today's Easy Apply limit"
        limit_modals = self.browser.find_elements(
            By.CSS_SELECTOR, '[data-sdui-screen="com.linkedin.sdui.flagshipnav.jobs.EasyApplyFuseLimitDialogModal"]')
        return bool(limit_modals)
    except Exception:
        return False
def _create_driver(self):
    """Return a new browser driver built by the module-level Chrome factory."""
    driver = _make_chrome_driver()
    return driver
# ------------------------------------------------------------------
# Lifecycle helpers
# ------------------------------------------------------------------
def stop(self):
    """Request a cooperative shutdown by setting the internal stop flag."""
    stop_flag = self._stop_event
    stop_flag.set()
@property
def stopped(self):
    """True once the stop flag has been set."""
    stop_flag = self._stop_event
    return stop_flag.is_set()
def close(self):
    """Quit the browser, ignoring any error raised while shutting it down."""
    try:
        quit_browser = self.browser.quit
        quit_browser()
    except Exception:
        # Best effort: the browser may already be gone.
        pass
# ------------------------------------------------------------------
# LinkedIn login
# ------------------------------------------------------------------
def _is_logged_in(self) -> bool:
    """Decide from the current page whether the LinkedIn session is logged in.

    Checks, in order: known logged-in URL fragments, known logged-out URL
    fragments, presence of the ``global-nav`` element, and finally whether
    the URL is a linkedin.com host at the site root or /messaging.
    """
    current = self.browser.current_url
    # Substring checks against the whole URL; positive markers win over negative ones.
    if any(marker in current for marker in ("/feed", "/jobs", "/mynetwork")):
        return True
    if any(marker in current for marker in ("/login", "/authwall", "/checkpoint")):
        return False
    # Look for the element with id "global-nav"; a lookup failure just falls through.
    try:
        self.browser.find_element(By.ID, "global-nav")
    except Exception:
        pass
    else:
        return True
    # Fallback: a linkedin.com host with a bare path (e.g. the homepage
    # after a cookie restore) counts as authenticated.
    parts = urlparse(current)
    host = parts.hostname
    if not host or "linkedin.com" not in host:
        return False
    trimmed = parts.path.rstrip("/")
    return trimmed == "" or trimmed in ("/feed", "/jobs", "/mynetwork", "/messaging")
# ------------------------------------------------------------------
# Cookie persistence (per-email)
# ------------------------------------------------------------------
def _cookie_path(self, email: str) -> str:
    """Return the cookie file path for ``email``.

    The file name embeds the first 12 hex characters of the SHA-256 of the
    lowercased, stripped email, so the address itself never appears on disk.
    """
    normalized = email.lower().strip()
    digest = hashlib.sha256(normalized.encode()).hexdigest()
    return os.path.expanduser(f"~/.hiringfunnel/linkedin_session_{digest[:12]}.pkl")
def _try_cookie_login(self, email: str) -> bool:
    """Restore saved cookies for ``email`` and report whether they still log in.

    Returns False when no cookie file exists, when the restored session is
    not logged in (the file is deleted and "cookie_login_expired" emitted),
    or when anything fails along the way (the file is deleted if possible).
    """
    cookie_file = self._cookie_path(email)
    if not os.path.exists(cookie_file):
        return False
    try:
        # Load the site first, then add the cookies and refresh.
        self.browser.get("https://www.linkedin.com")
        time.sleep(1)
        # NOTE(review): pickle.load executes whatever the file encodes; this
        # is only safe while the cookie file is written solely by this program.
        with open(cookie_file, "rb") as fh:
            saved_cookies = pickle.load(fh)
        for entry in saved_cookies:
            try:
                self.browser.add_cookie(entry)
            except Exception:
                # Skip cookies the browser rejects; the rest may still work.
                pass
        self.browser.refresh()
        time.sleep(2)
        if not self._is_logged_in():
            # Session expired — delete stale cookies and report failure
            log.info("Saved LinkedIn session expired, falling back to login")
            self._emit("cookie_login_expired")
            os.remove(cookie_file)
            return False
        return True
    except Exception as e:
        log.warning(f"Cookie login failed: {e}")
        # Remove corrupted cookie file
        try:
            os.remove(cookie_file)
        except OSError:
            pass
        return False
def _save_cookies(self, email: str) -> None:
    """Pickle the browser's current cookies to the per-email cookie file.

    The parent directory is created when missing. Failures are logged as
    warnings and never raised.
    """
    cookie_file = self._cookie_path(email)
    try:
        os.makedirs(os.path.dirname(cookie_file), exist_ok=True)
        with open(cookie_file, "wb") as fh:
            pickle.dump(self.browser.get_cookies(), fh)
    except Exception as e:
        log.warning(f"Could not save cookies: {e}")
    else:
        log.info("LinkedIn cookies saved for next run")
# ------------------------------------------------------------------
def _handle_welcome_back(self, email: str) -> bool:
    """Handle LinkedIn's 'Welcome Back' page that lists saved profiles.

    When a listed profile's email domain equals the domain of ``email``,
    that profile is clicked. Otherwise 'Sign in using another account' is
    clicked to reach the standard login form.

    Returns:
        True if a matching profile was clicked; False if no saved-profile
        buttons were found or none of them matched.
    """
    try:
        saved_profiles = self.browser.find_elements(
            By.CSS_SELECTOR, "button.member-profile__details"
        )
    except Exception:
        return False
    if not saved_profiles:
        return False
    log.info("Welcome Back page detected with %d saved profile(s)", len(saved_profiles))

    # LinkedIn masks emails like "a*****@domain.com" — match on domain.
    # partition() yields "" after a missing "@", same as the no-"@" fallback.
    wanted_domain = email.lower().strip().partition("@")[2]
    for profile_btn in saved_profiles:
        try:
            handle = profile_btn.find_element(By.CSS_SELECTOR, "p.profile__handle").text
            if not handle or "@" not in handle:
                continue
            if handle.lower().strip().partition("@")[2] == wanted_domain:
                log.info("Clicking matching saved profile: %s", handle)
                profile_btn.click()
                return True
        except Exception:
            continue

    # No matching profile — click "Sign in using another account"
    try:
        switch_btn = self.browser.find_element(
            By.CSS_SELECTOR, "button.signin-other-account"
        )
        log.info("No matching saved profile — clicking 'Sign in using another account'")
        switch_btn.click()
        time.sleep(2)
    except Exception:
        log.warning("Could not find 'Sign in using another account' button")
    return False
def start_linkedin(self, username, password) -> bool:
    """Log in to LinkedIn through the username/password form.

    If the page is not logged in after submitting, a 2FA/verification
    challenge is assumed: "2fa_required" is emitted and the method polls
    every 3 seconds for up to 5 minutes for the user to complete it.

    Args:
        username: LinkedIn account email.
        password: LinkedIn account password.

    Returns:
        True once a logged-in page is detected (cookies are saved and
        "login_success" is emitted). False on 2FA timeout, when a stop is
        requested while waiting for verification, or on any error while
        locating or filling the form; "login_failed" is emitted each time.
    """
    # NOTE: the saved-cookie fast path (_try_cookie_login) is not called
    # here; the credential form is always submitted.
    log.info("Logging in.....Please wait :)")
    self.browser.get("https://www.linkedin.com/login?trk=guest_homepage-basic_nav-header-signin")
    time.sleep(2)
    # Handle "Welcome Back" page — click matching profile or
    # "Sign in using another account" to get the standard login form.
    # Clicking a profile does NOT authenticate; it just pre-selects the
    # account. Always fall through to the password form.
    self._handle_welcome_back(username)
    time.sleep(2)
    try:
        user_field = self.browser.find_element("id", "username")
        pw_field = self.browser.find_element("id", "password")
        login_button = self.browser.find_element(By.CLASS_NAME, "btn__primary--large")
        user_field.send_keys(username)
        user_field.send_keys(Keys.TAB)
        time.sleep(2)
        pw_field.send_keys(password)
        time.sleep(2)
        login_button.click()
        time.sleep(3)
        # Check for 2FA / verification challenge
        if not self._is_logged_in():
            log.info("2FA or verification challenge detected — waiting for manual input")
            self._emit("2fa_required")
            # Poll until logged in or stopped, up to 5 minutes
            deadline = time.time() + 300
            while not self._is_logged_in():
                if self.stopped:
                    # A stop request must not be reported as a successful
                    # login: the session is still unauthenticated.
                    log.info("Stop requested while waiting for 2FA")
                    self._emit("login_failed", {"error": "stopped"})
                    return False
                if time.time() > deadline:
                    log.info("Timed out waiting for 2FA")
                    self._emit("login_failed", {"error": "2FA timeout"})
                    return False
                time.sleep(3)
        self._save_cookies(username)
        self._emit("login_success")
        return True
    except TimeoutException:
        log.info("TimeoutException! Username/password field or login button not found")
        self._emit("login_failed", {"error": "Timeout finding login fields"})
        return False
    except Exception as e:
        log.warning(f"Login failed: {e}")
        self._emit("login_failed", {"error": str(e)})
        return False
# ------------------------------------------------------------------
# Main apply loop
# ------------------------------------------------------------------
def fill_data(self) -> None:
    """Placeholder hook; currently does nothing."""
    return None
def start_apply(self, positions, locations) -> None:
    """Run the application loop once for every unique (position, location) pair.

    Pairs are visited in random order. Duplicate entries in either list are
    collapsed, so each distinct pair runs exactly once and the method always
    terminates. At most 501 pairs are processed per call, and the stop flag
    is checked before each one.

    Args:
        positions: Job-title search terms.
        locations: Search locations; each is passed to applications_loop
            as "&location=<location>".
    """
    self.fill_data()
    max_combos = 501  # processing stops once more than 500 pairs have run
    # dict.fromkeys de-duplicates while keeping a list to shuffle. Drawing
    # random pairs until len(positions) * len(locations) distinct ones were
    # seen could never finish when an input list contained duplicates.
    combos = list(dict.fromkeys(
        (position, location) for position in positions for location in locations
    ))
    random.shuffle(combos)
    for position, location in combos[:max_combos]:
        if self.stopped:
            return
        log.info(f"Applying to {position}: {location}")
        location_param = "&location=" + location
        self.applications_loop(position, location_param)
def applications_loop(self, position, location):
    """Search one position/location pair and Easy Apply to each listing found.

    Repeatedly collects job IDs from the current results page, visits each
    job, applies the blacklist, whitelist and optional H-1B filters, attempts
    an Easy Apply submission and then advances the page offset by 25. The
    loop ends when no job cards are found, the stop flag is set, or
    MAX_SEARCH_TIME seconds have elapsed.

    Args:
        position: Search term, passed through to next_jobs_page.
        location: Location query fragment, passed through to next_jobs_page.

    Raises:
        DailyLimitReachedException: propagated from the Easy Apply button check.
        ConsecutiveFailuresException: after MAX_CONSECUTIVE_FAILURES failed
            applications in a row.
        H1BAPIUnavailableException: when the H-1B lookup fails in visa mode;
            re-raised so the run aborts instead of being logged and retried.
    """
    count_application = 0
    jobs_per_page = 0
    start_time = time.time()
    log.info("Looking for jobs.. Please wait..")
    try:
        self.browser.set_window_position(1, 1)
        self.browser.maximize_window()
    except Exception as e:
        log.info(f"Could not set window size/position: {e}")
    self.browser, _ = self.next_jobs_page(position, location, jobs_per_page)
    log.info("Looking for jobs.. Please wait..")
    while time.time() - start_time < self.MAX_SEARCH_TIME:
        if self.stopped:
            return
        try:
            log.info(f"{(self.MAX_SEARCH_TIME - (time.time() - start_time)) // 60} minutes left in this search")
            self.load_page()
            links = self.browser.find_elements("xpath", '//div[@data-job-id]')
            if len(links) == 0:
                log.debug("No links found")
                break
            # Collect numeric job IDs from the card links; the ID is read as
            # the second-to-last segment of each link's URL path.
            IDs = []
            for link in links:
                children = link.find_elements("xpath", './/a[contains(@class, "job-card-container__link")]')
                for child in children:
                    href = child.get_attribute("href")
                    if href:
                        parsed_url = urlparse(href)
                        job_id = parsed_url.path.split('/')[-2]
                        if job_id:
                            try:
                                IDs.append(int(job_id))
                            except ValueError:
                                pass
            jobIDs = set(IDs)
            self.total_seen += len(jobIDs)
            for jobID in jobIDs:
                if self.stopped:
                    return
                if self.get_job_page(jobID) is None:
                    continue
                # Extract title/company for events
                try:
                    title_parts = self.browser.title.split(' | ')
                    job_title = re.search(r"\(?\d?\)?\s?(\w.*)", title_parts[0])
                    job_title = job_title.group(1) if job_title else title_parts[0]
                    company = re.search(r"(\w.*)", title_parts[1]) if len(title_parts) > 1 else None
                    company = company.group(1) if company else "Unknown"
                except Exception:
                    job_title = "Unknown"
                    company = "Unknown"
                # Check blacklists
                if self.blacklist and any(bc in company.lower() for bc in self.blacklist):
                    log.info(f"Skipping blacklisted company: {company}")
                    continue
                if self.blacklist_titles and any(bt in job_title.lower() for bt in self.blacklist_titles):
                    log.info(f"Skipping blacklisted title: {job_title}")
                    continue
                # Whitelist filter: if whitelist is set, only apply to matching titles
                if self.whitelist_titles and not any(wt in job_title.lower() for wt in self.whitelist_titles):
                    log.info(f"Skipping non-whitelisted title: {job_title}")
                    continue
                # H-1B visa filter: skip companies with no USCIS sponsorship record.
                # Only active when requires_visa=True. Raises H1BAPIUnavailableException
                # on API failure — re-raised by the outer handler to abort the run.
                sponsor_score: Optional[float] = None
                sponsor_matched_name: str = ""
                if self.config.requires_visa and company != "Unknown":
                    approved, score, matched = self._check_h1b_sponsor(company)
                    self._h1b_stats["checked"] += 1
                    if not approved:
                        self._h1b_stats["skipped"] += 1
                        log.info(f"[H-1B SKIP] {company} — no sponsorship record")
                        self._emit("h1b_skipped", {"company": company})
                        continue
                    self._h1b_stats["applied"] += 1
                    self._h1b_stats["scores"].append(score)
                    self._h1b_stats["top_matches"].append((matched, score))
                    sponsor_score = score
                    sponsor_matched_name = matched
                    log.info(f"[H-1B OK] {company} → {matched} (score={score:.2f})")
                self._emit("job_applying", {"job_id": str(jobID), "title": job_title, "company": company})
                button = self.get_easy_apply_button()
                if button is not False:
                    log.info("Clicking the EASY apply button")
                    time.sleep(3)
                    try:
                        # Each application gets a 10-minute deadline.
                        result = self.send_resume(deadline=time.time() + 600)
                        count_application += 1
                        if result:
                            self.applied_count += 1
                            self.consecutive_fail_streak = 0
                            event_data: dict = {"job_id": str(jobID), "title": job_title, "company": company}
                            if sponsor_score is not None:
                                event_data["sponsor_score"] = sponsor_score
                                event_data["sponsor_matched_name"] = sponsor_matched_name
                            self._emit("job_applied", event_data)
                        else:
                            self.failed_count += 1
                            # "company" included so every job_failed payload has the same keys.
                            self._emit("job_failed", {"job_id": str(jobID), "title": job_title, "company": company, "error": "submit failed"})
                            self.consecutive_fail_streak += 1
                            if self.consecutive_fail_streak >= self.MAX_CONSECUTIVE_FAILURES:
                                raise ConsecutiveFailuresException(f"{self.MAX_CONSECUTIVE_FAILURES} consecutive application failures")
                    except TimeoutError:
                        self.failed_count += 1
                        self._emit("job_failed", {"job_id": str(jobID), "title": job_title, "company": company, "error": "timeout"})
                        self._dismiss_modal()
                        self.consecutive_fail_streak += 1
                        if self.consecutive_fail_streak >= self.MAX_CONSECUTIVE_FAILURES:
                            raise ConsecutiveFailuresException(f"{self.MAX_CONSECUTIVE_FAILURES} consecutive application failures")
                        continue
                    except DailyLimitReachedException:
                        raise
                    except ConsecutiveFailuresException:
                        raise
                    except Exception as e:
                        log.warning(f"Exception applying to job {jobID}: {e}")
                        self.failed_count += 1
                        self._emit("job_failed", {"job_id": str(jobID), "title": job_title, "company": company, "error": str(e)})
                        self.consecutive_fail_streak += 1
                        if self.consecutive_fail_streak >= self.MAX_CONSECUTIVE_FAILURES:
                            raise ConsecutiveFailuresException(f"{self.MAX_CONSECUTIVE_FAILURES} consecutive application failures")
                        continue
                else:
                    log.info("The button does not exist.")
                    result = False
                self._emit("progress", {
                    "applied": self.applied_count,
                    "failed": self.failed_count,
                    "total_seen": self.total_seen,
                })
                # Pause for 100-300 seconds after every 20 attempted applications.
                if count_application != 0 and count_application % 20 == 0:
                    sleepTime = random.randint(100, 300)
                    log.info(f"Time for a nap - see you in: {int(sleepTime / 60)} min")
                    time.sleep(sleepTime)
            # Advance to next page after processing all jobs on this page.
            # Not tied to the application count, so H1B-filtered or
            # blacklisted jobs don't strand the browser on a detail page
            # and trigger premature exit.
            # NOTE(review): indentation was lost in the source; the whole
            # page advance is assumed to sit under `if jobIDs:` — confirm.
            if jobIDs:
                jobs_per_page += 25
                log.info("Going to next jobs page")
                self.avoid_lock()
                self.browser, jobs_per_page = self.next_jobs_page(position, location, jobs_per_page)
        except DailyLimitReachedException:
            raise
        except ConsecutiveFailuresException:
            raise
        except H1BAPIUnavailableException:
            # Must abort the run: swallowing this would keep re-scanning the
            # page while the visa filter cannot be evaluated.
            raise
        except Exception as e:
            log.error(f"Exception in main application loop: {e}")
            self._emit("error", {"message": str(e)})
# ------------------------------------------------------------------
# Page / job helpers
# ------------------------------------------------------------------
def get_job_page(self, jobID):
    """Open the LinkedIn view page for ``jobID`` and return the loaded page.

    The result of ``load_page()`` is also stored on ``self.job_page``.
    Returns None when navigation raises a page-load timeout.
    """
    job_url = f"https://www.linkedin.com/jobs/view/{jobID!s}"
    try:
        self.browser.get(job_url)
    except TimeoutException:
        log.warning(f"Page load timed out for job {jobID}, skipping")
        return None
    loaded = self.load_page()
    self.job_page = loaded
    return loaded
def get_easy_apply_button(self):
    """Find and click the Easy Apply button on the current job page.

    Returns:
        True if a button was found and clicked; False if no button exists
        or an unexpected error occurred.

    Raises:
        DailyLimitReachedException: if the daily limit notice is present
            before the lookup or right after the click.
    """
    if self._check_daily_limit():
        log.info("Daily application limit detected before button check")
        raise DailyLimitReachedException("Daily application limit reached")
    try:
        candidates = self.browser.find_elements("xpath", '//*[contains(@aria-label, "Easy Apply to") or contains(@aria-label, "LinkedIn Apply to")]')
        if not candidates:
            return False
        target = candidates[0]
        self.browser.execute_script("arguments[0].scrollIntoView({block: 'center'});", target)
        time.sleep(0.3)
        try:
            target.click()
        except Exception:
            # Fall back to a JavaScript click when the native click fails.
            self.browser.execute_script("arguments[0].click();", target)
        time.sleep(1)
        if self._check_daily_limit():
            log.info("Daily application limit detected after button click")
            raise DailyLimitReachedException("Daily application limit reached")
        return True
    except DailyLimitReachedException:
        raise
    except Exception as e:
        log.error(f"exception in get_easy_apply_button: {e}")
        return False
def wait_for_loader_to_disappear(self, timeout=10):
    """Wait up to ``timeout`` seconds for the ``jobs-loader`` element to become invisible.

    A timeout or any other error is ignored. A fixed 0.5-second pause always
    follows.
    """
    try:
        loader_gone = EC.invisibility_of_element_located((By.CLASS_NAME, "jobs-loader"))
        WebDriverWait(self.browser, timeout).until(loader_gone)
    except Exception:
        # Best effort: continue even if the loader never disappears.
        pass
    time.sleep(0.5)
def fill_out_phone_number(self):
    """Type the configured phone number into the form and continue if possible.

    After filling the field, the 'Continue to next step' button is clicked
    only when it is present and no inline feedback message contains
    "Please enter". Any failure is logged at debug level.
    """
    def exists(locator) -> bool:
        # True when at least one element matches the (by, value) locator.
        return len(self.browser.find_elements(locator[0], locator[1])) > 0

    try:
        phone_input = self.browser.find_element("xpath", "//input[contains(@id,'phoneNumber')]")
        if not phone_input:
            return
        phone_input.clear()
        phone_input.send_keys(self.phone_number)
        time.sleep(random.uniform(4.5, 6.5))

        continue_locator = (By.CSS_SELECTOR, "button[aria-label='Continue to next step']")
        feedback_locator = (By.CLASS_NAME, "artdeco-inline-feedback__message")
        continue_btn = None
        if exists(continue_locator):
            continue_btn = self.wait.until(EC.element_to_be_clickable(continue_locator))
        if exists(feedback_locator):
            messages = self.browser.find_elements(feedback_locator[0], feedback_locator[1])
            # A "Please enter ..." message blocks the click.
            if any("Please enter" in message.text for message in messages):
                continue_btn = None
        if continue_btn:
            continue_btn.click()
            time.sleep(random.uniform(1.5, 2.5))
    except Exception:
        log.debug("Could not find phone number field")
def _dismiss_modal(self) -> None:
    """Best-effort close of an open Easy Apply modal after a timeout or error.

    Tries the "Dismiss" button first, then the "Cancel" button. The first
    one that can be found and clicked ends the attempt (after a 0.5 second
    pause). If neither works, an Escape key press is sent to the page body.
    Every failure along the way is ignored.
    """
    close_button_selectors = (
        "button[aria-label='Dismiss']",
        "button[aria-label='Cancel']",
    )
    for css in close_button_selectors:
        try:
            self.browser.find_element(By.CSS_SELECTOR, css).click()
            time.sleep(0.5)
        except Exception:
            # This button is missing or not clickable; try the next one.
            continue
        else:
            return

    # Fallback: send Escape key to close any open overlay
    try:
        page_body = self.browser.find_element(By.TAG_NAME, "body")
        page_body.send_keys(Keys.ESCAPE)
    except Exception:
        pass
def send_resume(self, deadline: Optional[float] = None) -> bool:
    """Click through the Easy Apply modal until a submit button is clicked.

    Each pass of the loop unchecks "follow company" when it is shown, polls
    up to 5 seconds for a Next / Review / Submit button, reacts to inline
    validation messages, and then clicks the button that was found.

    Args:
        deadline: Optional absolute ``time.time()`` value. Once it has
            passed, the next loop pass raises ``TimeoutError``.

    Returns:
        ``True`` once a submit button has been clicked. ``False`` when the
        modal does not appear within 10 seconds, when ``self.stopped`` is
        set, or when six consecutive polls find no action button.

    Raises:
        TimeoutError: ``deadline`` passed before the application finished.
        Exception: Any other error is logged at info level and re-raised.
    """
    modal_locator = (By.CSS_SELECTOR,
                     "div.jobs-easy-apply-content, div.jobs-easy-apply-modal")
    next_locater = (By.XPATH, "//button[contains(@aria-label,'Continue to next step') or contains(@aria-label,'continue to next step')]")
    review_locater = (By.XPATH, "//button[contains(@aria-label,'Review your application') or contains(@aria-label,'review your application')]")
    submit_locater = (By.XPATH, "//button[contains(@aria-label,'Submit application') or contains(@aria-label,'submit application')]")
    submit_btn_locator = (By.CSS_SELECTOR, "button[data-live-test-easy-apply-submit-button]")
    error_locator = (By.CLASS_NAME, "artdeco-inline-feedback__message")
    follow_locator = (By.CSS_SELECTOR, "label[for='follow-company-checkbox']")

    # (locator, is_submit) pairs in lookup priority order. Carrying the flag
    # with the locator avoids depending on positions in the list.
    action_buttons = (
        (next_locater, False),
        (review_locater, False),
        (submit_locater, True),
        (submit_btn_locator, True),
    )
    # Feedback texts that should trigger fill_invalids().
    fillable_phrases = (
        "Please enter",
        "Please make",
        "Enter a",
        "Select checkbox to proceed",
    )

    def _find_any_button(driver):
        """Return (is_submit, element) for the first action button found, or False."""
        for locator, is_submit in action_buttons:
            elems = driver.find_elements(*locator)
            if elems:
                return (is_submit, elems[0])
        return False

    def _uncheck_follow_company() -> None:
        """Uncheck the "follow company" box when it is shown (best effort)."""
        try:
            labels = self.browser.find_elements(*follow_locator)
            if not labels:
                return
            # The label's `for` attribute names the checkbox id. Clicking the
            # label toggles the box, so skip the click when the box is known
            # to be unchecked already; otherwise a later loop pass on the
            # same page would switch it back on.
            boxes = self.browser.find_elements(By.ID, "follow-company-checkbox")
            if boxes and not boxes[0].is_selected():
                return
            labels[0].click()
            time.sleep(0.3)
        except Exception as exc:
            log.debug(f"Could not uncheck follow-company box: {exc}")

    def _handle_validation_errors() -> None:
        """React to inline validation messages currently shown in the form."""
        messages = self.browser.find_elements(*error_locator)
        if not messages:
            return
        try:
            for element in messages:
                text = element.text
                lowered = text.lower()
                if "integer" in lowered or "whole number" in lowered:
                    try:
                        inp = element.find_element(By.XPATH, "./ancestor::div[contains(@class,'fb-dash-form-element')][1]//input")
                        inp.clear()
                        inp.send_keys(str(self.years_of_experience))
                        log.info(f"Replaced non-integer value with years_of_experience due to: {text}")
                    except Exception as ie:
                        log.debug(f"Could not fix integer field: {ie}")
                elif any(phrase in text for phrase in fillable_phrases) and not self.checked_invalid:
                    # Every phrase is tested against the message text; a bare
                    # string literal in an `or` chain would always be truthy.
                    self.fill_invalids()
                    break
        except Exception as e:
            log.info(e)

    submitted = False
    no_progress_count = 0
    try:
        # Wait for the Easy Apply modal to appear before looking for buttons
        try:
            WebDriverWait(self.browser, 10).until(
                EC.presence_of_element_located(modal_locator)
            )
        except TimeoutException:
            log.warning("Easy Apply modal did not appear within 10s")
            return False
        time.sleep(random.uniform(0.3, 0.8))

        while True:
            if deadline is not None and time.time() > deadline:
                raise TimeoutError("Job application timed out")
            if self.stopped:
                return False

            # Uncheck "follow company" if present (before looking for action buttons)
            _uncheck_follow_company()

            # Poll for any action button for up to 5 seconds
            try:
                is_submit, button = WebDriverWait(self.browser, 5).until(_find_any_button)
            except TimeoutException:
                is_submit, button = False, None

            # Handle form validation errors regardless of button state
            _handle_validation_errors()

            if button is None:
                no_progress_count += 1
                if no_progress_count == 2:
                    # Diagnostic dump only; failures here must not abort the run.
                    try:
                        modal_buttons = self.browser.find_elements(By.CSS_SELECTOR, "div.jobs-easy-apply-content button")
                        labels = [b.get_attribute("aria-label") or b.text.strip() for b in modal_buttons[:10]]
                        log.debug(f"Modal buttons found: {labels}")
                    except Exception:
                        pass
                if no_progress_count >= 6:
                    log.warning("No actionable buttons found after 30s, abandoning application")
                    return False
                time.sleep(random.uniform(1.5, 2.5))
                continue

            no_progress_count = 0
            self.browser.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
            time.sleep(0.3)
            try:
                button.click()
            except Exception:
                # Native click failed (e.g. intercepted); fall back to a JS click.
                self.browser.execute_script("arguments[0].click();", button)
            time.sleep(random.uniform(0.5, 1.5))

            if is_submit:
                submitted = True
                # Post-submit bookkeeping must run here, before leaving the
                # loop, or it is never reached.
                self.checked_invalid = False
                log.info("Application Submitted")
                break
    except Exception as e:
        log.info(f"{e} - cannot apply to this job")
        raise
    return submitted
# ------------------------------------------------------------------
# Field label / value helpers
# ------------------------------------------------------------------
def get_field_label(self, input_element):