diff --git a/.github/workflows/indexer-backfill.yml b/.github/workflows/indexer-backfill.yml
index f67852023..3f8ddd01d 100644
--- a/.github/workflows/indexer-backfill.yml
+++ b/.github/workflows/indexer-backfill.yml
@@ -62,10 +62,15 @@ on:
         default: '50'
         type: string
       path_prefix:
-        description: 'Scope crawl to one path prefix (e.g. .agents/skills); empty = all prefixes'
+        description: 'Scope crawl to one path prefix (e.g. .agents/skills); empty = broad query (all depths)'
         required: false
         default: ''
         type: string
+      max_ranges:
+        description: 'SMI-5286 1c: per-dispatch size-(sub)range budget before a checkpoint+exit (default: 150)'
+        required: false
+        default: '150'
+        type: string
       supabase_env:
         description: 'Target Supabase environment'
         required: false
@@ -181,6 +186,14 @@ jobs:
           RESUME_FROM: ${{ github.event.inputs.resume_from || 'latest' }}
           BACKFILL_MAX_SKILLS_PER_REPO: ${{ github.event.inputs.max_skills_per_repo || '50' }}
           BACKFILL_PATH_PREFIX: ${{ github.event.inputs.path_prefix || '' }}
+          # SMI-5286 1c: run ONLY Phase 3 (the size-faceted subdirectory crawl) +
+          # finalize each dispatch — topic/high-trust are the cron's job, so this
+          # keeps every backfill dispatch focused and resumable on the facet cursor.
+          DISCOVERY_PHASE: '3'
+          # SMI-5286 1c: per-dispatch (sub)range budget — the facet driver writes a
+          # checkpoint after this many ranges so the run fits the GHA cap; re-dispatch
+          # with resume_from=latest until facets_remaining=0.
+          BACKFILL_MAX_RANGES: ${{ github.event.inputs.max_ranges || '150' }}
           # Raised caps for backfill mode (per SPARC section #3).
           # These override the conservative cron defaults.
           CODE_SEARCH_MAX_PAGES: '10'
@@ -224,6 +237,11 @@ jobs:
           CAP_SATURATED=$(printf '%s' "$RESPONSE" | jq -r '.data.backfill.cap_saturated // false')
           TRUNCATED_REPO_COUNT=$(printf '%s' "$RESPONSE" | jq -r '.data.backfill.truncated_repo_count // 0')
           TOKEN_SOURCE=$(printf '%s' "$RESPONSE" | jq -r '.data.backfill.token_source // "unknown"')
+          # SMI-5286 1c (M-2): true crawl position. current_facet == 'done' is the
+          # AUTHORITATIVE terminal signal (facets_remaining alone reads 0 while the
+          # last facet's bisected sub-ranges still drain).
+          CURRENT_FACET=$(printf '%s' "$RESPONSE" | jq -r '.data.backfill.current_facet // "unknown"')
+          PENDING_SUBRANGES=$(printf '%s' "$RESPONSE" | jq -r '.data.backfill.pending_subrange_count // 0')
 
           # CRITICAL: token_source must be exactly "pat" on every backfill dispatch.
           # An "app" value means the App env entries leaked in (consuming the cron's
@@ -247,15 +265,19 @@ jobs:
             echo "| Facets Total | $FACETS_TOTAL |"
             echo "| Facets Completed | $FACETS_COMPLETED |"
             echo "| Facets Remaining | $FACETS_REMAINING |"
+            echo "| Current Facet | $CURRENT_FACET |"
+            echo "| Pending Sub-ranges | $PENDING_SUBRANGES |"
             echo "| Checkpoint ID | $CHECKPOINT_ID |"
             echo "| Cap Saturated | $CAP_SATURATED |"
             echo "| Truncated Repo Count | $TRUNCATED_REPO_COUNT |"
             echo "| Token Source | $TOKEN_SOURCE |"
             echo ""
-            if [ "$FACETS_REMAINING" = "0" ]; then
-              echo "**TERMINAL CONDITION MET**: facets_remaining == 0. Backfill loop is complete."
+            # current_facet == 'done' is authoritative: it is set only when the
+            # ladder AND the bisection frontier are both exhausted (SMI-5286 1c C-1/M-2).
+            if [ "$CURRENT_FACET" = "done" ]; then
+              echo "**TERMINAL CONDITION MET**: current_facet == 'done'. Backfill loop is complete."
             else
-              echo "**Backfill continues.** Re-dispatch with resume_from=latest to pick up from checkpoint $CHECKPOINT_ID."
+              echo "**Backfill continues** (current_facet=$CURRENT_FACET, pending_subranges=$PENDING_SUBRANGES). Re-dispatch with resume_from=latest to pick up from checkpoint $CHECKPOINT_ID."
             fi
           } >> "$GITHUB_STEP_SUMMARY"
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69a49303c..4143b2bbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- **Indexer Backfill Facet Driver** (2026-06-18, SMI-5286 sub-wave 1c): the
+  out-of-band backfill (`indexer-backfill.yml`) now crawls the full
+  `filename:SKILL.md` universe past GitHub code-search's 1000-result-per-query
+  cap by partitioning the broad query into a fixed `size:` byte-range ladder
+  (`code-search.facets.ts`) with adaptive bisect-on-saturation: any facet whose
+  `total_count` exceeds the cap is split and its halves crawled before the next
+  facet, so every file is reachable. The depth-first frontier (facet index +
+  bisection stack + page) is fully captured by the checkpoint cursor — extended
+  with `pending_subranges` — so a dispatch boundary mid-bisection resumes
+  losslessly across the 6h GHA cap. `per_page` raised 30→100; `BACKFILL_PATH_PREFIX`
+  scopes a one-ecosystem DRY_RUN; `DISCOVERY_PHASE=3` focuses each dispatch on the
+  Phase-3b crawl + finalize. Also fixes a latent root-`SKILL.md` drop in
+  `fetchSkillPathsFromTree` (`trees-search.ts`) — repos whose only skill is a root
+  `SKILL.md` are now emitted as `path:''` instead of silently lost. Gated: the
+  live (`DRY_RUN=false`) crawl requires explicit operator sign-off.
 - **Vendor-Org Trust Tier** (2026-05-02, SMI-4651): GitHub-verified vendor
   organizations (Stripe, Notion, Atlassian, Figma, Canva, Zapier, Cloudflare,
   and any future verified org) are now auto-promoted to the `curated` trust
diff --git a/scripts/indexer/backfill-checkpoint.ts b/scripts/indexer/backfill-checkpoint.ts
index 1c66f2a63..c157de2b7 100644
--- a/scripts/indexer/backfill-checkpoint.ts
+++ b/scripts/indexer/backfill-checkpoint.ts
@@ -22,21 +22,174 @@
  */
 
 import type { SupabaseClient } from '@supabase/supabase-js'
+import { type SizeFacet, buildSizeFacets, facetId, bisectFacet } from './code-search.facets.ts'
 
 /** `event_type` discriminator for backfill checkpoint rows in `audit_logs`. */
 export const BACKFILL_CHECKPOINT_EVENT_TYPE = 'indexer_backfill_checkpoint'
 
+/**
+ * A persisted size sub-range `[lo, hi]`. `hi` is `null` when the range is
+ * open-ended (`Infinity`) — `Infinity` does NOT survive `JSON.stringify`
+ * (it serializes to `null`), so the cursor uses an explicit `null` sentinel and
+ * {@link deserializeRange} maps it back to `Number.POSITIVE_INFINITY`.
+ */
+export type PersistedSubrange = [number, number | null]
+
 /**
  * Resume cursor. `(path, facet, last_page)` lets a re-dispatch resume mid-facet,
  * not just at facet boundaries (SPARC §#5 facet-AND-page granularity).
  */
 export interface BackfillCursor {
-  /** The path-prefix facet being crawled (e.g. '.agents/skills'). */
+  /** The path-prefix being crawled ('' = the broad, no-`path:` query). */
   path: string
-  /** The active facet window within that path (e.g. a date/size bucket; Wave 1c). */
+  /** Stable id of the active size facet/sub-range ({@link facetId}); 'done' when complete. */
   facet: string
-  /** Last code-search page consumed within the current facet (1-based). */
+  /** Last code-search page consumed within the current (sub)range (1-based; 0 = none yet). */
   last_page: number
+  /**
+   * SMI-5286 1c: 0-based index of the next top-level facet to process in the
+   * static {@link buildSizeFacets} ladder. Incremented when a top-level facet is
+   * RETIRED — either fully drained OR bisected (its sub-ranges, tracked in
+   * `pending_subranges`, then cover it). So it counts top-level facets whose
+   * coverage is committed, NOT necessarily finished crawling; use `current_facet`
+   * / `pending_subrange_count` (in the run summary) to tell 'bisecting' from 'done'.
+   */
+  facet_index?: number
+  /**
+   * SMI-5286 1c: the in-progress bisection frontier — sub-ranges of the current
+   * facet not yet fully crawled (DFS stack; the LAST element is crawled next).
+   * Persisted so a dispatch boundary mid-bisection resumes without losing
+   * not-yet-crawled sub-ranges (the bare `(path,facet,last_page)` cursor cannot
+   * represent a partial bisection tree, C-2).
+   */
+  pending_subranges?: PersistedSubrange[]
+}
+
+/** Map a runtime {@link SizeFacet} to its JSON-safe persisted form (`Infinity` → `null`). */
+function serializeRange(facet: SizeFacet): PersistedSubrange {
+  return [facet.lo, Number.isFinite(facet.hi) ? facet.hi : null]
+}
+
+/** Map a persisted sub-range back to a runtime {@link SizeFacet} (`null` → `Infinity`). */
+function deserializeRange([lo, hi]: PersistedSubrange): SizeFacet {
+  return { lo, hi: hi == null ? Number.POSITIVE_INFINITY : hi }
+}
+
+/**
+ * Runtime crawl frontier reconstructed from a {@link BackfillCursor}. The facet
+ * driver is a depth-first walk of the static size ladder: each top-level facet
+ * that saturates the 1000-result cap is retired (`facetIndex` moves past it) and bisected
+ * into `pendingSubranges`, which drain (bisecting further) before the next facet is crawled.
+ */
+export interface FacetCrawlState {
+  /** Ladder index ({@link buildSizeFacets}) of the next top-level facet not yet retired. */
+  facetIndex: number
+  /** DFS stack of sub-ranges still to crawl; the LAST element (stack top) is crawled next. */
+  pendingSubranges: SizeFacet[]
+  /** Last page consumed within the current (sub)range (0 = none). */
+  lastPage: number
+}
+
+/** Reconstruct the crawl frontier from a persisted cursor (or a cold start). */
+export function cursorToFacetState(cursor: BackfillCursor | null | undefined): FacetCrawlState {
+  if (!cursor) return { facetIndex: 0, pendingSubranges: [], lastPage: 0 }
+  return {
+    facetIndex: cursor.facet_index ?? 0,
+    pendingSubranges: (cursor.pending_subranges ?? []).map(deserializeRange),
+    lastPage: cursor.last_page ?? 0,
+  }
+}
+
+/**
+ * The range currently being crawled: the top (last element) of the bisection stack, else the
+ * top-level facet at `facetIndex`. `null` once the ladder is exhausted.
+ */
+export function currentFacetRange(
+  state: FacetCrawlState,
+  facets: SizeFacet[] = buildSizeFacets()
+): SizeFacet | null {
+  if (state.pendingSubranges.length > 0) {
+    return state.pendingSubranges[state.pendingSubranges.length - 1]
+  }
+  if (state.facetIndex < facets.length) return facets[state.facetIndex]
+  return null
+}
+
+/**
+ * Replace the current saturated range with its two halves (the first half is
+ * crawled next). Resets the page cursor. Returns false when the range cannot
+ * subdivide (the caller then records truncation and advances).
+ *
+ * Retirement: a saturated range is REPLACED by its halves, so it must never be
+ * revisited. If it was a sub-range (stack non-empty) we pop it; if it was the
+ * TOP-LEVEL facet (stack empty) we advance `facetIndex` past it before pushing —
+ * otherwise, once the halves drain, `currentFacetRange` would return the same
+ * top-level facet again, it would re-saturate, and the crawl would loop forever
+ * without advancing `facets_completed` (governance C-1).
+ */
+export function bisectCurrentFacet(state: FacetCrawlState, range: SizeFacet): boolean {
+  const halves = bisectFacet(range)
+  if (!halves) return false
+  if (state.pendingSubranges.length > 0) {
+    state.pendingSubranges.pop() // retire the sub-range being bisected
+  } else {
+    state.facetIndex++ // retire the top-level facet — its halves now cover it
+  }
+  // Push so halves[0] ends up on top (LIFO) → the lower sub-range is crawled next.
+  state.pendingSubranges.push(halves[1], halves[0])
+  state.lastPage = 0
+  return true
+}
+
+/**
+ * Advance past the current exhausted (or unbisectable-saturated) range: pop the
+ * bisection stack if non-empty, else advance the top-level facet index. Resets
+ * the page cursor.
+ */
+export function advanceFacet(state: FacetCrawlState): void {
+  if (state.pendingSubranges.length > 0) state.pendingSubranges.pop()
+  else state.facetIndex++
+  state.lastPage = 0
+}
+
+/** True when every top-level facet AND its bisection frontier are exhausted. */
+export function isFacetCrawlDone(
+  state: FacetCrawlState,
+  facets: SizeFacet[] = buildSizeFacets()
+): boolean {
+  return state.facetIndex >= facets.length && state.pendingSubranges.length === 0
+}
+
+/** Serialize the crawl frontier back into a persisted {@link BackfillCursor}. */
+export function facetStateToCursor(
+  state: FacetCrawlState,
+  pathPrefix: string,
+  facets: SizeFacet[] = buildSizeFacets()
+): BackfillCursor {
+  const range = currentFacetRange(state, facets)
+  return {
+    path: pathPrefix,
+    facet: range ? facetId(range) : 'done',
+    last_page: state.lastPage,
+    facet_index: state.facetIndex,
+    pending_subranges: state.pendingSubranges.map(serializeRange),
+  }
+}
+
+/**
+ * The outcome of one dispatch's facet crawl: the advanced cursor to persist, a
+ * terminal flag, and the operator-observable counters. Lives here (not in
+ * `subdirectory-search.ts`) so `indexer-types.ts` can reference it without
+ * importing the search module.
+ */
+export interface BackfillCrawlOutcome {
+  cursor: BackfillCursor
+  done: boolean
+  cap_saturated: boolean
+  truncated_repo_count: number
+  facets_completed: number
+  facets_total: number
+  ranges_crawled: number
 }
 
 /**
@@ -113,6 +266,15 @@ export interface BackfillSummary {
   facets_remaining: number
   cap_saturated: boolean
   truncated_repo_count: number
+  /**
+   * SMI-5286 1c (M-2): true crawl position. `facets_remaining` is coarse — it
+   * reads 0 once the last top-level facet is retired even while its bisected
+   * sub-ranges are still draining. `current_facet` (the active (sub)range id, or
+   * 'done') + `pending_subrange_count` (bisection-frontier depth) let the operator
+   * distinguish "finished" from "still bisecting".
+   */
+  current_facet?: string
+  pending_subrange_count?: number
 }
 
 /**
diff --git a/scripts/indexer/code-search.facets.ts b/scripts/indexer/code-search.facets.ts
new file mode 100644
index 000000000..854715c35
--- /dev/null
+++ b/scripts/indexer/code-search.facets.ts
@@ -0,0 +1,139 @@
+/**
+ * Size-facet partitioner for the broad `filename:SKILL.md` code-search backfill
+ * @module scripts/indexer/code-search.facets
+ *
+ * SMI-5286 Wave 1c: the broad community code-search query
+ * (`filename:SKILL.md`) saturates GitHub's hard 1000-result ceiling, so a single
+ * paginated pass can never reach the long tail. This module partitions that one
+ * query by file SIZE into a fixed ladder of disjoint, exhaustive byte-size
+ * buckets so each sub-query returns < 1000 results, with adaptive
+ * bisect-on-saturation for the dense low buckets.
+ *
+ * Why size and not date: GitHub /search/code's `size:` qualifier IS a real,
+ * probe-verified filter, whereas `created:`/`pushed:` are tokenized as
+ * free-text content (SMI-5176) and crush results to files that literally
+ * contain the date string. Size is therefore the only viable partitioner.
+ *
+ * This module is pure (no I/O, no GitHub dependency): it produces facet ranges,
+ * stable labels, the `size:` qualifier string, and a bisection helper. The
+ * caller (the facet driver) owns dispatch, pagination, and the checkpoint
+ * cursor; it passes the already-formatted qualifier string into
+ * `code-search.ts` so that file stays free of any facet dependency.
+ */
+
+/** A disjoint, inclusive byte-size bucket over the SKILL.md blob size. */
+export interface SizeFacet {
+  /** Inclusive lower byte bound (>= 0). */
+  lo: number
+  /** Inclusive upper byte bound; Number.POSITIVE_INFINITY for the open-ended final bucket. */
+  hi: number
+}
+
+/**
+ * The fixed, pre-enumerated size-bucket ladder. Returns the SAME array every
+ * call so `facets_total = buildSizeFacets().length` is STATIC across dispatches
+ * (the checkpoint cursor's `facets_completed` count is meaningless if the ladder
+ * can change). Buckets are disjoint and EXHAUSTIVELY cover [0, ∞):
+ *   facets[0].lo === 0; facets[i+1].lo === facets[i].hi + 1; last.hi === Infinity.
+ *
+ * After two 128-byte buckets the width doubles each step. SKILL.md files are
+ * small, so the low buckets are dense and WILL bisect at runtime — that is the
+ * expected, designed behaviour of the adaptive split.
+ *
+ * @returns The frozen 9-bucket size ladder (stable identity across calls)
+ */
+export function buildSizeFacets(): SizeFacet[] {
+  return SIZE_FACETS
+}
+
+/**
+ * The canonical size-bucket ladder, enumerated once at module load so
+ * `buildSizeFacets()` returns a stable array identity. Buckets are
+ * inclusive-inclusive, disjoint, contiguous, and exhaustively cover [0, ∞):
+ * the first bucket starts at 0, each subsequent `lo` is the prior `hi + 1`, the
+ * width doubles each step after the first two 128-byte buckets, and the final bucket is open-ended.
+ */
+const SIZE_FACETS: SizeFacet[] = Object.freeze([
+  { lo: 0, hi: 127 },
+  { lo: 128, hi: 255 },
+  { lo: 256, hi: 511 },
+  { lo: 512, hi: 1023 },
+  { lo: 1024, hi: 2047 },
+  { lo: 2048, hi: 4095 },
+  { lo: 4096, hi: 8191 },
+  { lo: 8192, hi: 16383 },
+  { lo: 16384, hi: Number.POSITIVE_INFINITY },
+]) as SizeFacet[]
+
+/**
+ * Stable label for a facet, used as the checkpoint cursor `facet` string.
+ * Finite: `${lo}-${hi}`. Open-ended: `${lo}+`.
+ *
+ * @param facet - The size bucket to label
+ * @returns A stable, human-readable facet identifier
+ */
+export function facetId(facet: SizeFacet): string {
+  return facet.hi === Number.POSITIVE_INFINITY ? `${facet.lo}+` : `${facet.lo}-${facet.hi}`
+}
+
+/**
+ * The GitHub /search/code size qualifier for this facet. INCLUSIVE-INCLUSIVE:
+ * finite → `size:${lo}..${hi}`; open-ended (hi === Infinity) → `size:>=${lo}`.
+ * (Off-by-one boundaries double-count — buckets are already inclusive-inclusive.)
+ *
+ * @param facet - The size bucket to render
+ * @returns The `size:` qualifier string to append to the code-search query
+ */
+export function facetToQualifier(facet: SizeFacet): string {
+  return facet.hi === Number.POSITIVE_INFINITY
+    ? `size:>=${facet.lo}`
+    : `size:${facet.lo}..${facet.hi}`
+}
+
+/**
+ * Upper ceiling for open-ended bisection (bytes). A SKILL.md larger than 4 MiB is
+ * not a real skill, so once an open-ended bucket's lower bound reaches this the
+ * tail is treated as unsplittable (the caller records it truncated rather than
+ * bisecting forever — doubling an open-ended range never reaches `lo === hi`, so
+ * a persistently-saturating open-ended facet would otherwise loop infinitely).
+ */
+const OPEN_ENDED_BISECT_CEILING = 4 * 1024 * 1024
+
+/**
+ * Split a facet into two disjoint, contiguous, inclusive halves that together
+ * cover the SAME range (used when a facet saturates the 1000-result cap).
+ * Finite: mid = lo + floor((hi - lo) / 2) → [{lo, hi: mid}, {lo: mid+1, hi}].
+ * Open-ended (hi === Infinity): pivot by doubling →
+ *   [{lo, hi: lo*2 - 1}, {lo: lo*2, hi: Infinity}] (requires 0 < lo < ceiling).
+ * Returns null when the facet CANNOT subdivide (finite with lo >= hi; open-ended
+ * with lo <= 0; or open-ended with lo at or past {@link OPEN_ENDED_BISECT_CEILING}).
+ *
+ * @param facet - The saturated size bucket to bisect
+ * @returns A two-element tuple of contiguous halves, or null when unsplittable
+ */
+export function bisectFacet(facet: SizeFacet): [SizeFacet, SizeFacet] | null {
+  if (facet.hi === Number.POSITIVE_INFINITY) {
+    // Open-ended bucket: pivot by doubling the lower bound. A bucket starting at
+    // 0 cannot double (0 * 2 === 0), and past the ceiling there are no real
+    // skills left to partition — both are unsplittable, guard them so a
+    // persistently-saturating open-ended tail terminates instead of doubling
+    // forever.
+    if (facet.lo <= 0 || facet.lo >= OPEN_ENDED_BISECT_CEILING) {
+      return null
+    }
+    const pivot = facet.lo * 2
+    return [
+      { lo: facet.lo, hi: pivot - 1 },
+      { lo: pivot, hi: Number.POSITIVE_INFINITY },
+    ]
+  }
+  // Finite bucket: a single-byte (or inverted) range cannot subdivide.
+  if (facet.lo >= facet.hi) {
+    return null
+  }
+  const mid = facet.lo + Math.floor((facet.hi - facet.lo) / 2)
+  return [
+    { lo: facet.lo, hi: mid },
+    { lo: mid + 1, hi: facet.hi },
+  ]
+}
diff --git a/scripts/indexer/code-search.ts b/scripts/indexer/code-search.ts
index fdd644ff9..c25e0376a 100644
--- a/scripts/indexer/code-search.ts
+++ b/scripts/indexer/code-search.ts
@@ -75,7 +75,10 @@ const RETRY_DELAYS = [1000, 2000, 4000]
  */
 export async function searchCodeForSkillMd(
   page: number,
-  perPage = 30,
+  // SMI-5286 1c: default per_page raised 30 → 100 (GitHub max) so the
+  // 1000-result ceiling is drained in fewer requests. The root phase stays
+  // disabled in 1c, so no size facet is threaded here.
+  perPage = 100,
   telemetry: RateLimitTelemetry
 ): Promise<{ repos: GitHubRepository[]; total: number; retries: number; error?: string }> {
   // Build query: find root-level SKILL.md files.
@@ -215,8 +218,14 @@ export function extractSkillPath(itemPath: string): string {
 export async function searchCodeForSkillMdInSubdirectory(
   pathPrefix: string | undefined,
   page: number,
-  perPage = 30,
-  telemetry: RateLimitTelemetry
+  // SMI-5286 1c: default per_page raised 30 → 100 (GitHub max).
+  perPage = 100,
+  telemetry: RateLimitTelemetry,
+  // SMI-5286 1c: optional pre-formatted GitHub `size:` qualifier (e.g.
+  // `size:0..127`) appended to the query so the broad backfill can partition the
+  // 1000-result-capped query by file size. The caller (the facet driver) formats
+  // it via code-search.facets.ts; this file stays free of the facet dependency.
+  sizeQualifier?: string
 ): Promise<{
   repos: GitHubRepository[]
   total: number
@@ -238,7 +247,11 @@ export async function searchCodeForSkillMdInSubdirectory(
   // Build query: broad (no path constraint) or scoped to pathPrefix.
   // SMI-5176: date qualifiers (created:>/pushed:>) are NOT functional on GitHub
   // code search — they are tokenized as free-text content. No freshness qualifier.
-  const queryStr = pathPrefix ? `filename:SKILL.md path:${pathPrefix}` : 'filename:SKILL.md'
+  const baseQuery = pathPrefix ? `filename:SKILL.md path:${pathPrefix}` : 'filename:SKILL.md'
+  // SMI-5286 1c: append the size facet qualifier (already INCLUSIVE-INCLUSIVE,
+  // e.g. `size:0..127`) BEFORE encoding so the partitioned backfill stays under
+  // the 1000-result ceiling. The qualifier is part of queryStr pre-encode.
+  const queryStr = sizeQualifier ? `${baseQuery} ${sizeQualifier}` : baseQuery
   const query = encodeURIComponent(queryStr)
   const url = `https://api.github.com/search/code?q=${query}&per_page=${perPage}&page=${page}`
 
diff --git a/scripts/indexer/discovery-orchestrator.ts b/scripts/indexer/discovery-orchestrator.ts
index e14d4a4da..5d510f3ba 100644
--- a/scripts/indexer/discovery-orchestrator.ts
+++ b/scripts/indexer/discovery-orchestrator.ts
@@ -25,7 +25,7 @@ import {
   type RateLimitTelemetry,
 } from './_shared/rate-limit.ts'
 import { type SkillMdValidation } from './skill-processor.ts'
-import { runSubdirectorySearch } from './subdirectory-search.ts'
+import { runSubdirectorySearchPhase, type BackfillFacetPlan } from './subdirectory-search.ts'
 import { runCategorization, runCodeSearch, runUpsertPhase } from './indexer-runners.ts'
 import { applyTreeHashTouches, type TreeHashTouchEntry } from './tree-hash-touch.ts'
 import type { RotationSource } from './topic-rotation.ts'
@@ -124,6 +124,13 @@ export interface RunDiscoveryParams {
    * real skills). Default false → byte-identical cron path.
    */
   backfillMode?: boolean
+  /**
+   * SMI-5286 1c: when set (backfill dispatches), Phase 3b runs the resumable
+   * size-faceted crawl from this plan's cursor instead of the legacy broad+
+   * fallback loop. `run.ts` builds it from the checkpoint; the advanced cursor
+   * returns on `result.backfill_crawl`.
+   */
+  backfillFacetPlan?: BackfillFacetPlan
 }
 
 export async function runDiscovery(params: RunDiscoveryParams): Promise<IndexerResult> {
@@ -148,6 +155,7 @@ export async function runDiscovery(params: RunDiscoveryParams): Promise<IndexerR
     killSwitchEngaged,
     discoveryPhase,
     backfillMode = false,
+    backfillFacetPlan,
   } = params
 
   // SMI-4870: phase gates. When `discoveryPhase` is unset every gate is true,
@@ -336,40 +344,19 @@ export async function runDiscovery(params: RunDiscoveryParams): Promise<IndexerR
   // Enable with: SKILLSMITH_ENABLE_SUBDIRECTORY_SEARCH=true
   // SMI-4870: only the phase-3 sub-slot (or the legacy path) runs subdir search.
   if (runPhase3 && process.env.SKILLSMITH_ENABLE_SUBDIRECTORY_SEARCH === 'true') {
-    try {
-      const subdirResult = await runSubdirectorySearch(
-        seenUrls,
-        validationCache,
-        validationOptions,
-        codeSearchMaxPages,
-        telemetry
-      )
-      for (const repo of subdirResult.repos) {
-        repositories.push(repo)
-      }
-      result.errors.push(...subdirResult.errors)
-      result.subdirectory_search = {
-        repos_found: subdirResult.repos.length,
-        total_found: subdirResult.totalFound,
-        retries: subdirResult.retries,
-        license_filtered: subdirResult.licenseFiltered,
-        license_fetch_failed: subdirResult.licenseFetchFailed,
-        incomplete_results: subdirResult.incompleteResults,
-        search_mode: subdirResult.searchMode,
-      }
-    } catch (err) {
-      console.warn(
-        `[CodeSearch] Phase 3b failed: ${err instanceof Error ? err.message : 'Unknown'}`
-      )
-      result.subdirectory_search = {
-        repos_found: 0,
-        total_found: 0,
-        retries: 0,
-        license_filtered: 0,
-        license_fetch_failed: 0,
-        error: 'phase_failed',
-      }
-    }
+    // SMI-5286 1c: extracted to `runSubdirectorySearchPhase` (keeps this file
+    // under the 500-line gate). In backfill mode `backfillFacetPlan` routes it to
+    // the size-faceted crawl; the advanced cursor lands on `result.backfill_crawl`.
+    await runSubdirectorySearchPhase({
+      seenUrls,
+      validationCache,
+      validationOptions,
+      codeSearchMaxPages,
+      telemetry,
+      repositories,
+      result,
+      backfillFacetPlan,
+    })
   }
 
   // Count total SKILL.md files on GitHub for homepage stats display.
diff --git a/scripts/indexer/indexer-types.ts b/scripts/indexer/indexer-types.ts
index dcf4cc10b..c16803d15 100644
--- a/scripts/indexer/indexer-types.ts
+++ b/scripts/indexer/indexer-types.ts
@@ -3,13 +3,17 @@
  * @module scripts/indexer/indexer-types
  *
  * SMI-4852: Node-flavored sibling of `supabase/functions/indexer/indexer-types.ts`.
- * Pure interface declarations — byte-identical to the Deno parent (no env, no
- * imports, no fetches). Parity guarded by `scripts/indexer/tests/parity.test.ts`.
+ * Interface declarations only (no env, no fetches). NOTE (SMI-5286 1c): this file
+ * is NOT in the `parity.test.ts` guarded set, so the Node copy may diverge from
+ * the Deno parent — the `backfill_crawl` field + its type import below are
+ * Node-only (the backfill engine is the Node GHA runner, never the Deno cron).
  *
  * Original SMI-4376: Shared interfaces extracted from `index.ts` to keep the
  * orchestrator thin.
  */
 
+import type { BackfillCrawlOutcome } from './backfill-checkpoint.ts'
+
 /**
  * Indexer request body
  */
@@ -66,6 +70,12 @@ export interface IndexerResult {
     search_mode?: 'broad' | 'prefix-fallback'
     error?: string
   }
+  /**
+   * SMI-5286 1c: the facet driver's advanced cursor + counters for this backfill
+   * dispatch. Present only when `BACKFILL_MODE` is set and Phase 3b ran the
+   * size-facet crawl; `run.ts` reads it to write the checkpoint + step summary.
+   */
+  backfill_crawl?: BackfillCrawlOutcome
   /** Phase 1: High-trust wildcard expansion stats (SMI-2672). Always present; zero values when no wildcards ran. */
   high_trust_wildcard: {
     authors_with_wildcards: number
diff --git a/scripts/indexer/parse-env.ts b/scripts/indexer/parse-env.ts
index 8478345ec..a6b0b9e64 100644
--- a/scripts/indexer/parse-env.ts
+++ b/scripts/indexer/parse-env.ts
@@ -35,9 +35,21 @@ export interface IndexerEnv {
    * SMI-5286 Wave 1b (§#2): out-of-band backfill mode. When true the discovery
    * run drops the 7-day freshness window (un-windowed scan) and skips the
    * Phase-6 stale sweep so a partial crawl can't quarantine real skills. Bare
-   * name (no prefix) per the parse-env convention. Cap-raising (§#3) is Wave 1c.
+   * name (no prefix) per the parse-env convention.
    */
   BACKFILL_MODE: boolean
+  /**
+   * SMI-5286 1c (C-5): when set, the backfill facet crawl is restricted to this
+   * single `path:` prefix (the one-ecosystem DRY_RUN / targeted-recovery mode).
+   * Empty/unset → the broad `filename:SKILL.md` query (subsumes root + subdirs).
+   */
+  BACKFILL_PATH_PREFIX: string | undefined
+  /**
+   * SMI-5286 1c: per-dispatch budget — the facet driver stops after this many
+   * size (sub)ranges so a dispatch fits inside the GHA cap, writes a checkpoint,
+   * and the operator re-dispatches with `resume_from=latest`.
+   */
+  BACKFILL_MAX_RANGES: number
 }
 
 function getRequired(name: string): string {
@@ -83,9 +95,14 @@ export function parseEnv(env: NodeJS.ProcessEnv = process.env): IndexerEnv {
             return parsed
           })()
 
-    const MAX_PAGES = getInt('MAX_PAGES', 5)
-    const MAX_REPOS = getInt('MAX_REPOS', 100)
-    const CODE_SEARCH_MAX_PAGES = getInt('CODE_SEARCH_MAX_PAGES', 1)
+    // SMI-5286 Wave 1b: backfill mode (bare name; default off). Parsed before the
+    // caps so 1c (§#3, C-5) can raise their DEFAULTS in backfill mode while the
+    // cron defaults stay untouched. (The backfill workflow also sets the caps
+    // explicitly; these defaults are the safety net if it doesn't.)
+    const BACKFILL_MODE = getBool('BACKFILL_MODE', false)
+    const MAX_PAGES = getInt('MAX_PAGES', BACKFILL_MODE ? 10 : 5)
+    const MAX_REPOS = getInt('MAX_REPOS', BACKFILL_MODE ? 500 : 100)
+    const CODE_SEARCH_MAX_PAGES = getInt('CODE_SEARCH_MAX_PAGES', BACKFILL_MODE ? 10 : 1)
     const DRY_RUN = getBool('DRY_RUN', false)
     const RUN_TYPE_RAW = process.env.RUN_TYPE ?? 'discovery'
     if (
@@ -120,8 +137,15 @@ export function parseEnv(env: NodeJS.ProcessEnv = process.env): IndexerEnv {
       DISCOVERY_PHASE = Number(discoveryPhaseRaw) as DiscoveryPhase
     }
 
-    // SMI-5286 Wave 1b: backfill mode (bare name; default off).
-    const BACKFILL_MODE = getBool('BACKFILL_MODE', false)
+    // SMI-5286 1c: single-prefix restriction for the backfill facet crawl. Empty
+    // string → undefined (the broad query). BACKFILL_MAX_RANGES = per-dispatch
+    // (sub)range budget so a run fits the GHA cap (default 150).
+    const backfillPathPrefixRaw = process.env.BACKFILL_PATH_PREFIX
+    const BACKFILL_PATH_PREFIX =
+      backfillPathPrefixRaw == null || backfillPathPrefixRaw === ''
+        ? undefined
+        : backfillPathPrefixRaw
+    const BACKFILL_MAX_RANGES = getInt('BACKFILL_MAX_RANGES', 150)
 
     // Concurrency: kill-switch (env=1) forces 1, else CONCURRENCY env or D-3 default of 2.
     const kill_switch_engaged = getBool('CONCURRENCY_KILL_SWITCH', false)
@@ -146,6 +170,8 @@ export function parseEnv(env: NodeJS.ProcessEnv = process.env): IndexerEnv {
       kill_switch_engaged,
       DISCOVERY_PHASE,
       BACKFILL_MODE,
+      BACKFILL_PATH_PREFIX,
+      BACKFILL_MAX_RANGES,
     }
   } finally {
     process.env = prev
diff --git a/scripts/indexer/run.ts b/scripts/indexer/run.ts
index 18087f240..dc992b3cb 100644
--- a/scripts/indexer/run.ts
+++ b/scripts/indexer/run.ts
@@ -43,9 +43,12 @@ import { prefetchExistingSkills } from './prefetch-existing-skills.ts'
 // (§#2): token-source derivation + backfill summary sub-object emitted on stdout.
 import {
   readLatestCheckpoint,
+  writeCheckpoint,
   resolveTokenSource,
   type BackfillSummary,
 } from './backfill-checkpoint.ts'
+// SMI-5286 1c: the facet-crawl plan handed to the orchestrator's Phase 3b.
+import type { BackfillFacetPlan } from './subdirectory-search.ts'
 // SMI-4870: lock-skip observability — write an audit row even when the lock is
 // already held so partial-cycle gaps are detectable in SQL.
 import { writeIndexerAuditLog } from './indexer-audit-log.ts'
@@ -128,17 +131,18 @@ async function runDiscoveryBranch(
     `[Prefetch] ${rowsScanned} skill rows scanned; tree-hash cache seeded with ${treeHashCache.size} entries`
   )
 
-  // SMI-5286 Wave 1b (§#5): on a backfill dispatch, read the latest checkpoint
-  // cursor so a re-dispatch resumes mid-facet. `RESUME_FROM` ('latest' or a
-  // specific run_id) maps to the workflow's `resume_from` input. The full
-  // facet-resume DRIVER lands in Wave 1c; here we read + log the resumed cursor
-  // so the operator loop is observable from this dispatch onward.
+  // SMI-5286 1c (§#5): on a backfill dispatch, read the latest checkpoint cursor
+  // and build the facet-crawl plan so a re-dispatch resumes mid-facet. The
+  // advanced cursor returns on `result.backfill_crawl` and is checkpointed below.
+  // `RESUME_FROM` ('latest' or a specific run_id) maps to the workflow's
+  // `resume_from` input; the NEW checkpoint is keyed on GITHUB_RUN_ID.
   let checkpointId: string | null = null
+  let backfillFacetPlan: BackfillFacetPlan | undefined
+  const backfillRunId = process.env.GITHUB_RUN_ID ?? requestId
   if (env.BACKFILL_MODE) {
     const resumeFrom = process.env.RESUME_FROM
     const checkpoint = await readLatestCheckpoint(supabase, resumeFrom)
     if (checkpoint) {
-      checkpointId = checkpoint.run_id
       const { path, facet, last_page } = checkpoint.cursor
       console.log(
         `[Backfill] SMI-5286: resuming from checkpoint (run_id=${checkpoint.run_id}) ` +
@@ -148,6 +152,13 @@ async function runDiscoveryBranch(
     } else {
       console.log('[Backfill] SMI-5286: no prior checkpoint — starting from the beginning')
     }
+    backfillFacetPlan = {
+      startCursor: checkpoint?.cursor ?? null,
+      pathPrefix: env.BACKFILL_PATH_PREFIX,
+      perPage: 100,
+      maxPagesPerRange: env.CODE_SEARCH_MAX_PAGES,
+      maxRangesPerDispatch: env.BACKFILL_MAX_RANGES,
+    }
   }
 
   const result = await runDiscovery({
@@ -174,8 +185,31 @@ async function runDiscoveryBranch(
     discoveryPhase: env.DISCOVERY_PHASE,
     // SMI-5286 Wave 1b (§#2): drop the freshness window + skip Phase 6 in backfill.
     backfillMode: env.BACKFILL_MODE,
+    // SMI-5286 1c: facet-crawl plan (undefined on the cron path).
+    backfillFacetPlan,
   })
 
+  // SMI-5286 1c (§#5): persist the advanced facet cursor so the next dispatch
+  // (resume_from=latest) continues mid-facet. Written even in DRY_RUN so the
+  // operator can verify the resume loop before flipping to a live write
+  // (audit_logs is append-only telemetry, not the skills registry).
+  if (env.BACKFILL_MODE && result.backfill_crawl) {
+    const bc = result.backfill_crawl
+    const wrote = await writeCheckpoint(supabase, {
+      run_id: backfillRunId,
+      cursor: bc.cursor,
+      facets_completed: bc.facets_completed,
+      facets_total: bc.facets_total,
+      cap_saturated: bc.cap_saturated,
+      truncated_repo_count: bc.truncated_repo_count,
+    })
+    if (wrote) checkpointId = backfillRunId
+    console.log(
+      `[Backfill] SMI-5286: checkpoint ${wrote ? 'written' : 'FAILED'} (run_id=${backfillRunId}) ` +
+        `cursor.facet=${bc.cursor.facet} facets ${bc.facets_completed}/${bc.facets_total} done=${bc.done}`
+    )
+  }
+
   return { result, topics, rotationSource, checkpointId }
 }
 
@@ -352,22 +386,28 @@ async function main(): Promise<void> {
   const treeHashCache = (result as { tree_hash_cache?: { hits?: number; misses?: number } } | null)
     ?.tree_hash_cache
 
-  // SMI-5286 Wave 1b (§#2): on a backfill dispatch, attach a `backfill` sub-object
-  // onto `data` so `indexer-backfill.yml` can read `data.backfill.token_source`
-  // (its guardian fails the run if it reads 'app', proving PAT-bucket isolation).
-  // Facet counters are 0 in 1b (facet partitioning lands in 1c); token_source is
-  // the load-bearing field. Spread keeps the existing IndexerResult fields under
-  // `data` intact. Only emitted when BACKFILL_MODE is true.
+  // SMI-5286 (§#2): on a backfill dispatch, attach a `backfill` sub-object onto
+  // `data` so `indexer-backfill.yml` can read `data.backfill.token_source` (its
+  // guardian fails the run if it reads 'app', proving PAT-bucket isolation).
+  // 1c: the facet counters are sourced from `result.backfill_crawl` (the advanced
+  // cursor outcome from Phase 3b); `facets_remaining == 0` is the terminal
+  // condition the operator loop watches. Spread keeps the existing IndexerResult
+  // fields under `data` intact. Only emitted when BACKFILL_MODE is true.
   let data: unknown = result
   if (env.BACKFILL_MODE && result && typeof result === 'object') {
+    const crawl = (result as { backfill_crawl?: IndexerResult['backfill_crawl'] }).backfill_crawl
     const backfill: BackfillSummary = {
       token_source: resolveTokenSource(),
       checkpoint_id: checkpointId,
-      facets_total: 0,
-      facets_completed: 0,
-      facets_remaining: 0,
-      cap_saturated: false,
-      truncated_repo_count: 0,
+      facets_total: crawl?.facets_total ?? 0,
+      facets_completed: crawl?.facets_completed ?? 0,
+      facets_remaining: crawl ? crawl.facets_total - crawl.facets_completed : 0,
+      cap_saturated: crawl?.cap_saturated ?? false,
+      truncated_repo_count: crawl?.truncated_repo_count ?? 0,
+      // M-2: honest crawl position — 'done' only when the bisection frontier is
+      // also empty (facets_remaining alone reads 0 while sub-ranges still drain).
+      current_facet: crawl?.cursor.facet,
+      pending_subrange_count: crawl?.cursor.pending_subranges?.length ?? 0,
     }
     data = { ...(result as Record<string, unknown>), backfill }
   }
diff --git a/scripts/indexer/subdirectory-search.helpers.ts b/scripts/indexer/subdirectory-search.helpers.ts
new file mode 100644
index 000000000..e205f640c
--- /dev/null
+++ b/scripts/indexer/subdirectory-search.helpers.ts
@@ -0,0 +1,308 @@
+/**
+ * Subdirectory-search helpers: shared per-skill result processor + the SMI-5286
+ * 1c size-faceted backfill crawl.
+ * @module scripts/indexer/subdirectory-search.helpers
+ *
+ * Extracted from `subdirectory-search.ts` to keep that entrypoint under the
+ * 500-line CI gate (SMI-5286 1c). `processSearchResults` is shared by the legacy
+ * broad/fallback loop AND the backfill crawl; `runBackfillFacetCrawl` is the
+ * size-faceted depth-first driver. The dependency is one-way
+ * (`subdirectory-search.ts` → this file) — this file never imports the entrypoint.
+ *
+ * NOT parity-guarded (`parity.test.ts` exempts the subdirectory surface, C-2),
+ * so divergence from the Deno copy is safe and intended (the backfill engine is
+ * the Node GHA runner only).
+ */
+
+import { delay, type RateLimitTelemetry } from './_shared/rate-limit.ts'
+import { searchCodeForSkillMdInSubdirectory } from './code-search.ts'
+import { checkSkillMdExists } from './skill-processor.ts'
+import { fetchRepoLicense, isPermissiveLicense } from './license-filter.ts'
+import { enumerateRepoSkillPaths, type EnumerateTelemetry } from './trees-enumerate.ts'
+import { buildSkillTreeUrl } from './skill-url.ts'
+import { buildSizeFacets, facetId, facetToQualifier } from './code-search.facets.ts'
+import {
+  type BackfillCursor,
+  type BackfillCrawlOutcome,
+  advanceFacet,
+  bisectCurrentFacet,
+  cursorToFacetState,
+  currentFacetRange,
+  facetStateToCursor,
+  isFacetCrawlDone,
+} from './backfill-checkpoint.ts'
+import type { GitHubRepository } from './topic-search.ts'
+import type { SkillMdValidation } from './skill-processor.ts'
+
+/**
+ * Process code search results: deduplicate, license-gate, validate, and collect repos.
+ * Shared by the broad, fallback, and backfill facet-crawl search paths.
+ *
+ * SMI-4852: Threads `telemetry` to downstream `fetchRepoLicense` and
+ * `checkSkillMdExists` calls so every GitHub API hit lands in the shared
+ * collector.
+ *
+ * SMI-5286 Wave 1a (§#1, C-1): per-skill (collection) extraction. Each candidate
+ * repo is enumerated ONCE via the Trees API (`enumerateRepoSkillPaths`) and EVERY
+ * valid SKILL.md parent dir becomes its own `GitHubRepository`, with a DISTINCT
+ * per-skill tree URL (`buildSkillTreeUrl`) so N skills in one repo yield N distinct
+ * `repo_url` rows that never collide on `onConflict: 'repo_url'`. Each per-path row
+ * is validated independently (§#4 strict gate) before it is collected; validated
+ * rows are `installable:true` (`skill-processor.ts:440` then persists the non-null
+ * tree URL). Edit E: only the enumeration loop changed — the dedup-key /
+ * freshness-qualifier lines (`:89`) are byte-stable for the SMI-5176 rebase.
+ */
+export async function processSearchResults(
+  resultRepos: GitHubRepository[],
+  seenUrls: Set<string>,
+  validationCache: Map<string, SkillMdValidation>,
+  validationOptions: { strictValidation?: boolean; minContentLength?: number },
+  repos: GitHubRepository[],
+  stats: { licenseFiltered: number; licenseFetchFailed: number },
+  telemetry: RateLimitTelemetry,
+  enumerateTelemetry: EnumerateTelemetry,
+  enumeratedRepos: Set<string>
+): Promise<void> {
+  for (const repo of resultRepos) {
+    // Deduplication key includes skillPath: one repo can have multiple skills
+    const dedupKey = repo.skillPath ? `${repo.url}/${repo.skillPath}` : repo.url
+    if (seenUrls.has(dedupKey)) continue
+
+    // License gate: fetch SPDX from GitHub API (not included in code search response)
+    const { license: spdxId, fetchFailed } = await fetchRepoLicense(
+      repo.owner,
+      repo.repoName,
+      telemetry
+    )
+
+    if (fetchFailed) {
+      // API failure — skip this run but don't count as license-filtered.
+      // NOT added to seenUrls so the repo is retried on the next indexer run.
+      console.log(`[BroadDiscovery] License fetch failed (will retry next run): ${repo.fullName}`)
+      stats.licenseFetchFailed++
+      await delay(200)
+      continue
+    }
+
+    if (!isPermissiveLicense(spdxId)) {
+      // Confirmed non-permissive license — permanently excluded.
+      console.log(`[BroadDiscovery] License excluded: ${repo.fullName} spdx=${spdxId ?? 'null'}`)
+      stats.licenseFiltered++
+      await delay(200)
+      continue
+    }
+
+    // Mark this code-search result consumed (per-repo+skillPath identity) so the
+    // same surfaced file is not re-processed across pages/prefixes.
+    seenUrls.add(dedupKey)
+
+    // SMI-5286 Wave 1a (§#1): enumerate the repo's full tree ONCE. The broad query
+    // can surface the same repo via multiple SKILL.md hits; guard re-enumeration.
+    const repoKey = `${repo.owner}/${repo.repoName}`
+    if (enumeratedRepos.has(repoKey)) {
+      await delay(50)
+      continue
+    }
+    enumeratedRepos.add(repoKey)
+
+    const { entries, truncatedByApi, truncatedByCap } = await enumerateRepoSkillPaths(
+      repo.owner,
+      repo.repoName,
+      repo.defaultBranch,
+      telemetry,
+      enumerateTelemetry
+    )
+
+    if (truncatedByApi) {
+      // Trees API truncated — do NOT emit a partial set (deterministic skip).
+      console.log(
+        `[BroadDiscovery] Trees truncated, skipping for manual handling: ${repo.fullName}`
+      )
+      await delay(50)
+      continue
+    }
+    if (truncatedByCap) {
+      console.log(`[BroadDiscovery] Per-repo cap reached, taking first N: ${repo.fullName}`)
+    }
+
+    // Validate each enumerated SKILL.md independently (§#4 strict gate) and emit one
+    // per-skill GitHubRepository per path (distinct tree URL, C-1; result on `installable`).
+    for (const entry of entries) {
+      const skillPath = entry.path
+      const installable = await checkSkillMdExists(
+        repo.owner,
+        repo.repoName,
+        repo.defaultBranch,
+        validationCache,
+        telemetry,
+        skillPath,
+        validationOptions
+      )
+
+      // C-1: build the per-skill tree URL from the BARE repo html_url
+      // (reconstructed from owner/repoName), NOT from `repo.url` — by this point
+      // `repo.url` is already the code-search mapper's tree URL, so reusing it
+      // would double the `/tree/<branch>` segment. `skillUrl` already encodes
+      // `skillPath`, so it alone is the dedup key.
+      const skillUrl = buildSkillTreeUrl(
+        `https://github.com/${repo.owner}/${repo.repoName}`,
+        repo.defaultBranch,
+        skillPath
+      )
+      if (seenUrls.has(skillUrl)) continue // already emitted (checked after the validation call above)
+      seenUrls.add(skillUrl)
+
+      repos.push({
+        ...repo,
+        url: skillUrl,
+        installable,
+        skillPath,
+        treeHash: entry.blobSha,
+        license: spdxId,
+      })
+      await delay(50)
+    }
+  }
+}
+
+/**
+ * SMI-5286 1c: a single dispatch's facet-crawl plan. The driver in `run.ts`
+ * builds this from the resumed checkpoint cursor + raised caps and hands it to
+ * `runSubdirectorySearch`; the returned {@link BackfillCrawlOutcome} carries the
+ * advanced cursor back for the next checkpoint write.
+ */
+export interface BackfillFacetPlan {
+  /** Cursor to resume from (null = cold start at facet 0, page 0). */
+  startCursor: BackfillCursor | null
+  /**
+   * Restrict the crawl to this single `path:` prefix (the `BACKFILL_PATH_PREFIX`
+   * one-ecosystem DRY_RUN / targeted-recovery mode). `undefined` = the broad
+   * `filename:SKILL.md` query (no `path:` constraint), which subsumes root +
+   * every subdirectory.
+   */
+  pathPrefix: string | undefined
+  /** Results per code-search page (GitHub max 100). */
+  perPage: number
+  /** Pages to crawl per (sub)range before treating it as exhausted (≈ ceil(1000 / perPage)). */
+  maxPagesPerRange: number
+  /** Dispatch budget (fits the GHA cap): stop after this many (sub)ranges, incl. saturated/errored. */
+  maxRangesPerDispatch: number
+}
+
+/** GitHub code search returns at most this many results per query, whatever its total_count. */
+const CODE_SEARCH_RESULT_CAP = 1000
+
+/**
+ * SMI-5286 1c: depth-first size-faceted crawl of the broad `filename:SKILL.md`
+ * query (or a single `path:` prefix). Pages each size (sub)range to the 1000-cap;
+ * a range whose `total_count` exceeds the cap is BISECTED (its halves crawled
+ * before the next top-level facet) so every file is reachable. A range that
+ * saturates but cannot subdivide further (≥1000 identical-byte-size files —
+ * almost always denylist-caught boilerplate) is recorded as truncated, logged,
+ * and skipped (never silently dropped). The frontier (facet index + bisection
+ * stack + page) is fully captured by the returned cursor so a dispatch boundary
+ * mid-bisection resumes losslessly. Reuses {@link processSearchResults} (license
+ * gate + Trees per-skill enumeration + per-path validation) unchanged.
+ */
+export async function runBackfillFacetCrawl(
+  plan: BackfillFacetPlan,
+  seenUrls: Set<string>,
+  validationCache: Map<string, SkillMdValidation>,
+  validationOptions: { strictValidation?: boolean; minContentLength?: number },
+  repos: GitHubRepository[],
+  stats: { licenseFiltered: number; licenseFetchFailed: number },
+  telemetry: RateLimitTelemetry,
+  enumerateTelemetry: EnumerateTelemetry,
+  enumeratedRepos: Set<string>,
+  errors: string[]
+): Promise<BackfillCrawlOutcome> {
+  const facets = buildSizeFacets()
+  const state = cursorToFacetState(plan.startCursor)
+  const pathLabel = plan.pathPrefix ?? 'broad'
+  let capSaturated = false
+  let truncatedRanges = 0
+  let rangesCrawled = 0
+
+  while (rangesCrawled < plan.maxRangesPerDispatch) {
+    const range = currentFacetRange(state, facets)
+    if (!range) break // ladder exhausted
+    const qualifier = facetToQualifier(range)
+
+    let saturated = false
+    let errored = false
+    for (let page = state.lastPage + 1; page <= plan.maxPagesPerRange; page++) {
+      const result = await searchCodeForSkillMdInSubdirectory(
+        plan.pathPrefix,
+        page,
+        plan.perPage,
+        telemetry,
+        qualifier
+      )
+      if (result.error) {
+        errors.push(`[backfill ${pathLabel} ${facetId(range)} p${page}] ${result.error}`)
+        errored = true
+        break
+      }
+      // The 1000-cap is detected from total_count on the first page: rather than
+      // waste pages on the unreachable tail, bisect immediately — the sub-ranges
+      // (each < cap, or bisected further) cover the same files.
+      if (page === 1 && result.total > CODE_SEARCH_RESULT_CAP) {
+        saturated = true
+        break
+      }
+      await processSearchResults(
+        result.repos,
+        seenUrls,
+        validationCache,
+        validationOptions,
+        repos,
+        stats,
+        telemetry,
+        enumerateTelemetry,
+        enumeratedRepos
+      )
+      state.lastPage = page
+      if (result.repos.length < plan.perPage) break // short page → range exhausted
+      await delay(6000) // 10 code-search req/min → 6s between pages; NOTE(review): every break path skips this, so this loop adds no pause between ranges — confirm the search wrapper paces requests
+    }
+
+    rangesCrawled++
+
+    if (errored) {
+      // M-1: a page error (rate-limiter already retried transient 403/429, so a
+      // returned error is exceptional) — count it as truncated so it surfaces in
+      // the dispatch summary + errors[], then advance past the range rather than
+      // re-crawl it forever this dispatch. The operator can re-run the facet under
+      // a narrower BACKFILL_PATH_PREFIX once the cause is cleared (SPARC §#3).
+      truncatedRanges++
+      console.warn(
+        `[Backfill] facet ${facetId(range)} (${pathLabel}) errored — recorded as truncated, advancing`
+      )
+      advanceFacet(state)
+    } else if (saturated) {
+      capSaturated = true
+      if (!bisectCurrentFacet(state, range)) {
+        // Saturated AND unbisectable: record + skip (never silent). The operator
+        // can re-run this facet under a narrower BACKFILL_PATH_PREFIX (SPARC §#3).
+        truncatedRanges++
+        console.warn(
+          `[Backfill] facet ${facetId(range)} (${pathLabel}) saturated at the 1000-cap and cannot subdivide — recorded as truncated, skipping`
+        )
+        advanceFacet(state)
+      }
+    } else {
+      // Range exhausted (short page, or page cap reached with total <= cap).
+      advanceFacet(state)
+    }
+  }
+
+  return {
+    cursor: facetStateToCursor(state, plan.pathPrefix ?? '', facets),
+    done: isFacetCrawlDone(state, facets),
+    cap_saturated: capSaturated,
+    truncated_repo_count: truncatedRanges, // NOTE: counts truncated (sub)ranges (errored or unbisectable), not repos
+    facets_completed: state.facetIndex,
+    facets_total: facets.length,
+    ranges_crawled: rangesCrawled,
+  }
+}
diff --git a/scripts/indexer/subdirectory-search.ts b/scripts/indexer/subdirectory-search.ts
index 5102a762a..fa025714b 100644
--- a/scripts/indexer/subdirectory-search.ts
+++ b/scripts/indexer/subdirectory-search.ts
@@ -5,23 +5,28 @@
  * SMI-4852: Node-flavored sibling of
  * `supabase/functions/indexer/subdirectory-search.ts`. This module performs no
  * direct GitHub fetches; it dispatches to `searchCodeForSkillMdInSubdirectory`
- * (already wrapped per Hard Rule 1) and the sibling-module helpers
- * `checkSkillMdExists` / `fetchRepoLicense` (each wrapped in their own
- * cluster). Telemetry is threaded through to every downstream call so the
- * single run-scoped collector aggregates header data from every consumer.
- * Parity is guarded by `scripts/indexer/tests/parity.test.ts`.
+ * (already wrapped per Hard Rule 1) and the sibling-module helper
+ * `processSearchResults` (license-gate + Trees per-skill enumeration). Telemetry
+ * is threaded through to every downstream call so the single run-scoped collector
+ * aggregates header data from every consumer. NOTE (SMI-5286 1c): this surface is
+ * NOT parity-guarded (`parity.test.ts` exempts subdirectory-search), so the Node
+ * copy may diverge from the Deno parent.
  *
  * Original module docs:
  *
  * SMI-2660: Phase 3b of the indexer — finds SKILL.md files via GitHub Code Search.
  * SMI-3229: Replaced hardcoded path-prefix loop with broad `filename:SKILL.md` query.
  *
- * Extracted from index.ts to satisfy the 500-line CI gate.
+ * Extracted from index.ts to satisfy the 500-line CI gate. SMI-5286 1c moved the
+ * shared `processSearchResults` + the size-faceted backfill crawl to
+ * `subdirectory-search.helpers.ts` to stay under that gate.
  *
  * Strategy:
  * 1. Primary: broad query (no path: constraint) — discovers SKILL.md at any depth
  * 2. Fallback: if any page returns incomplete_results, re-runs with 7 path-scoped
  *    queries to ensure known ecosystems are fully covered
+ * 3. SMI-5286 1c backfill: when a `BackfillFacetPlan` is supplied, the legacy loop
+ *    is replaced by a resumable size-faceted depth-first crawl.
  *
  * Rate limit: 10 code search requests/minute (separate from main API).
  * Gated by SKILLSMITH_ENABLE_SUBDIRECTORY_SEARCH=true env var to prevent
@@ -35,12 +40,25 @@
 
 import { GITHUB_API_DELAY, delay, type RateLimitTelemetry } from './_shared/rate-limit.ts'
 import { searchCodeForSkillMdInSubdirectory } from './code-search.ts'
-import { checkSkillMdExists } from './skill-processor.ts'
-import { fetchRepoLicense, isPermissiveLicense } from './license-filter.ts'
-import { enumerateRepoSkillPaths, type EnumerateTelemetry } from './trees-enumerate.ts'
-import { buildSkillTreeUrl } from './skill-url.ts'
+import { type EnumerateTelemetry } from './trees-enumerate.ts'
+import {
+  processSearchResults,
+  runBackfillFacetCrawl,
+  type BackfillFacetPlan,
+} from './subdirectory-search.helpers.ts'
+import type { BackfillCrawlOutcome } from './backfill-checkpoint.ts'
 import type { GitHubRepository } from './topic-search.ts'
 import type { SkillMdValidation } from './skill-processor.ts'
+import type { IndexerResult } from './indexer-types.ts'
+
+export type { BackfillFacetPlan } from './subdirectory-search.helpers.ts'
+
+/**
+ * Results per code-search page. SMI-5286 1c (C-5): GitHub allows 100 (was a
+ * hardcoded 30 → 3.3x fewer requests for the same coverage). The cron leaves
+ * Phase 3b disabled, so this only affects manual-enable + backfill runs.
+ */
+const BROAD_QUERY_PER_PAGE = 100
 
 /**
  * Fallback path prefixes used when broad query returns incomplete results.
@@ -69,137 +87,6 @@ export const FALLBACK_PATH_PREFIXES = [
   '.windsurf/skills', // Windsurf (native, since 2026-03)
 ]
 
-/**
- * Process code search results: deduplicate, license-gate, validate, and collect repos.
- * Shared by both broad and fallback search paths.
- *
- * SMI-4852: Threads `telemetry` to downstream `fetchRepoLicense` and
- * `checkSkillMdExists` calls so every GitHub API hit lands in the shared
- * collector.
- *
- * SMI-5286 Wave 1a (§#1, C-1): per-skill (collection) extraction. Each candidate
- * repo is enumerated ONCE via the Trees API (`enumerateRepoSkillPaths`) and EVERY
- * valid SKILL.md parent dir becomes its own `GitHubRepository`, with a DISTINCT
- * per-skill tree URL (`buildSkillTreeUrl`) so N skills in one repo yield N distinct
- * `repo_url` rows that never collide on `onConflict: 'repo_url'`. Each per-path row
- * is validated independently (§#4 strict gate) before it is collected; validated
- * rows are `installable:true` (`skill-processor.ts:440` then persists the non-null
- * tree URL). Edit E: only the enumeration loop changed — the dedup-key /
- * freshness-qualifier lines (`:89`) are byte-stable for the SMI-5176 rebase.
- */
-async function processSearchResults(
-  resultRepos: GitHubRepository[],
-  seenUrls: Set<string>,
-  validationCache: Map<string, SkillMdValidation>,
-  validationOptions: { strictValidation?: boolean; minContentLength?: number },
-  repos: GitHubRepository[],
-  stats: { licenseFiltered: number; licenseFetchFailed: number },
-  telemetry: RateLimitTelemetry,
-  enumerateTelemetry: EnumerateTelemetry,
-  enumeratedRepos: Set<string>
-): Promise<void> {
-  for (const repo of resultRepos) {
-    // Deduplication key includes skillPath: one repo can have multiple skills
-    const dedupKey = repo.skillPath ? `${repo.url}/${repo.skillPath}` : repo.url
-    if (seenUrls.has(dedupKey)) continue
-
-    // License gate: fetch SPDX from GitHub API (not included in code search response)
-    const { license: spdxId, fetchFailed } = await fetchRepoLicense(
-      repo.owner,
-      repo.repoName,
-      telemetry
-    )
-
-    if (fetchFailed) {
-      // API failure — skip this run but don't count as license-filtered.
-      // NOT added to seenUrls so the repo is retried on the next indexer run.
-      console.log(`[BroadDiscovery] License fetch failed (will retry next run): ${repo.fullName}`)
-      stats.licenseFetchFailed++
-      await delay(200)
-      continue
-    }
-
-    if (!isPermissiveLicense(spdxId)) {
-      // Confirmed non-permissive license — permanently excluded.
-      console.log(`[BroadDiscovery] License excluded: ${repo.fullName} spdx=${spdxId ?? 'null'}`)
-      stats.licenseFiltered++
-      await delay(200)
-      continue
-    }
-
-    // Mark this code-search result consumed (per-repo+skillPath identity) so the
-    // same surfaced file is not re-processed across pages/prefixes.
-    seenUrls.add(dedupKey)
-
-    // SMI-5286 Wave 1a (§#1): enumerate the repo's full tree ONCE. The broad query
-    // can surface the same repo via multiple SKILL.md hits; guard re-enumeration.
-    const repoKey = `${repo.owner}/${repo.repoName}`
-    if (enumeratedRepos.has(repoKey)) {
-      await delay(50)
-      continue
-    }
-    enumeratedRepos.add(repoKey)
-
-    const { entries, truncatedByApi, truncatedByCap } = await enumerateRepoSkillPaths(
-      repo.owner,
-      repo.repoName,
-      repo.defaultBranch,
-      telemetry,
-      enumerateTelemetry
-    )
-
-    if (truncatedByApi) {
-      // Trees API truncated — do NOT emit a partial set (deterministic skip).
-      console.log(
-        `[BroadDiscovery] Trees truncated, skipping for manual handling: ${repo.fullName}`
-      )
-      await delay(50)
-      continue
-    }
-    if (truncatedByCap) {
-      console.log(`[BroadDiscovery] Per-repo cap reached, taking first N: ${repo.fullName}`)
-    }
-
-    // Validate each enumerated SKILL.md independently (§#4 strict gate) and emit
-    // one per-skill GitHubRepository with a distinct tree URL (C-1) per valid path.
-    for (const entry of entries) {
-      const skillPath = entry.path
-      const installable = await checkSkillMdExists(
-        repo.owner,
-        repo.repoName,
-        repo.defaultBranch,
-        validationCache,
-        telemetry,
-        skillPath,
-        validationOptions
-      )
-
-      // C-1: build the per-skill tree URL from the BARE repo html_url
-      // (reconstructed from owner/repoName), NOT from `repo.url` — by this point
-      // `repo.url` is already the code-search mapper's tree URL, so reusing it
-      // would double the `/tree/<branch>` segment. `skillUrl` already encodes
-      // `skillPath`, so it alone is the dedup key.
-      const skillUrl = buildSkillTreeUrl(
-        `https://github.com/${repo.owner}/${repo.repoName}`,
-        repo.defaultBranch,
-        skillPath
-      )
-      if (seenUrls.has(skillUrl)) continue
-      seenUrls.add(skillUrl)
-
-      repos.push({
-        ...repo,
-        url: skillUrl,
-        installable,
-        skillPath,
-        treeHash: entry.blobSha,
-        license: spdxId,
-      })
-      await delay(50)
-    }
-  }
-}
-
 /**
  * SMI-3229: Run Phase 3b broad SKILL.md discovery with incomplete_results fallback.
  *
@@ -222,13 +109,17 @@ async function processSearchResults(
  * @param validationOptions - Strict validation and minimum content length options
  * @param maxPages - Maximum pages per query (capped by caller)
  * @param telemetry - Shared rate-limit telemetry collector.
+ * @param backfillPlan - SMI-5286 1c: when present, run the size-faceted backfill
+ *   crawl instead of the legacy broad+fallback loop. Optional → every existing
+ *   5-arg caller (the cron Phase-3b path + tests) is byte-stable.
  */
 export async function runSubdirectorySearch(
   seenUrls: Set<string>,
   validationCache: Map<string, SkillMdValidation>,
   validationOptions: { strictValidation?: boolean; minContentLength?: number },
   maxPages: number,
-  telemetry: RateLimitTelemetry
+  telemetry: RateLimitTelemetry,
+  backfillPlan?: BackfillFacetPlan
 ): Promise<{
   repos: GitHubRepository[]
   totalFound: number
@@ -238,6 +129,8 @@ export async function runSubdirectorySearch(
   incompleteResults: number
   searchMode: 'broad' | 'prefix-fallback'
   errors: string[]
+  /** SMI-5286 1c: present only when `backfillPlan` was supplied. */
+  backfill?: BackfillCrawlOutcome
 }> {
   const repos: GitHubRepository[] = []
   const errors: string[] = []
@@ -245,21 +138,56 @@ export async function runSubdirectorySearch(
   let totalRetries = 0
   const stats = { licenseFiltered: 0, licenseFetchFailed: 0 }
   let incompleteResults = 0
-  let searchMode: 'broad' | 'prefix-fallback' = 'broad'
+  const searchMode: 'broad' | 'prefix-fallback' = 'broad'
   // SMI-5286 Wave 1a: run-scoped per-skill extraction state. `enumerateTelemetry`
   // accumulates denylist/cap/truncation counters across the whole run;
   // `enumeratedRepos` guards one Trees call per repo across pages and prefixes.
   const enumerateTelemetry: EnumerateTelemetry = {}
   const enumeratedRepos = new Set<string>()
 
+  // ── SMI-5286 1c: size-faceted backfill crawl (replaces the legacy loop) ──
+  if (backfillPlan) {
+    const backfill = await runBackfillFacetCrawl(
+      backfillPlan,
+      seenUrls,
+      validationCache,
+      validationOptions,
+      repos,
+      stats,
+      telemetry,
+      enumerateTelemetry,
+      enumeratedRepos,
+      errors
+    )
+    console.log(
+      `[Backfill] Facet crawl: ${repos.length} skills added, ${backfill.facets_completed}/${backfill.facets_total} facets, ${backfill.ranges_crawled} ranges this dispatch, ` +
+        `${stats.licenseFiltered} license-filtered, cap_saturated=${backfill.cap_saturated}, truncated=${backfill.truncated_repo_count}, done=${backfill.done}`
+    )
+    console.log(
+      `[Backfill] Per-skill extraction: ${enumeratedRepos.size} repos enumerated, ${enumerateTelemetry.denylistSkipped ?? 0} denylist-skipped, ${enumerateTelemetry.cappedRepoCount ?? 0} capped, ${enumerateTelemetry.truncatedRepoCount ?? 0} api-truncated`
+    )
+    return {
+      repos,
+      totalFound: repos.length,
+      retries: totalRetries,
+      licenseFiltered: stats.licenseFiltered,
+      licenseFetchFailed: stats.licenseFetchFailed,
+      incompleteResults,
+      searchMode,
+      errors,
+      backfill,
+    }
+  }
+
   // ── Primary: broad query (no path constraint) ────────────────────────
   console.log('[BroadDiscovery] Running broad filename:SKILL.md query...')
 
+  let primaryMode: 'broad' | 'prefix-fallback' = 'broad'
   for (let page = 1; page <= maxPages; page++) {
     const result = await searchCodeForSkillMdInSubdirectory(
       undefined, // no pathPrefix → broad query
       page,
-      30,
+      BROAD_QUERY_PER_PAGE,
       telemetry
     )
 
@@ -291,14 +219,14 @@ export async function runSubdirectorySearch(
       enumeratedRepos
     )
 
-    if (result.repos.length < 30) break
+    if (result.repos.length < BROAD_QUERY_PER_PAGE) break
     // Code search rate limit: 10 req/min → 6s between pages
     await delay(6000)
   }
 
   // ── Fallback: path-scoped queries if broad had incomplete results ────
   if (incompleteResults > 0) {
-    searchMode = 'prefix-fallback'
+    primaryMode = 'prefix-fallback'
     console.log(
       `[BroadDiscovery] ${incompleteResults} page(s) had incomplete results — falling back to path-scoped queries`
     )
@@ -307,7 +235,12 @@ export async function runSubdirectorySearch(
       console.log(`[BroadDiscovery] Fallback searching path:${pathPrefix}...`)
 
       for (let page = 1; page <= maxPages; page++) {
-        const result = await searchCodeForSkillMdInSubdirectory(pathPrefix, page, 30, telemetry)
+        const result = await searchCodeForSkillMdInSubdirectory(
+          pathPrefix,
+          page,
+          BROAD_QUERY_PER_PAGE,
+          telemetry
+        )
 
         totalRetries += result.retries
 
@@ -339,7 +272,7 @@ export async function runSubdirectorySearch(
           enumeratedRepos
         )
 
-        if (result.repos.length < 30) break
+        if (result.repos.length < BROAD_QUERY_PER_PAGE) break
         // Code search rate limit: 10 req/min → 6s between pages
         await delay(6000)
       }
@@ -350,7 +283,7 @@ export async function runSubdirectorySearch(
   }
 
   console.log(
-    `[BroadDiscovery] Complete (${searchMode}): ${repos.length} added, ${stats.licenseFiltered} license-filtered, ${stats.licenseFetchFailed} fetch-failed, ${incompleteResults} incomplete, ${totalRetries} retries`
+    `[BroadDiscovery] Complete (${primaryMode}): ${repos.length} added, ${stats.licenseFiltered} license-filtered, ${stats.licenseFetchFailed} fetch-failed, ${incompleteResults} incomplete, ${totalRetries} retries`
   )
   // SMI-5286 Wave 1a: per-skill extraction observability (§#1, Edit D).
   console.log(
@@ -372,7 +305,63 @@ export async function runSubdirectorySearch(
     licenseFiltered: stats.licenseFiltered,
     licenseFetchFailed: stats.licenseFetchFailed,
     incompleteResults,
-    searchMode,
+    searchMode: primaryMode,
     errors,
   }
 }
+
+/**
+ * Phase 3b wrapper: runs {@link runSubdirectorySearch}, folds its repos/errors/
+ * stats into the orchestrator's accumulators, and (SMI-5286 1c) surfaces the
+ * backfill cursor on `result.backfill_crawl`. Extracted here so
+ * `discovery-orchestrator.ts` stays under the 500-line gate. Never throws — a
+ * Phase-3b failure records a zeroed `subdirectory_search` and is swallowed
+ * (one phase must not abort the cycle), matching the prior inline behavior.
+ */
+export async function runSubdirectorySearchPhase(args: {
+  seenUrls: Set<string>
+  validationCache: Map<string, SkillMdValidation>
+  validationOptions: { strictValidation?: boolean; minContentLength?: number }
+  codeSearchMaxPages: number
+  telemetry: RateLimitTelemetry
+  repositories: GitHubRepository[]
+  result: IndexerResult
+  backfillFacetPlan?: BackfillFacetPlan
+}): Promise<void> {
+  try {
+    const subdirResult = await runSubdirectorySearch(
+      args.seenUrls,
+      args.validationCache,
+      args.validationOptions,
+      args.codeSearchMaxPages,
+      args.telemetry,
+      args.backfillFacetPlan
+    )
+    for (const repo of subdirResult.repos) {
+      args.repositories.push(repo)
+    }
+    args.result.errors.push(...subdirResult.errors)
+    args.result.subdirectory_search = {
+      repos_found: subdirResult.repos.length,
+      total_found: subdirResult.totalFound,
+      retries: subdirResult.retries,
+      license_filtered: subdirResult.licenseFiltered,
+      license_fetch_failed: subdirResult.licenseFetchFailed,
+      incomplete_results: subdirResult.incompleteResults,
+      search_mode: subdirResult.searchMode,
+    }
+    if (subdirResult.backfill) {
+      args.result.backfill_crawl = subdirResult.backfill
+    }
+  } catch (err) {
+    console.warn(`[CodeSearch] Phase 3b failed: ${err instanceof Error ? err.message : 'Unknown'}`)
+    args.result.subdirectory_search = {
+      repos_found: 0,
+      total_found: 0,
+      retries: 0,
+      license_filtered: 0,
+      license_fetch_failed: 0,
+      error: 'phase_failed',
+    }
+  }
+}
diff --git a/scripts/indexer/trees-search.ts b/scripts/indexer/trees-search.ts
index 4fd1eb57c..d372391ce 100644
--- a/scripts/indexer/trees-search.ts
+++ b/scripts/indexer/trees-search.ts
@@ -110,8 +110,12 @@ export async function fetchSkillPathsFromTree(
           // Match SKILL.md case-insensitively at any depth
           if (!entry.path.endsWith('/SKILL.md') && entry.path.toUpperCase() !== 'SKILL.MD') continue
           const slashIdx = entry.path.lastIndexOf('/')
-          if (slashIdx < 0) continue // root SKILL.md — no parent dir to extract
-          skillEntries.push({ path: entry.path.slice(0, slashIdx), blobSha: entry.sha })
+          // SMI-5286 1c (C-4): a root-level SKILL.md has no parent dir → emit path:''
+          // (buildSkillTreeUrl maps '' → …/tree/<branch>). Previously dropped, which
+          // silently lost repos whose only skill is a root SKILL.md once Phase 3a (the
+          // only other root-skill emitter) was disabled.
+          const skillPath = slashIdx < 0 ? '' : entry.path.slice(0, slashIdx)
+          skillEntries.push({ path: skillPath, blobSha: entry.sha })
         }
 
         if (data.truncated) {
diff --git a/scripts/tests/indexer/backfill-checkpoint.statemachine.test.ts b/scripts/tests/indexer/backfill-checkpoint.statemachine.test.ts
new file mode 100644
index 000000000..a28846f4d
--- /dev/null
+++ b/scripts/tests/indexer/backfill-checkpoint.statemachine.test.ts
@@ -0,0 +1,200 @@
+/**
+ * Facet driver state-machine tests (SMI-5286 1c)
+ * @module scripts/tests/indexer/backfill-checkpoint.statemachine
+ *
+ * The cursor <-> crawl-frontier state machine that drives the size-faceted
+ * backfill: a depth-first walk of the static `buildSizeFacets()` ladder where a
+ * saturated facet is bisected (its halves drained before the next facet) and the
+ * frontier (facetIndex + bisection stack + page) round-trips losslessly through
+ * the JSON checkpoint. Split out of backfill-checkpoint.test.ts to keep each file
+ * focused + under the 500-line convention.
+ */
+
+import { describe, it, expect } from 'vitest'
+import {
+  cursorToFacetState,
+  currentFacetRange,
+  bisectCurrentFacet,
+  advanceFacet,
+  isFacetCrawlDone,
+  facetStateToCursor,
+  type FacetCrawlState,
+} from '../../indexer/backfill-checkpoint.ts'
+import { buildSizeFacets } from '../../indexer/code-search.facets.ts'
+
+const FACETS = buildSizeFacets()
+
+describe('facet driver state machine (SMI-5286 1c)', () => {
+  it('cursorToFacetState cold-starts on null/undefined', () => {
+    expect(cursorToFacetState(null)).toEqual({ facetIndex: 0, pendingSubranges: [], lastPage: 0 })
+    expect(cursorToFacetState(undefined)).toEqual({
+      facetIndex: 0,
+      pendingSubranges: [],
+      lastPage: 0,
+    })
+  })
+
+  it('cursorToFacetState reconstructs facet_index, last_page, and pending_subranges', () => {
+    const state = cursorToFacetState({
+      path: '',
+      facet: '0-63',
+      last_page: 2,
+      facet_index: 3,
+      pending_subranges: [
+        [0, 63],
+        [64, 127],
+      ],
+    })
+    expect(state.facetIndex).toBe(3)
+    expect(state.lastPage).toBe(2)
+    expect(state.pendingSubranges).toEqual([
+      { lo: 0, hi: 63 },
+      { lo: 64, hi: 127 },
+    ])
+  })
+
+  it('cursorToFacetState maps a null upper bound back to Infinity', () => {
+    const state = cursorToFacetState({
+      path: '',
+      facet: '16384+',
+      last_page: 0,
+      facet_index: 8,
+      pending_subranges: [[16384, null]],
+    })
+    expect(state.pendingSubranges[0]).toEqual({ lo: 16384, hi: Number.POSITIVE_INFINITY })
+  })
+
+  it('currentFacetRange returns the top-level facet when the stack is empty', () => {
+    const state: FacetCrawlState = { facetIndex: 0, pendingSubranges: [], lastPage: 0 }
+    expect(currentFacetRange(state, FACETS)).toEqual(FACETS[0])
+  })
+
+  it('currentFacetRange returns the stack head (LIFO) when a bisection is in progress', () => {
+    const state: FacetCrawlState = {
+      facetIndex: 0,
+      pendingSubranges: [
+        { lo: 64, hi: 127 },
+        { lo: 0, hi: 63 },
+      ],
+      lastPage: 0,
+    }
+    expect(currentFacetRange(state, FACETS)).toEqual({ lo: 0, hi: 63 })
+  })
+
+  it('currentFacetRange returns null once the ladder is exhausted', () => {
+    const state: FacetCrawlState = {
+      facetIndex: FACETS.length,
+      pendingSubranges: [],
+      lastPage: 0,
+    }
+    expect(currentFacetRange(state, FACETS)).toBeNull()
+  })
+
+  it('bisectCurrentFacet RETIRES the top-level facet (facetIndex++) before pushing halves', () => {
+    // C-1 regression: a top-level bisection must advance facetIndex so the facet
+    // is never re-queried after its halves drain (else it re-saturates forever).
+    const state: FacetCrawlState = { facetIndex: 0, pendingSubranges: [], lastPage: 4 }
+    const ok = bisectCurrentFacet(state, { lo: 0, hi: 127 })
+    expect(ok).toBe(true)
+    expect(state.pendingSubranges).toEqual([
+      { lo: 64, hi: 127 },
+      { lo: 0, hi: 63 },
+    ])
+    expect(currentFacetRange(state, FACETS)).toEqual({ lo: 0, hi: 63 }) // crawled next
+    expect(state.lastPage).toBe(0)
+    expect(state.facetIndex).toBe(1) // top-level facet retired
+  })
+
+  it('C-1: after a top-level facet bisects and both halves drain, the NEXT facet is reached (no re-crawl)', () => {
+    const state: FacetCrawlState = { facetIndex: 0, pendingSubranges: [], lastPage: 0 }
+    bisectCurrentFacet(state, FACETS[0]) // facet 0 saturated → halves on stack, facetIndex=1
+    expect(currentFacetRange(state, FACETS)).toEqual({ lo: 0, hi: 63 })
+    advanceFacet(state) // lower half done → pop
+    expect(currentFacetRange(state, FACETS)).toEqual({ lo: 64, hi: 127 })
+    advanceFacet(state) // upper half done → pop; stack now empty
+    // The frontier must move to facet 1, NOT back to the (saturating) facet 0.
+    expect(state.pendingSubranges).toEqual([])
+    expect(currentFacetRange(state, FACETS)).toEqual(FACETS[1])
+    expect(state.facetIndex).toBe(1)
+  })
+
+  it('bisectCurrentFacet replaces the stack head with its halves (sub-range bisection, facetIndex unchanged)', () => {
+    const state: FacetCrawlState = {
+      facetIndex: 2,
+      pendingSubranges: [{ lo: 0, hi: 63 }],
+      lastPage: 1,
+    }
+    bisectCurrentFacet(state, { lo: 0, hi: 63 })
+    expect(state.pendingSubranges).toEqual([
+      { lo: 32, hi: 63 },
+      { lo: 0, hi: 31 },
+    ])
+    expect(state.facetIndex).toBe(2) // a sub-range bisected — top-level index does not move
+  })
+
+  it('bisectCurrentFacet returns false for an unsplittable range (lo === hi)', () => {
+    const state: FacetCrawlState = { facetIndex: 0, pendingSubranges: [], lastPage: 0 }
+    expect(bisectCurrentFacet(state, { lo: 5, hi: 5 })).toBe(false)
+    expect(state.pendingSubranges).toEqual([])
+    expect(state.facetIndex).toBe(0) // no retirement on a failed bisect
+  })
+
+  it('advanceFacet pops the stack when bisecting, else increments facetIndex', () => {
+    const withStack: FacetCrawlState = {
+      facetIndex: 1,
+      pendingSubranges: [{ lo: 0, hi: 63 }],
+      lastPage: 3,
+    }
+    advanceFacet(withStack)
+    expect(withStack.pendingSubranges).toEqual([])
+    expect(withStack.facetIndex).toBe(1) // unchanged — a sub-range finished, not the facet
+    expect(withStack.lastPage).toBe(0)
+
+    const noStack: FacetCrawlState = { facetIndex: 1, pendingSubranges: [], lastPage: 3 }
+    advanceFacet(noStack)
+    expect(noStack.facetIndex).toBe(2)
+    expect(noStack.lastPage).toBe(0)
+  })
+
+  it('isFacetCrawlDone is true only when the ladder AND the bisection frontier are empty', () => {
+    expect(
+      isFacetCrawlDone({ facetIndex: FACETS.length, pendingSubranges: [], lastPage: 0 }, FACETS)
+    ).toBe(true)
+    expect(
+      isFacetCrawlDone(
+        { facetIndex: FACETS.length, pendingSubranges: [{ lo: 0, hi: 63 }], lastPage: 0 },
+        FACETS
+      )
+    ).toBe(false)
+    expect(isFacetCrawlDone({ facetIndex: 3, pendingSubranges: [], lastPage: 0 }, FACETS)).toBe(
+      false
+    )
+  })
+
+  it('facetStateToCursor → cursorToFacetState round-trips through JSON (Infinity survives as null)', () => {
+    const state: FacetCrawlState = {
+      facetIndex: 8,
+      pendingSubranges: [{ lo: 16384, hi: Number.POSITIVE_INFINITY }],
+      lastPage: 2,
+    }
+    const cursor = facetStateToCursor(state, '.agents/skills', FACETS)
+    // The open-ended upper bound is persisted as null (JSON-safe).
+    expect(cursor.pending_subranges).toEqual([[16384, null]])
+    expect(cursor.path).toBe('.agents/skills')
+
+    // Survive a real JSON round-trip (the audit_logs metadata path).
+    const roundTripped = JSON.parse(JSON.stringify(cursor))
+    const restored = cursorToFacetState(roundTripped)
+    expect(restored).toEqual(state)
+  })
+
+  it("facetStateToCursor reports facet 'done' when the ladder is exhausted", () => {
+    const cursor = facetStateToCursor(
+      { facetIndex: FACETS.length, pendingSubranges: [], lastPage: 0 },
+      '',
+      FACETS
+    )
+    expect(cursor.facet).toBe('done')
+    expect(cursor.facet_index).toBe(FACETS.length)
+  })
+})
diff --git a/scripts/tests/indexer/backfill-facet-crawl.test.ts b/scripts/tests/indexer/backfill-facet-crawl.test.ts
new file mode 100644
index 000000000..8a45aa543
--- /dev/null
+++ b/scripts/tests/indexer/backfill-facet-crawl.test.ts
@@ -0,0 +1,434 @@
+/**
+ * Size-faceted backfill crawl tests (SMI-5286 1c)
+ *
+ * Drives the public entry `runSubdirectorySearch(..., backfillPlan)` to prove the
+ * size-faceted depth-first crawl in `subdirectory-search.helpers.ts`
+ * (`runBackfillFacetCrawl`) behaves per the SPARC §#3/§#5 contract:
+ *   1. Exhausts the static 9-facet ladder when no range saturates → done, all 9
+ *      facets completed, cursor.facet === 'done'.
+ *   2. A facet whose page-1 total exceeds the 1000-cap is BISECTED (its page-1
+ *      repos are NOT collected); the bisected sub-ranges still crawl to
+ *      completion → cap_saturated true, done true.
+ *   3. Budget + resume round-trip: maxRangesPerDispatch bounds one dispatch; the
+ *      returned cursor resumes losslessly across dispatches until done, with
+ *      facets_completed monotonically advancing.
+ *   4. The crawl threads a `size:` qualifier as the 5th arg of every code-search
+ *      call.
+ *
+ * Strategy: mirrors `subdirectory-search.perskill.test.ts` exactly — mock every
+ * I/O boundary (rate-limit delay, github-auth, code-search, license-filter,
+ * skill-processor, trees-enumerate) at the module level, import the SUT AFTER the
+ * mocks, and let `buildSkillTreeUrl` (pure) and the facet ladder run real.
+ */
+
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+import type { RateLimitTelemetry } from '../../indexer/_shared/rate-limit.ts'
+
+// ---------------------------------------------------------------------------
+// Module-level mocks — declared before any import of the SUT
+// (identical shape to subdirectory-search.perskill.test.ts)
+// ---------------------------------------------------------------------------
+
+// Mock delay so the 6s inter-page sleeps don't actually wait.
+vi.mock('../../indexer/_shared/rate-limit.ts', () => ({
+  GITHUB_API_DELAY: 0,
+  delay: vi.fn(async () => undefined),
+  withRateLimitTracking: vi.fn(),
+  withBackoff: vi.fn(async (fn: () => Promise<unknown>) => fn()),
+  newRateLimitTelemetry: vi.fn(() => ({})),
+}))
+
+vi.mock('../../indexer/_shared/github-auth.ts', () => ({
+  buildGitHubHeaders: vi.fn(async () => ({})),
+}))
+
+const mockSearchCode = vi.fn()
+vi.mock('../../indexer/code-search.ts', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../../indexer/code-search.ts')>()
+  return {
+    ...actual,
+    searchCodeForSkillMdInSubdirectory: (...args: unknown[]) => mockSearchCode(...args),
+  }
+})
+
+const mockFetchRepoLicense = vi.fn()
+vi.mock('../../indexer/license-filter.ts', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../../indexer/license-filter.ts')>()
+  return {
+    ...actual,
+    fetchRepoLicense: (...args: unknown[]) => mockFetchRepoLicense(...args),
+  }
+})
+
+const mockCheckSkillMdExists = vi.fn()
+vi.mock('../../indexer/skill-processor.ts', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../../indexer/skill-processor.ts')>()
+  return {
+    ...actual,
+    checkSkillMdExists: (...args: unknown[]) => mockCheckSkillMdExists(...args),
+  }
+})
+
+const mockEnumerateRepoSkillPaths = vi.fn()
+vi.mock('../../indexer/trees-enumerate.ts', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../../indexer/trees-enumerate.ts')>()
+  return {
+    ...actual,
+    enumerateRepoSkillPaths: (...args: unknown[]) => mockEnumerateRepoSkillPaths(...args),
+  }
+})
+
+// Imported AFTER mocks so the SUT binds the stubs.
+import { runSubdirectorySearch, type BackfillFacetPlan } from '../../indexer/subdirectory-search.ts'
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+const noTelemetry: RateLimitTelemetry = {} as RateLimitTelemetry
+
+/** Static ladder length — kept literal to assert against the SUT, not derive from it. */
+const LADDER_SIZE = 9
+
+/**
+ * Build a minimal GitHubRepository-shaped code-search hit. Owner is varied via a
+ * counter so per-repo dedup (`enumeratedRepos`) never swallows a later facet's
+ * single repo. Most assertions key on facet COUNTERS rather than collected-repo
+ * counts, but Case 2 depends on the numbering: a test's first hit is `owner1`.
+ */
+let repoCounter = 0
+function makeCodeSearchRepo(overrides: Record<string, unknown> = {}) {
+  repoCounter += 1
+  const owner = `owner${repoCounter}`
+  return {
+    owner,
+    name: 'skills-repo',
+    fullName: `${owner}/skills-repo`,
+    description: 'test',
+    url: `https://github.com/${owner}/skills-repo/tree/main/skills/x`,
+    stars: 5,
+    forks: 0,
+    topics: ['claude-code-skill'],
+    updatedAt: new Date().toISOString(),
+    defaultBranch: 'main',
+    installable: false,
+    repoName: 'skills-repo',
+    skillPath: 'skills/x',
+    discoveryPath: 'subdirectory_search:broad',
+    ...overrides,
+  }
+}
+
+/**
+ * A non-saturating facet: page 1 returns ONE repo (total well under the cap, but
+ * repos.length === 1 < perPage → short page → range exhausted in a single page).
+ * page>=2 returns an empty short page as a defensive backstop.
+ */
+function nonSaturatingPage(page: number) {
+  if (page === 1) {
+    return { repos: [makeCodeSearchRepo()], total: 5, retries: 0, incomplete_results: false }
+  }
+  return { repos: [], total: 5, retries: 0, incomplete_results: false }
+}
+
+/** A saturated facet: page-1 total exceeds the 1000-result code-search cap. */
+function saturatedPage1() {
+  // repos here MUST NOT be collected (the crawl bisects before processing them).
+  return {
+    repos: [makeCodeSearchRepo({ skillPath: 'skills/should-not-collect' })],
+    total: 5000,
+    retries: 0,
+    incomplete_results: false,
+  }
+}
+
+/** A BackfillFacetPlan with broad (no path:) query, overridable per test. */
+function makePlan(overrides: Partial<BackfillFacetPlan> = {}): BackfillFacetPlan {
+  return {
+    startCursor: null,
+    pathPrefix: undefined,
+    perPage: 100,
+    maxPagesPerRange: 20,
+    maxRangesPerDispatch: 100,
+    ...overrides,
+  }
+}
+
+beforeEach(() => {
+  repoCounter = 0
+  mockSearchCode.mockReset()
+  mockFetchRepoLicense.mockReset()
+  mockCheckSkillMdExists.mockReset()
+  mockEnumerateRepoSkillPaths.mockReset()
+
+  // Default I/O behaviour: permissive license, validation passes, one skill per repo.
+  mockFetchRepoLicense.mockResolvedValue({ license: 'MIT', fetchFailed: false })
+  mockCheckSkillMdExists.mockResolvedValue(true)
+  mockEnumerateRepoSkillPaths.mockResolvedValue({
+    entries: [{ path: 'skills/x', blobSha: 'sha1' }],
+    truncatedByCap: false,
+    truncatedByApi: false,
+  })
+})
+
+// ---------------------------------------------------------------------------
+
+describe('runSubdirectorySearch — size-faceted backfill crawl (SMI-5286 1c)', () => {
+  it('Case 1: exhausts the full 9-facet ladder when no range saturates', async () => {
+    // Every facet/sub-range is non-saturating and exhausts in a single short page.
+    mockSearchCode.mockImplementation(async (_pathPrefix: unknown, page: number) =>
+      nonSaturatingPage(page)
+    )
+
+    const result = await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      // maxPages is ignored on the backfill path (plan.maxPagesPerRange governs).
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 100 })
+    )
+
+    expect(result.backfill).toBeDefined()
+    const backfill = result.backfill!
+    expect(backfill.done).toBe(true)
+    expect(backfill.facets_completed).toBe(LADDER_SIZE)
+    expect(backfill.facets_total).toBe(LADDER_SIZE)
+    expect(backfill.cap_saturated).toBe(false)
+    // With no saturation, every top-level facet is one range → 9 ranges crawled.
+    expect(backfill.ranges_crawled).toBe(LADDER_SIZE)
+    // Terminal cursor: ladder exhausted → facet sentinel 'done', index at the end.
+    expect(backfill.cursor.facet).toBe('done')
+    expect(backfill.cursor.facet_index).toBe(LADDER_SIZE)
+    expect(backfill.cursor.pending_subranges).toEqual([])
+  })
+
+  it('Case 2: a saturated facet bisects (page-1 repos not collected) and still completes', async () => {
+    // FIRST top-level facet (size:0..127) saturates on page 1; every later range
+    // (including the saturated facet's bisected sub-ranges) is non-saturating.
+    let firstCall = true
+    mockSearchCode.mockImplementation(async (_pathPrefix: unknown, page: number) => {
+      if (firstCall) {
+        firstCall = false
+        // page 1 of the very first facet → saturated.
+        return saturatedPage1()
+      }
+      return nonSaturatingPage(page)
+    })
+
+    const result = await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 100 })
+    )
+
+    const backfill = result.backfill!
+    expect(backfill.cap_saturated).toBe(true)
+    // The crawl still drains the whole ladder: facet 0 is retired when it bisects,
+    // and its sub-ranges (which return total:5) drain before facet 1 is crawled.
+    expect(backfill.done).toBe(true)
+    expect(backfill.facets_completed).toBe(LADDER_SIZE)
+    expect(backfill.cursor.facet).toBe('done')
+
+    // No repo was collected from the saturated page-1: that page returned a repo
+    // whose skillPath was 'skills/should-not-collect', but the crawl bisected
+    // BEFORE calling processSearchResults on it. Every collected row therefore comes
+    // from the (bisected) sub-ranges / later facets, never the capped page.
+    const collectedPaths = result.repos.map((r) => r.skillPath)
+    expect(collectedPaths).not.toContain('skills/should-not-collect')
+    // enumerateRepoSkillPaths is only reached via processSearchResults, so the
+    // saturated repo was never enumerated.
+    const enumeratedOwners = mockEnumerateRepoSkillPaths.mock.calls.map((c) => c[0])
+    // The saturated repo is owner1 (first makeCodeSearchRepo call). It must NOT
+    // appear among enumerated owners.
+    expect(enumeratedOwners).not.toContain('owner1')
+  })
+
+  it('Case 3: budget + resume round-trip resumes losslessly across dispatches', async () => {
+    // Non-saturating for the whole run: each facet exhausts in one range.
+    mockSearchCode.mockImplementation(async (_pathPrefix: unknown, page: number) =>
+      nonSaturatingPage(page)
+    )
+
+    // --- Dispatch 1: budget of 2 ranges. ---
+    const seen = new Set<string>()
+    const cache = new Map()
+    const first = await runSubdirectorySearch(
+      seen,
+      cache,
+      {},
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 2 })
+    )
+    const firstBackfill = first.backfill!
+
+    expect(firstBackfill.done).toBe(false)
+    expect(firstBackfill.ranges_crawled).toBe(2)
+    // Two non-saturating facets completed; cursor is partway through the ladder.
+    expect(firstBackfill.facets_completed).toBe(2)
+    expect(firstBackfill.cursor.facet_index).toBe(2)
+    expect(firstBackfill.cursor.facet_index).toBeLessThan(LADDER_SIZE)
+    expect(firstBackfill.cursor.facet).not.toBe('done')
+
+    // --- Dispatch 2: resume from the returned cursor (fresh seenUrls; the cursor,
+    // not the dedup set, carries crawl position — documenting the choice). ---
+    const second = await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      1,
+      noTelemetry,
+      makePlan({ startCursor: firstBackfill.cursor, maxRangesPerDispatch: 2 })
+    )
+    const secondBackfill = second.backfill!
+
+    // Resumed: strictly MORE facets done than dispatch 1 (no facets lost / redone).
+    expect(secondBackfill.facets_completed).toBeGreaterThan(firstBackfill.facets_completed)
+    expect(secondBackfill.facets_completed).toBe(4)
+    expect(secondBackfill.cursor.facet_index).toBe(4)
+
+    // --- Drain the rest in a loop until done; assert it reaches the full ladder. ---
+    let cursor = secondBackfill.cursor
+    let done = secondBackfill.done
+    let lastCompleted = secondBackfill.facets_completed
+    let guard = 0
+    while (!done) {
+      if (guard++ > 20) throw new Error('resume loop did not converge')
+      const next = await runSubdirectorySearch(
+        new Set<string>(),
+        new Map(),
+        {},
+        1,
+        noTelemetry,
+        makePlan({ startCursor: cursor, maxRangesPerDispatch: 2 })
+      )
+      const nb = next.backfill!
+      // Monotonic non-regression of completed facets across every dispatch.
+      expect(nb.facets_completed).toBeGreaterThanOrEqual(lastCompleted)
+      lastCompleted = nb.facets_completed
+      cursor = nb.cursor
+      done = nb.done
+    }
+
+    expect(lastCompleted).toBe(LADDER_SIZE)
+    expect(cursor.facet).toBe('done')
+  })
+
+  it('Case 4: passes a size: qualifier as the 5th arg to the code-search call', async () => {
+    mockSearchCode.mockImplementation(async (_pathPrefix: unknown, page: number) =>
+      nonSaturatingPage(page)
+    )
+
+    await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 3 })
+    )
+
+    expect(mockSearchCode).toHaveBeenCalled()
+    // Every call carries a 5th arg that is a `size:` qualifier string.
+    const sizeArgs = mockSearchCode.mock.calls.map((c) => c[4])
+    expect(sizeArgs.length).toBeGreaterThan(0)
+    for (const arg of sizeArgs) {
+      expect(typeof arg).toBe('string')
+      expect(arg as string).toMatch(/^size:/)
+    }
+    // The first facet (size:0..127) renders as `size:0..127`.
+    expect(sizeArgs).toContain('size:0..127')
+  })
+
+  // A mock where facet 0 (size:0..127) AND every bisected descendant (a finite
+  // range with hi <= 127) ALWAYS saturates; every other facet (lo >= 128, plus
+  // the open-ended tail) is non-saturating.
+  function facet0AlwaysSaturates() {
+    return async (
+      _pathPrefix: unknown,
+      page: number,
+      _perPage: unknown,
+      _telemetry: unknown,
+      sizeQualifier: string
+    ) => {
+      const m = /^size:(\d+)\.\.(\d+)$/.exec(sizeQualifier)
+      const withinFacet0 = m !== null && Number(m[2]) <= 127
+      if (withinFacet0 && page === 1) {
+        return { repos: [makeCodeSearchRepo()], total: 5000, retries: 0, incomplete_results: false }
+      }
+      return nonSaturatingPage(page)
+    }
+  }
+
+  it('Case 5 (C-1 regression): a PERSISTENTLY-saturating top-level facet is retired and the crawl still terminates', async () => {
+    // Pre-C-1-fix this looped forever re-crawling facet 0 (facets_completed stuck
+    // at 0). Post-fix facet 0 is retired on its first bisect, bisects down to
+    // single-byte truncation, then facets 1-8 complete to a clean terminal state.
+    mockSearchCode.mockImplementation(facet0AlwaysSaturates())
+
+    const result = await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 1000 })
+    )
+    const backfill = result.backfill!
+    expect(backfill.done).toBe(true) // terminates — no infinite loop
+    expect(backfill.facets_completed).toBe(LADDER_SIZE)
+    expect(backfill.cap_saturated).toBe(true)
+    // facet 0 bisects to single-byte buckets that can't subdivide → recorded truncated.
+    expect(backfill.truncated_repo_count).toBeGreaterThan(0)
+    expect(backfill.cursor.facet).toBe('done')
+  })
+
+  it('Case 6 (C-1 regression, budget-bounded): a saturating top-level facet advances facets_completed past 0', async () => {
+    // The decisive C-1 check: with a tiny budget and facet 0 always saturating,
+    // the top-level facet must RETIRE (facets_completed >= 1) — pre-fix it stayed
+    // at 0 across every dispatch, an infinite re-crawl that never made progress.
+    mockSearchCode.mockImplementation(facet0AlwaysSaturates())
+
+    const result = await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 3 })
+    )
+    expect(result.backfill!.facets_completed).toBeGreaterThanOrEqual(1)
+  })
+
+  it('M-1: a page error on a range is recorded as truncated and the crawl advances', async () => {
+    // First range errors on page 1; everything else is non-saturating.
+    let firstCall = true
+    mockSearchCode.mockImplementation(async (_pathPrefix: unknown, page: number) => {
+      if (firstCall) {
+        firstCall = false
+        return { repos: [], total: 0, retries: 0, incomplete_results: false, error: 'rate limited' }
+      }
+      return nonSaturatingPage(page)
+    })
+
+    const result = await runSubdirectorySearch(
+      new Set<string>(),
+      new Map(),
+      {},
+      1,
+      noTelemetry,
+      makePlan({ maxRangesPerDispatch: 100 })
+    )
+    const backfill = result.backfill!
+    // The errored range is surfaced (counted + in errors[]), not silently dropped,
+    // and the crawl advanced past it to complete the rest of the ladder.
+    expect(backfill.truncated_repo_count).toBeGreaterThanOrEqual(1)
+    expect(backfill.done).toBe(true)
+    expect(result.errors.some((e) => e.includes('rate limited'))).toBe(true)
+  })
+})
diff --git a/scripts/tests/indexer/code-search.facets.test.ts b/scripts/tests/indexer/code-search.facets.test.ts
new file mode 100644
index 000000000..d9e8ff971
--- /dev/null
+++ b/scripts/tests/indexer/code-search.facets.test.ts
@@ -0,0 +1,131 @@
+/**
+ * Tests for the size-facet partitioner (SMI-5286 Wave 1c)
+ *
+ * The facet ladder partitions the broad `filename:SKILL.md` code-search query by
+ * file SIZE so each sub-query stays under GitHub's 1000-result ceiling. These
+ * tests pin the load-bearing invariants: exhaustive+disjoint+contiguous coverage,
+ * a STABLE ladder length across calls (so `facets_total` is static for the
+ * checkpoint cursor), inclusive-inclusive `size:` qualifiers (no off-by-one), and
+ * the adaptive bisection contract (including its unsplittable guards).
+ */
+
+import { describe, it, expect } from 'vitest'
+import {
+  buildSizeFacets,
+  facetId,
+  facetToQualifier,
+  bisectFacet,
+  type SizeFacet,
+} from '../../indexer/code-search.facets.ts'
+
+describe('SMI-5286 Wave 1c: buildSizeFacets', () => {
+  it('is exhaustive, disjoint, and contiguous over [0, ∞)', () => {
+    const ladder = buildSizeFacets()
+
+    // The first bucket begins at size 0.
+    expect(ladder[0].lo).toBe(0)
+
+    // Walk adjacent pairs: the next bucket must begin exactly one past this hi.
+    ladder.slice(0, -1).forEach((bucket, i) => {
+      expect(ladder[i + 1].lo).toBe(bucket.hi + 1)
+      // Every bucket except the last has a finite hi that is not below its lo.
+      expect(bucket.hi).toBeGreaterThanOrEqual(bucket.lo)
+      expect(Number.isFinite(bucket.hi)).toBe(true)
+    })
+
+    // The last bucket has no upper bound.
+    expect(ladder[ladder.length - 1].hi).toBe(Number.POSITIVE_INFINITY)
+  })
+
+  it('returns a STABLE ladder length across two calls (facets_total must be static)', () => {
+    const [callA, callB] = [buildSizeFacets(), buildSizeFacets()]
+    expect(callA.length).toBe(callB.length)
+    // toBe compares by reference here, so this also requires both calls to return
+    // the very same array instance, which keeps facets_total from drifting.
+    expect(callA).toBe(callB)
+  })
+
+  it('uses the exact 9-bucket doubling ladder', () => {
+    // Finite boundaries double from 128 through 16384; the last bucket is open-ended.
+    expect(buildSizeFacets()).toEqual([
+      { lo: 0, hi: 127 },
+      { lo: 128, hi: 255 },
+      { lo: 256, hi: 511 },
+      { lo: 512, hi: 1023 },
+      { lo: 1024, hi: 2047 },
+      { lo: 2048, hi: 4095 },
+      { lo: 4096, hi: 8191 },
+      { lo: 8192, hi: 16383 },
+      { lo: 16384, hi: Number.POSITIVE_INFINITY },
+    ])
+  })
+})
+
+describe('SMI-5286 Wave 1c: facetToQualifier', () => {
+  it('renders the first (finite) bucket as inclusive-inclusive size:0..127', () => {
+    const [firstBucket] = buildSizeFacets()
+    expect(facetToQualifier(firstBucket)).toBe('size:0..127')
+  })
+
+  it('renders the open-ended bucket as size:>=16384 (no off-by-one)', () => {
+    const tailBucket = buildSizeFacets().slice(-1)[0]
+    expect(facetToQualifier(tailBucket)).toBe('size:>=16384')
+  })
+})
+
+describe('SMI-5286 Wave 1c: facetId', () => {
+  // A bucket with a finite hi is labelled with both bounds joined by a hyphen.
+  it('labels a finite bucket as `${lo}-${hi}`', () =>
+    expect(facetId({ lo: 0, hi: 127 })).toBe('0-127'))
+
+  // A bucket whose hi is +Infinity is labelled with lo followed by a plus sign.
+  it('labels the open-ended bucket as `${lo}+`', () =>
+    expect(facetId({ lo: 16384, hi: Number.POSITIVE_INFINITY })).toBe('16384+'))
+})
+
+describe('SMI-5286 Wave 1c: bisectFacet', () => {
+  it('splits a finite bucket into disjoint, contiguous halves covering the same union', () => {
+    const split = bisectFacet({ lo: 0, hi: 127 })
+    expect(split).not.toBeNull()
+    const [lower, upper] = split as [SizeFacet, SizeFacet]
+    expect(lower).toEqual({ lo: 0, hi: 63 })
+    expect(upper).toEqual({ lo: 64, hi: 127 })
+    // No gap, no overlap: the upper half begins one past the end of the lower half.
+    expect(upper.lo).toBe(lower.hi + 1)
+    // Together the halves span the parent's outer bounds, 0 and 127.
+    expect(lower.lo).toBe(0)
+    expect(upper.hi).toBe(127)
+  })
+
+  it('splits the open-ended bucket by doubling the pivot', () => {
+    const split = bisectFacet({ lo: 16384, hi: Number.POSITIVE_INFINITY })
+    expect(split).not.toBeNull()
+    const [lower, upper] = split as [SizeFacet, SizeFacet]
+    expect(lower).toEqual({ lo: 16384, hi: 32767 })
+    expect(upper).toEqual({ lo: 32768, hi: Number.POSITIVE_INFINITY })
+    // The finite half ends at 32767 and the open-ended remainder starts right after.
+    expect(upper.lo).toBe(lower.hi + 1)
+  })
+
+  it('returns null for an unsplittable finite facet (lo >= hi)', () => {
+    // A single-size bucket (lo === hi) and an inverted one (lo > hi) both give null.
+    const unsplittable = [{ lo: 5, hi: 5 }, { lo: 10, hi: 9 }]
+    unsplittable.forEach((facet) => expect(bisectFacet(facet)).toBeNull())
+  })
+
+  it('returns null for an open-ended facet anchored at 0 (cannot double)', () => {
+    expect(bisectFacet({ lo: 0, hi: Number.POSITIVE_INFINITY })).toBeNull()
+  })
+
+  it('returns null for an open-ended facet past the 4 MiB ceiling (terminates persistent saturation)', () => {
+    // Rationale (SMI-5286 1c C-1 follow-up): a SKILL.md above 4 MiB is not a real
+    // skill, and doubling an open-ended bucket can never arrive at lo === hi.
+    // Refusing to split at the ceiling lets a tail that keeps saturating terminate
+    // (it is recorded as truncated) rather than being bisected without end.
+    const fourMiB = 4 * 1024 * 1024
+    expect(bisectFacet({ lo: fourMiB, hi: Number.POSITIVE_INFINITY })).toBeNull()
+    expect(bisectFacet({ lo: fourMiB + 1, hi: Number.POSITIVE_INFINITY })).toBeNull()
+    // Half the ceiling is below it, so that open-ended bucket can still be split.
+    expect(bisectFacet({ lo: fourMiB / 2, hi: Number.POSITIVE_INFINITY })).not.toBeNull()
+  })
+})
diff --git a/scripts/tests/indexer/community-url-fork.test.ts b/scripts/tests/indexer/community-url-fork.test.ts
index b6d859dd0..bd7976c4c 100644
--- a/scripts/tests/indexer/community-url-fork.test.ts
+++ b/scripts/tests/indexer/community-url-fork.test.ts
@@ -451,3 +451,71 @@ describe('SMI-5176 freshness qualifier contracts', () => {
     expect(decodedQ).not.toContain('pushed:')
   })
 })
+
+// ---------------------------------------------------------------------------
+// SMI-5286 1c: size-facet qualifier + per_page=100 contracts
+// Asserts the size: qualifier reaches the emitted fetch URL (encoded) and that
+// the new default per_page=100 is present. Mirrors the freshness-qualifier
+// harness above (capture URL via fetch mock, then inspect q= and per_page=).
+// ---------------------------------------------------------------------------
+
+describe('SMI-5286 1c size-facet qualifier + per_page contracts', () => {
+  beforeEach(() => vi.restoreAllMocks())
+
+  it('appends a sizeQualifier (URL-encoded) to the code-search query', async () => {
+    let capturedUrl = ''
+    vi.spyOn(globalThis, 'fetch').mockImplementationOnce(async (url) => {
+      capturedUrl = String(url)
+      return makeFetchOk({ total_count: 0, incomplete_results: false, items: [] })
+    })
+
+    await searchCodeForSkillMdInSubdirectory(undefined, 1, 100, noTelemetry, 'size:0..127')
+
+    // Raw URL carries the percent-encoded colon (size%3A0..127).
+    expect(capturedUrl).toContain('size%3A0..127')
+    // Decoded query contains the literal qualifier alongside the base filter.
+    const decodedQ = decodeURIComponent(capturedUrl.split('q=')[1]?.split('&')[0] ?? '')
+    expect(decodedQ).toContain('size:0..127')
+    expect(decodedQ).toContain('filename:SKILL.md')
+  })
+
+  it('omits any size: qualifier when sizeQualifier is not supplied', async () => {
+    let capturedUrl = ''
+    vi.spyOn(globalThis, 'fetch').mockImplementationOnce(async (url) => {
+      capturedUrl = String(url)
+      return makeFetchOk({ total_count: 0, incomplete_results: false, items: [] })
+    })
+
+    await searchCodeForSkillMdInSubdirectory(undefined, 1, 100, noTelemetry)
+
+    const decodedQ = decodeURIComponent(capturedUrl.split('q=')[1]?.split('&')[0] ?? '')
+    expect(decodedQ).not.toContain('size:')
+  })
+
+  it('emits per_page=100 (GitHub max) for searchCodeForSkillMdInSubdirectory', async () => {
+    let capturedUrl = ''
+    vi.spyOn(globalThis, 'fetch').mockImplementationOnce(async (url) => {
+      capturedUrl = String(url)
+      return makeFetchOk({ total_count: 0, incomplete_results: false, items: [] })
+    })
+
+    // perPage precedes the telemetry argument, so it cannot simply be left off;
+    // passing `undefined` explicitly is what selects the parameter default. A
+    // literal 100 here would only echo itself and could never catch a regression.
+    await searchCodeForSkillMdInSubdirectory(undefined, 1, undefined, noTelemetry)
+
+    expect(capturedUrl).toContain('per_page=100')
+  })
+
+  it('emits per_page=100 (GitHub max) for searchCodeForSkillMd (root phase)', async () => {
+    let capturedUrl = ''
+    vi.spyOn(globalThis, 'fetch').mockImplementationOnce(async (url) => {
+      capturedUrl = String(url)
+      return makeFetchOk({ total_count: 0, incomplete_results: false, items: [] })
+    })
+    // Explicit `undefined` selects the perPage default instead of echoing a literal.
+    await searchCodeForSkillMd(1, undefined, noTelemetry)
+
+    expect(capturedUrl).toContain('per_page=100')
+  })
+})
diff --git a/scripts/tests/indexer/parse-env.backfill.test.ts b/scripts/tests/indexer/parse-env.backfill.test.ts
index 2de218614..68fa26b60 100644
--- a/scripts/tests/indexer/parse-env.backfill.test.ts
+++ b/scripts/tests/indexer/parse-env.backfill.test.ts
@@ -84,3 +84,62 @@ describe('parseEnv — BACKFILL_MODE (SMI-5286 Wave 1b)', () => {
     expect('BACKFILL_MODE' in env).toBe(true)
   })
 })
+
+describe('parseEnv — SMI-5286 1c backfill levers', () => {
+  let savedEnv: NodeJS.ProcessEnv
+
+  beforeEach(() => {
+    savedEnv = { ...process.env }
+    Object.keys(process.env).forEach((name) => {
+      delete process.env[name]
+    })
+    Object.assign(process.env, BASE_ENV)
+  })
+
+  afterEach(() => {
+    process.env = savedEnv
+  })
+
+  it('BACKFILL_PATH_PREFIX is undefined when absent or empty', () => {
+    delete process.env.BACKFILL_PATH_PREFIX
+    expect(parseEnv().BACKFILL_PATH_PREFIX).toBeUndefined()
+    process.env.BACKFILL_PATH_PREFIX = ''
+    expect(parseEnv().BACKFILL_PATH_PREFIX).toBeUndefined()
+  })
+
+  it('BACKFILL_PATH_PREFIX passes a non-empty prefix through verbatim', () => {
+    process.env.BACKFILL_PATH_PREFIX = '.agents/skills'
+    expect(parseEnv().BACKFILL_PATH_PREFIX).toBe('.agents/skills')
+  })
+
+  it('BACKFILL_MAX_RANGES defaults to 150 and honors an override', () => {
+    delete process.env.BACKFILL_MAX_RANGES
+    expect(parseEnv().BACKFILL_MAX_RANGES).toBe(150)
+    process.env.BACKFILL_MAX_RANGES = '40'
+    expect(parseEnv().BACKFILL_MAX_RANGES).toBe(40)
+  })
+
+  it('raises the cap DEFAULTS only when BACKFILL_MODE is set (C-5)', () => {
+    // Backfill off: the conservative cron caps are expected.
+    const cronCaps = parseEnv()
+    expect(cronCaps.MAX_PAGES).toBe(5)
+    expect(cronCaps.MAX_REPOS).toBe(100)
+    expect(cronCaps.CODE_SEARCH_MAX_PAGES).toBe(1)
+
+    // Backfill on, with no cap variable set explicitly: the raised caps are expected.
+    process.env.BACKFILL_MODE = 'true'
+    const backfillCaps = parseEnv()
+    expect(backfillCaps.MAX_PAGES).toBe(10)
+    expect(backfillCaps.MAX_REPOS).toBe(500)
+    expect(backfillCaps.CODE_SEARCH_MAX_PAGES).toBe(10)
+  })
+
+  it('explicit cap env vars still override the backfill defaults', () => {
+    process.env.BACKFILL_MODE = 'true'
+    process.env.CODE_SEARCH_MAX_PAGES = '3'
+    process.env.MAX_PAGES = '7'
+    const parsed = parseEnv()
+    expect(parsed.CODE_SEARCH_MAX_PAGES).toBe(3)
+    expect(parsed.MAX_PAGES).toBe(7)
+  })
+})
diff --git a/scripts/tests/indexer/trees-search.test.ts b/scripts/tests/indexer/trees-search.test.ts
new file mode 100644
index 000000000..a959da6e1
--- /dev/null
+++ b/scripts/tests/indexer/trees-search.test.ts
@@ -0,0 +1,113 @@
+/**
+ * Unit tests for trees-search.ts — fetchSkillPathsFromTree (SMI-5286 1c §C-4)
+ *
+ * Asserts the root-level SKILL.md handling: a blob at repo-root `SKILL.md`
+ * must enumerate to a TreeSkillEntry with path:'' (buildSkillTreeUrl maps '' →
+ * …/tree/<branch>), NOT be silently dropped. A nested `tools/foo/SKILL.md`
+ * yields its parent dir, and a `use-skill.md` blob must NOT match (suffix gate).
+ *
+ * Mocks the network layer at globalThis.fetch so the production HTTP plumbing
+ * is exercised but no real requests are made. Matches the mock pattern used in
+ * community-url-fork.test.ts (vi.mock rate-limit passthrough + vi.spyOn fetch).
+ */
+
+import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest'
+import type { RateLimitTelemetry } from '../../indexer/_shared/rate-limit.ts'
+
+// ---------------------------------------------------------------------------
+// Stub the rate-limit module: zero delay, withBackoff invokes fn exactly once,
+// and withRateLimitTracking hands the request to globalThis.fetch (spied below).
+// ---------------------------------------------------------------------------
+
+vi.mock('../../indexer/_shared/rate-limit.ts', () => ({
+  GITHUB_API_DELAY: 0,
+  delay: vi.fn(async () => undefined),
+  withBackoff: vi.fn(async (fn: () => Promise<unknown>) => fn()),
+  // Telemetry is ignored; only opts.headers is forwarded to globalThis.fetch.
+  withRateLimitTracking: vi.fn(async (_telemetry: unknown, url: string, opts?: RequestInit) => {
+    const init = opts ? { headers: opts.headers } : {}
+    return globalThis.fetch(url, init)
+  }),
+}))
+
+vi.mock('../../indexer/_shared/github-auth.ts', () => ({
+  buildGitHubHeaders: vi.fn(async () => ({ Authorization: 'Bearer test-token' })),
+}))
+
+// Listed below the mocks for readability; Vitest hoists vi.mock above imports anyway.
+import { fetchSkillPathsFromTree } from '../../indexer/trees-search.ts'
+
+afterEach(() => vi.restoreAllMocks())
+
+const noTelemetry: RateLimitTelemetry = {} as RateLimitTelemetry
+
+// ---------------------------------------------------------------------------
+// Helpers to build minimal GitHub Trees API response payloads
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds one blob entry of the kind placed in a Trees API `tree` array.
+ * Only `path` and `sha` vary; mode, type and size are fixed filler values,
+ * and `url` is the blob URL under the acme/my-skills repo for that sha.
+ */
+function makeBlob(path: string, sha: string) {
+  const url = `https://api.github.com/repos/acme/my-skills/git/blobs/${sha}`
+  const entry = { path, mode: '100644', type: 'blob', sha, size: 123, url }
+  return entry
+}
+
+// Fakes a successful fetch Response carrying only ok/status (true/200), a
+// headers.get that always yields null, and json() resolving to the given body.
+function makeFetchOk(body: unknown): Response {
+  const headers = { get: () => null }
+  const json = async () => body
+  const stub = { ok: true, status: 200, headers, json }
+  return stub as unknown as Response
+}
+
+// ---------------------------------------------------------------------------
+// Root-level SKILL.md handling (SMI-5286 1c §C-4)
+// ---------------------------------------------------------------------------
+
+describe('fetchSkillPathsFromTree — root-level SKILL.md (SMI-5286 1c §C-4)', () => {
+  beforeEach(() => vi.restoreAllMocks())
+
+  it('emits path:"" for a root SKILL.md and the parent dir for a nested one; ignores use-skill.md', async () => {
+    // One untruncated tree mixing a root SKILL.md, a nested one, and a look-alike.
+    const treePayload = {
+      sha: 'treesha',
+      url: 'https://api.github.com/repos/acme/my-skills/git/trees/main',
+      tree: [
+        makeBlob('SKILL.md', 'rootsha'), // repo root: expected as path ''
+        makeBlob('tools/foo/SKILL.md', 'nestedsha'), // nested: expected as 'tools/foo'
+        makeBlob('docs/use-skill.md', 'usesha'), // look-alike suffix: expected to be skipped
+      ],
+      truncated: false,
+    }
+    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(makeFetchOk(treePayload))
+
+    const outcome = await fetchSkillPathsFromTree('acme', 'my-skills', 'main', noTelemetry)
+
+    expect(outcome.entries).toEqual([
+      { path: '', blobSha: 'rootsha' },
+      { path: 'tools/foo', blobSha: 'nestedsha' },
+    ])
+    expect(outcome.truncated).toBe(false)
+    expect(outcome.errors).toHaveLength(0)
+  })
+
+  it('does NOT match use-skill.md even when it is the only blob (suffix gate)', async () => {
+    // The tree holds a single blob whose name merely ends with "skill.md".
+    const loneBlobTree = {
+      sha: 'treesha',
+      url: 'https://api.github.com/repos/acme/my-skills/git/trees/main',
+      tree: [makeBlob('use-skill.md', 'usesha')],
+      truncated: false,
+    }
+    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(makeFetchOk(loneBlobTree))
+
+    const outcome = await fetchSkillPathsFromTree('acme', 'my-skills', 'main', noTelemetry)
+
+    expect(outcome.entries).toHaveLength(0)
+  })
+})