Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion .github/e2e-dashboard/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@
.muted { color:var(--muted); } .small { font-size:12px; }
.group-h { margin:12px 0 4px; font-size:13px; font-weight:700; }
canvas { max-height:340px; }
.catbadge { display:inline-block; font-size:12px; font-weight:600; padding:2px 8px; border-radius:6px; }
.cat-gap { background:#e8eefc; color:#2949b8; }
.cat-env { background:#fdf3dc; color:#9a7400; }
.cat-real { background:#fbe7e7; color:#d83b3b; }
.legend { list-style:none; margin:10px 0 4px; padding:0; font-size:12px; color:var(--muted); }
.legend li { margin:5px 0; line-height:1.5; }
</style>
</head>
<body>
Expand Down Expand Up @@ -98,6 +104,21 @@ <h2>Run history <span class="muted small">(click a row to expand failures)</span
/**
 * Format a timestamp for display as "YYYY-MM-DD HH:MM" (UTC, from toISOString).
 * Falsy input yields an em dash; input that does not parse as a date is
 * returned unchanged.
 */
const fmtDate = (ts) => {
  if (!ts) {
    return "—";
  }
  const parsed = new Date(ts);
  if (Number.isNaN(parsed.getTime())) {
    return ts;
  }
  const iso = parsed.toISOString();
  return iso.slice(0, 16).replace("T", " ");
};
/**
 * HTML-escape a value for interpolation into markup or a quoted attribute.
 *
 * Falsy input (null, undefined, "", 0, false) yields "" — unchanged from the
 * previous behaviour. Any other value is coerced with String() first, so a
 * non-string field (e.g. a numeric run number) is rendered instead of
 * throwing "replace is not a function".
 *
 * @param {*} s - value to escape
 * @returns {string} text with & < > " ' replaced by HTML entities
 */
const esc = (s) => {
  // Single quote is included so the result is also safe inside '...' attributes.
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(s || "").replace(/[&<>"']/g, (ch) => entities[ch]);
};

// Root-cause categories emitted by parse-trx-to-json.py. A low raw pass-rate is
// usually dominated by expected Reyden gaps; the "Real issue" bucket is the
// actual backlog. A test with a CREATE step that Reyden can't run fails AT that
// step (an "Unsupported …" gap); a value mismatch means setup succeeded and the
// round-trip returned wrong data — a genuine bug.
// Display metadata per root-cause category, keyed by the category name string.
// `cls` is the CSS modifier class catBadge adds to the badge; `blurb` is the
// explanatory text shown next to the badge in the legend. A category name with
// no entry here still renders, just without a colour class or blurb.
const CATEGORIES = {
// Expected failures caused by backend limitations rather than the driver.
"Reyden capability gap (expected)": { cls:"cat-gap",
blurb:"The backend doesn't support the operation or protocol — unsupported DDL/statement/type (e.g. CREATE TABLE/SCHEMA), no Thrift endpoint on a SEA/REST-only warehouse, or CloudFetch. Expected: gate/skip these; they shouldn't count against the driver." },
// Failures attributed to the test environment.
"Environment / infra": { cls:"cat-env",
blurb:"Missing/misconfigured warehouse, read-only rejection, auth, timeout, or transport error. Not a driver bug — fix the environment or retry." },
// Everything else: the bucket that needs investigation.
"Real issue / to investigate": { cls:"cat-real",
blurb:"A genuine driver bug. A value/cast mismatch on an INSERT→SELECT→DELETE round-trip means setup succeeded but the data came back wrong (e.g. a SEA-path serialization difference); also covers SQL/syntax errors and anything unclassified." },
};
/**
 * Build the HTML for a category badge.
 * The label is HTML-escaped; a name with no CATEGORIES entry (or an entry
 * without `cls`) gets only the base "catbadge" class.
 */
const catBadge = (name) => {
  const entry = CATEGORIES[name];
  const modifier = (entry && entry.cls) || "";
  return `<span class="catbadge ${modifier}">${esc(name)}</span>`;
};

function bar(p, f, s) {
const t = p+f+s || 1;
return `<div class="bar" title="${p} passed / ${f} failed / ${s} skipped">
Expand Down Expand Up @@ -142,13 +163,26 @@ <h2>Run history <span class="muted small">(click a row to expand failures)</span
function renderAnalysis(latest) {
const el = document.getElementById("analysis");
if (!latest || !latest.failed) { el.innerHTML = '<p class="muted">No failures in the latest run. 🎉</p>'; return; }
const catEntries = Object.entries(latest.by_category||{});
const catRows = catEntries.map(([k,v]) =>
`<tr><td>${catBadge(k)}</td><td class="num">${v}</td></tr>`).join("");
const legend = catEntries.length
? `<ul class="legend">` + catEntries.map(([k]) =>
`<li>${catBadge(k)} ${esc((CATEGORIES[k]||{}).blurb||"")}</li>`).join("") + `</ul>`
: "";
const catSection = catEntries.length ? `
<div class="group-h">By root-cause category</div>
<table><thead><tr><th>Category</th><th class="num">Count</th></tr></thead><tbody>${catRows}</tbody></table>
${legend}` : "";

const sigRows = Object.entries(latest.by_signature||{}).map(([k,v]) =>
`<tr><td class="sig">${esc(k)}</td><td class="num">${v}</td></tr>`).join("");
const clsRows = Object.entries(latest.by_class||{}).map(([k,v]) =>
`<tr><td class="sig">${esc(k)}</td><td class="num">${v}</td></tr>`).join("");
el.innerHTML = `
<p class="muted small">Latest run <a href="${esc(latest.html_url)}">#${esc(latest.run_number||latest.run_id)}</a>
(${esc(latest.protocol)}${latest.read_only?", read-only":""}) — ${latest.failed} failures grouped below.</p>
${catSection}
<div class="group-h">By failure signature (root cause)</div>
<table><thead><tr><th>Signature</th><th class="num">Count</th></tr></thead><tbody>${sigRows}</tbody></table>
<div class="group-h">By test class</div>
Expand All @@ -167,8 +201,9 @@ <h2>Run history <span class="muted small">(click a row to expand failures)</span
const r = await fetch("data/" + run.detail);
const full = await r.json();
if (!full.failures || !full.failures.length) { det.firstChild.innerHTML = '<span class="muted">No failure detail recorded.</span>'; return; }
let html = "", curSig = null;
let html = "", curCat = null, curSig = null;
for (const f of full.failures) {
if (f.category && f.category !== curCat) { curCat = f.category; curSig = null; html += `<div class="group-h">${catBadge(f.category)}</div>`; }
if (f.signature !== curSig) { curSig = f.signature; html += `<div class="group-h sig">${esc(curSig)}</div>`; }
html += `<ul class="fail-list"><li><div class="tname">${esc(f.name)}</div>` +
(f.message ? `<div class="msg">${esc(f.message)}</div>` : "") + `</li></ul>`;
Expand Down
74 changes: 71 additions & 3 deletions .github/scripts/parse-trx-to-json.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,43 @@ def signature_for(message):
return "Unknown / no message"
m = message.replace("\r", " ").replace("\n", " ")

# Order matters: the most specific patterns win. In particular the
# Thrift-on-SEA and CloudFetch buckets must be matched before the generic
# "Couldn't connect / HttpRequestException" transport bucket, because their
# messages also contain that text.
patterns = [
# --- Reyden capability gaps (expected; see SIG_CATEGORY) -------------
# A Thrift session against a SEA/REST-only warehouse (e.g. Reyden) has
# no Thrift endpoint, so the server returns ENDPOINT_NOT_FOUND. This is
# NOT a missing/misconfigured warehouse — the warehouse works over REST.
(r"Thrift server error.*ENDPOINT_NOT_FOUND|ENDPOINT_NOT_FOUND.*Thrift server error",
"Thrift endpoint unavailable on SEA/Reyden warehouse"),
(r"Error in download process|CloudFetch.*download",
"CloudFetch not supported on Reyden (download failed)"),
(r"PARSER_UNSUPPORTED_FEATURE|UNSUPPORTED_FEATURE|Unsupported statement|"
r"Unsupported CREATE type|Unsupported Delta table type|Unsupported .*type",
"Reyden unsupported feature (DDL / statement / type)"),
# --- Environment / infra --------------------------------------------
(r"ENDPOINT_NOT_FOUND", "Warehouse not found (ENDPOINT_NOT_FOUND / HTTP 404)"),
(r"read[- ]?only|READ_ONLY|cannot be modified|not.*allowed.*read", "Read-only warehouse rejected write/DDL"),
(r"INSERT|UPDATE|DELETE|MERGE|CREATE TABLE|DROP TABLE|ALTER TABLE", "DML/DDL rejected"),
(r"PERMISSION_DENIED|not authorized|Forbidden|HTTP 403", "Permission denied (403)"),
(r"timeout|timed out|TimeoutException", "Timeout"),
(r"Couldn't connect|connection refused|HttpRequestException", "Connection / transport error"),
(r"TABLE_OR_VIEW_NOT_FOUND|cannot be found|does not exist", "Object not found"),
# --- Genuine SQL errors with specific codes ------------------------
# Checked before the broad assertion bucket below, whose "expected"
# token also appears in "Expected identifier ..." syntax-error text.
(r"PARSE_SYNTAX_ERROR|SYNTAX_ERROR", "SQL syntax error"),
# --- Genuine driver bugs --------------------------------------------
# A value/cast mismatch means the test got PAST any setup (a rejected
# CREATE TABLE would have failed earlier with an "Unsupported …" message
# in the Reyden-gap bucket above). So a wrong value on an
# INSERT→SELECT→DELETE round-trip is a real driver bug, e.g. a SEA-path
# result-serialization difference — not expected Reyden behaviour.
(r"CAST_INVALID_INPUT", "Type cast mismatch on round-trip"),
(r"TABLE_OR_VIEW_NOT_FOUND|cannot be found|does not exist", "Object not found"),
(r"Assert\.|Equal\(|Xunit|expected", "Assertion failed (value mismatch)"),
# --- Generic DML/DDL rejection (catch-all, lowest priority) --------
(r"INSERT|UPDATE|DELETE|MERGE|CREATE TABLE|DROP TABLE|ALTER TABLE", "DML/DDL rejected"),
]
for pat, label in patterns:
if re.search(pat, m, re.IGNORECASE):
Expand All @@ -69,6 +96,43 @@ def signature_for(message):
return "Other"


# Root-cause category layered on top of the fine-grained signature. The
# dashboard rolls failures up to these three buckets so a low raw pass-rate
# (dominated by expected Reyden gaps) doesn't mask the genuine driver bugs.
#
# Classification hinges on WHICH step failed, which the message already encodes:
# - A test with a CREATE TABLE/SCHEMA step that Reyden can't run fails AT that
# step with an "Unsupported …" message -> CAT_REYDEN_GAP (expected).
# - A value/cast mismatch means setup succeeded and the INSERT→SELECT→DELETE
# round-trip returned wrong data -> CAT_REAL (a genuine driver bug).
CAT_REYDEN_GAP = "Reyden capability gap (expected)"
CAT_ENVIRONMENT = "Environment / infra"
CAT_REAL = "Real issue / to investigate"

# Explicit signature -> category map. Any signature not listed here (including
# the dynamic "<ExceptionType>" fallbacks and the value/cast/DML/syntax buckets)
# is treated as CAT_REAL so genuine, unclassified failures surface rather than
# hide.
_SIGNATURE_CATEGORY = {
    "Thrift endpoint unavailable on SEA/Reyden warehouse": CAT_REYDEN_GAP,
    "CloudFetch not supported on Reyden (download failed)": CAT_REYDEN_GAP,
    "Reyden unsupported feature (DDL / statement / type)": CAT_REYDEN_GAP,
    "Warehouse not found (ENDPOINT_NOT_FOUND / HTTP 404)": CAT_ENVIRONMENT,
    "Read-only warehouse rejected write/DDL": CAT_ENVIRONMENT,
    "Permission denied (403)": CAT_ENVIRONMENT,
    "Timeout": CAT_ENVIRONMENT,
    "Connection / transport error": CAT_ENVIRONMENT,
    # "Assertion failed (value mismatch)", "Type cast mismatch on round-trip",
    # "Object not found", "DML/DDL rejected", "SQL syntax error", "Other",
    # "Unknown / no message" and any "<ExceptionType>" fall through to CAT_REAL.
}


def category_for(signature):
    """Map a fine-grained signature to one of the three root-cause buckets.

    Args:
        signature: Signature label to classify (the keys of
            ``_SIGNATURE_CATEGORY`` are the explicitly mapped labels).

    Returns:
        ``CAT_REYDEN_GAP`` or ``CAT_ENVIRONMENT`` when the signature is listed
        in ``_SIGNATURE_CATEGORY``; ``CAT_REAL`` for every other value, so
        unrecognised failures are surfaced rather than hidden.
    """
    return _SIGNATURE_CATEGORY.get(signature, CAT_REAL)


def class_of(test_name):
"""Best-effort owning class: strip the parameter list and the method."""
base = test_name.split("(", 1)[0]
Expand Down Expand Up @@ -150,16 +214,19 @@ def main():
# Trim payload: keep full detail for failures only.
for r in failed:
r["signature"] = signature_for(r["message"])
r["category"] = category_for(r["signature"])
if len(r["message"]) > 2000:
r["message"] = r["message"][:2000] + " …(truncated)"
if len(r["stack"]) > 2000:
r["stack"] = r["stack"][:2000] + " …(truncated)"

by_signature = {}
by_class = {}
by_category = {}
for r in failed:
by_signature[r["signature"]] = by_signature.get(r["signature"], 0) + 1
by_class[r["class"]] = by_class.get(r["class"], 0) + 1
by_category[r["category"]] = by_category.get(r["category"], 0) + 1

total = len(all_results)
record = {
Expand All @@ -182,9 +249,10 @@ def main():
"failed": len(failed),
"skipped": len(skipped),
"pass_rate": round(100.0 * len(passed) / total, 1) if total else 0.0,
"by_category": dict(sorted(by_category.items(), key=lambda kv: -kv[1])),
"by_signature": dict(sorted(by_signature.items(), key=lambda kv: -kv[1])),
"by_class": dict(sorted(by_class.items(), key=lambda kv: -kv[1])),
"failures": sorted(failed, key=lambda r: (r["signature"], r["name"])),
"failures": sorted(failed, key=lambda r: (r["category"], r["signature"], r["name"])),
}

with open(out_path, "w") as f:
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/update-e2e-dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def main():
summary = {k: record[k] for k in (
"run_id", "run_attempt", "run_number", "timestamp", "commit", "branch",
"protocol", "read_only", "html_url", "total", "passed", "failed",
"skipped", "pass_rate", "by_signature", "by_class",
"skipped", "pass_rate", "by_category", "by_signature", "by_class",
) if k in record}
summary["detail"] = detail_name

Expand Down
Loading