Skip to content

Commit 3800cb7

Browse files
authored
Update fetch-data.ts
1 parent 1c1f448 commit 3800cb7

File tree

1 file changed

+212
-66
lines changed

1 file changed

+212
-66
lines changed

scripts/fetch-data.ts

Lines changed: 212 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -565,109 +565,255 @@ function replacePrivateImage(markdown: string, html: string): string {
565565
return html;
566566
}
567567

568+
// Replace the existing extractModulePropsFromZip with this enhanced diagnostic version.
568569
async function extractModulePropsFromZip(downloadUrl: string): Promise<Record<string, string>> {
569-
try {
570-
// Diagnostic log: show the download URL (truncated if very long)
571-
const truncUrl = downloadUrl.length > 200 ? `${downloadUrl.slice(0, 200)}...[truncated]` : downloadUrl;
572-
console.log(`Attempting to extract module.prop from URL: ${truncUrl}`);
573-
574-
// Extract module.prop content from zip URL (internal network, stable)
575-
console.log(`Running runzip to stream module.prop from remote zip (this may fail if URL needs special headers)`);
576-
const { stdout: modulePropContent } = await execAsync(`runzip -p "${downloadUrl}" module.prop`, {
577-
encoding: 'utf8',
578-
// increase buffer to reduce occasional truncation problems for larger outputs
579-
maxBuffer: 256 * 1024 // 256KB buffer
580-
});
570+
const props: Record<string, string> = {};
571+
const token = process.env.GRAPHQL_TOKEN || '';
572+
const tmpdir = (await import('os')).tmpdir();
573+
const fs = await import('fs');
574+
const path = await import('path');
575+
const util = await import('util');
576+
const execP = util.promisify((await import('child_process')).exec);
581577

582-
// Parse module.prop content
583-
const props: Record<string, string> = {};
584-
if (!modulePropContent) {
585-
console.warn(`runzip returned empty output for ${truncUrl}`);
586-
return props;
587-
}
578+
// Short log helper
579+
const trunc = (s: string, n = 200) => (s && s.length > n ? s.slice(0, n) + '...[truncated]' : s);
588580

589-
const lines = modulePropContent.split('\n');
590-
for (const line of lines) {
591-
const trimmed = line.trim();
592-
if (!trimmed || trimmed.startsWith('#')) continue;
581+
console.log(`Diagnostic: starting extraction for URL: ${trunc(downloadUrl, 400)}`);
593582

594-
const eqIndex = trimmed.indexOf('=');
595-
if (eqIndex > 0) {
596-
const key = trimmed.substring(0, eqIndex).trim();
597-
const value = trimmed.substring(eqIndex + 1).trim();
598-
props[key] = value;
583+
// Try to download into memory via fetch (preferred) with retries
584+
let buffer: Buffer | null = null;
585+
try {
586+
// dynamic fetch (use global fetch or node-fetch)
587+
let fetchFn: any;
588+
if (typeof (globalThis as any).fetch === 'function') {
589+
fetchFn = (globalThis as any).fetch.bind(globalThis);
590+
} else {
591+
try {
592+
const mod = await import('node-fetch');
593+
fetchFn = (mod.default || mod) as any;
594+
} catch (e) {
595+
console.warn('node-fetch not available, will fallback to curl later');
596+
fetchFn = null;
599597
}
600598
}
601599

602-
// Log basic module.prop parsing summary
603-
console.log(`Extracted module.prop keys: ${Object.keys(props).join(', ')}`);
604-
return props;
605-
} catch (err: any) {
606-
// Detailed error logging
607-
console.error(`Failed to extract props from ${downloadUrl}: ${err?.message || err}`);
608-
if (err?.stdout) {
609-
console.error(`runzip stdout (truncated):\n${String(err.stdout).slice(0, 2000)}`);
600+
if (fetchFn) {
601+
let lastErr: any = null;
602+
for (let attempt = 1; attempt <= 3; attempt++) {
603+
try {
604+
console.log(`HTTP: fetch attempt ${attempt} -> ${trunc(downloadUrl, 300)}`);
605+
const res = await fetchFn(downloadUrl, {
606+
method: 'GET',
607+
redirect: 'follow',
608+
headers: token ? { Authorization: `Bearer ${token}` } : {},
609+
});
610+
611+
// Log status and important headers
612+
try {
613+
const statusLine = `HTTP ${res.status} ${res.statusText || ''}`;
614+
console.log(`HTTP: status: ${statusLine}`);
615+
const hdrs: string[] = [];
616+
const hdrNames = ['content-type', 'content-length', 'content-disposition', 'x-ratelimit-remaining', 'retry-after'];
617+
for (const h of hdrNames) {
618+
const v = res.headers?.get ? res.headers.get(h) : (res.headers && res.headers[h]);
619+
if (v) hdrs.push(`${h}: ${v}`);
620+
}
621+
if (hdrs.length) console.log('HTTP headers:', hdrs.join(' | '));
622+
} catch (hdrErr) {
623+
console.warn('HTTP: failed to read some headers:', hdrErr?.message || hdrErr);
624+
}
625+
626+
if (!res.ok) {
627+
const body = await (res.text?.() ?? Promise.resolve(''));
628+
throw new Error(`HTTP ${res.status} ${res.statusText} - body-snippet: ${trunc(String(body), 500)}`);
629+
}
630+
631+
const arrayBuf = await res.arrayBuffer();
632+
buffer = Buffer.from(arrayBuf);
633+
console.log(`HTTP: downloaded ${buffer.length} bytes into memory`);
634+
break;
635+
} catch (e: any) {
636+
lastErr = e;
637+
const sleep = 200 * Math.pow(2, attempt - 1);
638+
console.warn(`HTTP fetch attempt ${attempt} failed: ${e?.message || e}. Retrying in ${sleep}ms`);
639+
await new Promise(r => setTimeout(r, sleep));
640+
}
641+
}
642+
if (!buffer) throw lastErr || new Error('fetch failed after retries');
643+
} else {
644+
console.warn('Fetch not available; will fallback to curl download later.');
645+
throw new Error('no-fetch');
610646
}
611-
if (err?.stderr) {
612-
console.error(`runzip stderr (truncated):\n${String(err.stderr).slice(0, 2000)}`);
647+
} catch (e) {
648+
console.warn('In-memory fetch path failed or unavailable:', e?.message || e);
649+
}
650+
651+
// If we have a buffer, inspect first bytes to detect HTML vs ZIP and try JS unzip
652+
if (buffer) {
653+
try {
654+
const head = buffer.slice(0, 16);
655+
const headHex = head.toString('hex');
656+
const headStr = head.toString('utf8', 0, Math.min(64, head.length));
657+
console.log(`Downloaded head (hex): ${headHex.slice(0, 200)}`);
658+
console.log(`Downloaded head (utf8 snippet): ${trunc(headStr, 200)}`);
659+
660+
// ZIP signature "PK\x03\x04" -> 50 4b 03 04
661+
if (headHex.startsWith('504b0304')) {
662+
console.log('Detected ZIP signature in downloaded data (PK..). Proceeding with JS unzipper if available.');
663+
try {
664+
const unzipper = await import('unzipper');
665+
const directory = await (unzipper as any).Open.buffer(buffer);
666+
console.log(`unzipper: entries count = ${directory.files.length}`);
667+
// Look for module.prop anywhere (root or nested)
668+
let file = directory.files.find((f: any) => f.path === 'module.prop');
669+
if (!file) file = directory.files.find((f: any) => /(^|\/|\\)module\.prop$/i.test(f.path));
670+
if (!file) {
671+
console.warn('unzipper: module.prop not found. Listing up to 200 entries for debugging:');
672+
console.warn(directory.files.map((f: any) => f.path).slice(0, 200).join('\n'));
673+
// save buffer to tmp for later analysis
674+
const savePath = path.join(tmpdir, `diag-${Date.now()}.zip`);
675+
fs.writeFileSync(savePath, buffer);
676+
console.warn(`Saved downloaded zip to ${savePath} for post-mortem`);
677+
return {};
678+
}
679+
const contentBuf: Buffer = await file.buffer();
680+
const content = contentBuf.toString('utf8');
681+
console.log(`Found module.prop at path="${file.path}", size=${contentBuf.length} bytes`);
682+
console.log('module.prop snippet (first 400 chars):\n' + trunc(content, 400));
683+
// parse properties
684+
for (const line of content.split(/\r?\n/)) {
685+
const t = line.trim();
686+
if (!t || t.startsWith('#')) continue;
687+
const idx = t.indexOf('=');
688+
if (idx > 0) props[t.substring(0, idx).trim()] = t.substring(idx + 1).trim();
689+
}
690+
console.log(`Parsed module.prop keys: ${Object.keys(props).join(', ')}`);
691+
return props;
692+
} catch (jsUnzipErr: any) {
693+
console.warn('JS unzip (unzipper) failed:', jsUnzipErr?.message || jsUnzipErr);
694+
// save buffer for analysis
695+
try {
696+
const savePath = path.join(tmpdir, `diag-buffer-failed-${Date.now()}.zip`);
697+
fs.writeFileSync(savePath, buffer);
698+
console.warn(`Saved buffer to ${savePath} for post-mortem`);
699+
} catch (saveErr: any) {
700+
console.warn('Failed to save buffer for post-mortem:', saveErr?.message || saveErr);
701+
}
702+
// fallthrough to external unzip fallback
703+
}
704+
} else {
705+
console.warn('Downloaded head does NOT look like ZIP. It may be HTML/error page. head snippet:', trunc(headStr, 200));
706+
// save buffer for analysis
707+
try {
708+
const savePath = path.join(tmpdir, `diag-nonzip-${Date.now()}.bin`);
709+
fs.writeFileSync(savePath, buffer);
710+
console.warn(`Saved downloaded response to ${savePath} for post-mortem`);
711+
} catch (saveErr: any) {
712+
console.warn('Failed to save non-zip buffer:', saveErr?.message || saveErr);
713+
}
714+
// no point continuing JS-unzip path
715+
}
716+
} catch (inspectErr: any) {
717+
console.warn('Failed to inspect downloaded buffer:', inspectErr?.message || inspectErr);
613718
}
719+
}
614720

615-
// Diagnostic: try to save the remote asset to disk for inspection using curl (best-effort)
721+
// External-tool fallback: write file via curl and use unzip -l / unzip -p to inspect and extract module.prop
722+
try {
723+
const tmpDir = fs.mkdtempSync(path.join(tmpdir, 'diag-curl-'));
724+
const tmpFile = path.join(tmpDir, `asset-${Date.now()}.zip`);
725+
const authHeader = token ? `-H "Authorization: Bearer ${token}"` : '';
726+
console.log(`Fallback: saving remote asset to ${tmpFile} using curl (authHeader present: ${!!token})`);
616727
try {
617-
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'diag-asset-'));
618-
const tmpFile = path.join(tmpDir, `asset-${Date.now()}.zip`);
619-
const tokenHeader = GRAPHQL_TOKEN ? `-H "Authorization: Bearer ${GRAPHQL_TOKEN}"` : '';
620-
console.warn(`Diagnostic: saving remote asset to ${tmpFile} using curl (may reveal HTML or error pages)`);
728+
// use curl -I to get headers first
621729
try {
622-
const curlCmd = `curl -sSL -f ${tokenHeader} "${downloadUrl}" -o "${tmpFile}"`;
623-
const { stdout: curlOut, stderr: curlErr } = await execAsync(curlCmd, { maxBuffer: 20 * 1024 * 1024 });
624-
if (curlOut) console.log(`curl stdout (truncated):\n${String(curlOut).slice(0, 1000)}`);
625-
if (curlErr) console.warn(`curl stderr (truncated):\n${String(curlErr).slice(0, 1000)}`);
730+
const { stdout: headOut } = await execP(`curl -I -L ${authHeader} "${downloadUrl}"`, { maxBuffer: 64 * 1024 });
731+
console.log('curl -I -L headers:\n' + trunc(headOut, 2000));
732+
} catch (hiErr: any) {
733+
console.warn('curl -I failed:', hiErr?.message || hiErr);
734+
}
735+
736+
// then try to download
737+
try {
738+
await execP(`curl -sSL -f ${authHeader} "${downloadUrl}" -o "${tmpFile}"`, { maxBuffer: 200 * 1024 * 1024 });
739+
console.log(`curl: downloaded file saved to ${tmpFile}`);
626740
} catch (curlErr: any) {
627-
console.warn(`curl download failed: ${curlErr?.message || curlErr}`);
628-
if (curlErr?.stdout) console.warn(`curl stdout (truncated): ${String(curlErr.stdout).slice(0,1000)}`);
629-
if (curlErr?.stderr) console.warn(`curl stderr (truncated): ${String(curlErr.stderr).slice(0,1000)}`);
741+
console.error('curl download failed:', curlErr?.message || curlErr);
742+
if (curlErr?.stdout) console.error('curl stdout snippet:', trunc(String(curlErr.stdout), 2000));
743+
if (curlErr?.stderr) console.error('curl stderr snippet:', trunc(String(curlErr.stderr), 2000));
744+
// keep going to try to list file if present
630745
}
631746

632-
// If file exists, try to list zip contents (unzip -l) and dump a head of bytes
747+
// If file exists, list entries
633748
if (fs.existsSync(tmpFile)) {
634749
try {
635-
const { stdout: listOut } = await execAsync(`unzip -l "${tmpFile}"`, { maxBuffer: 200 * 1024 });
636-
console.log(`unzip -l output (first 200 lines):\n${listOut.split('\n').slice(0, 200).join('\n')}`);
750+
const { stdout: listOut } = await execP(`unzip -l "${tmpFile}"`, { maxBuffer: 200 * 1024 });
751+
console.log('unzip -l output (first 200 lines):\n' + listOut.split('\n').slice(0, 200).join('\n'));
637752
} catch (listErr: any) {
638-
console.warn(`unzip -l failed on ${tmpFile}: ${listErr?.message || listErr}`);
639-
// Try zipinfo as alternative
753+
console.warn('unzip -l failed:', listErr?.message || listErr);
640754
try {
641-
const { stdout: zipinfoOut } = await execAsync(`zipinfo -1 "${tmpFile}"`, { maxBuffer: 200 * 1024 });
642-
console.log(`zipinfo -1 output (first 200 entries):\n${zipinfoOut.split('\n').slice(0,200).join('\n')}`);
755+
const { stdout: ziOut } = await execP(`zipinfo -1 "${tmpFile}"`, { maxBuffer: 200 * 1024 });
756+
console.log('zipinfo -1 output (first 200 entries):\n' + ziOut.split('\n').slice(0, 200).join('\n'));
643757
} catch (ziErr: any) {
644-
console.warn(`zipinfo failed: ${ziErr?.message || ziErr}`);
758+
console.warn('zipinfo failed:', ziErr?.message || ziErr);
759+
}
760+
}
761+
762+
// try to find module.prop entry via zipinfo and extract it
763+
try {
764+
const { stdout: entriesOut } = await execP(`zipinfo -1 "${tmpFile}"`, { maxBuffer: 200 * 1024 });
765+
const entries = entriesOut.split('\n').map(s => s.trim()).filter(Boolean);
766+
const candidate = entries.find(e => e === 'module.prop') || entries.find(e => /(^|\/|\\)module\.prop$/i.test(e));
767+
if (candidate) {
768+
console.log(`Found module.prop entry in zip: ${candidate}. Attempting to extract via unzip -p`);
769+
try {
770+
const { stdout: propOut } = await execP(`unzip -p "${tmpFile}" "${candidate.replace(/"/g,'\\"')}"`, { maxBuffer: 128 * 1024, encoding: 'utf8' } as any);
771+
console.log('module.prop content snippet (first 400 chars):\n' + trunc(propOut, 400));
772+
for (const line of propOut.split(/\r?\n/)) {
773+
const t = line.trim();
774+
if (!t || t.startsWith('#')) continue;
775+
const idx = t.indexOf('=');
776+
if (idx > 0) props[t.substring(0, idx).trim()] = t.substring(idx + 1).trim();
777+
}
778+
console.log(`Parsed module.prop keys (fallback): ${Object.keys(props).join(', ')}`);
779+
return props;
780+
} catch (extractErr: any) {
781+
console.warn('unzip -p extraction failed:', extractErr?.message || extractErr);
782+
}
783+
} else {
784+
console.warn('No module.prop entry found in zip entries');
645785
}
786+
} catch (entriesErr: any) {
787+
console.warn('Failed to list zip entries for candidate search:', entriesErr?.message || entriesErr);
646788
}
647789

648-
// Print head bytes (hex) for quick identification (HTML vs ZIP signature)
790+
// dump first bytes of file to help identify HTML vs ZIP
649791
try {
650792
const stats = fs.statSync(tmpFile);
651793
const fd = fs.openSync(tmpFile, 'r');
652794
const headLen = Math.min(256, stats.size);
653795
const buf = Buffer.alloc(headLen);
654796
fs.readSync(fd, buf, 0, headLen, 0);
655797
fs.closeSync(fd);
656-
console.log(`Saved file size: ${stats.size} bytes, head (hex, first ${headLen} bytes): ${buf.toString('hex').slice(0, 800)}`);
798+
console.log(`Saved file size: ${stats.size} bytes, head (hex): ${buf.toString('hex').slice(0, 512)}`);
657799
} catch (headErr: any) {
658-
console.warn(`Failed to read head bytes of saved file: ${headErr?.message || headErr}`);
800+
console.warn('Failed to read head bytes of saved file:', headErr?.message || headErr);
659801
}
660802

661-
console.warn(`Diagnostic: kept saved asset at ${tmpFile} and dir ${tmpDir} for post-mortem analysis`);
803+
console.warn(`Diagnostic: kept downloaded file for post-mortem at ${tmpFile} (directory ${tmpDir})`);
662804
} else {
663-
console.warn(`Diagnostic: curl did not produce a file at ${tmpFile}`);
805+
console.warn('Diagnostic: curl did not produce a saved file (download may have failed)');
664806
}
665-
} catch (diagErr: any) {
666-
console.warn(`Diagnostic step failed: ${diagErr?.message || diagErr}`);
807+
} catch (outerErr: any) {
808+
console.warn('Fallback diagnostic failed:', outerErr?.message || outerErr);
667809
}
668-
669-
return {};
810+
} catch (finalErr: any) {
811+
console.warn('Final diagnostics path encountered an error:', finalErr?.message || finalErr);
670812
}
813+
814+
// If we reached here, no module.prop was parsed
815+
console.warn('Diagnostic: unable to extract module.prop from URL. Returning empty props.');
816+
return {};
671817
}
672818

673819
// Repository / module identifiers that are reserved for project use and must
// not be treated as ordinary community module names.
const RESERVED_NAMES = [
  '.github',
  'submission',
  'developers',
  'modules',
  'org.kernelsu.example',
  'module_release',
];

0 commit comments

Comments
 (0)