Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions library/agent/Agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ wrap(fetch, "fetch", function mock() {
pattern: "Bytespider",
},
],
blockedSignatureAgents: "chatgpt.com",
monitoredSignatureAgents: "",
} satisfies Response),
};
};
Expand Down
6 changes: 6 additions & 0 deletions library/agent/Agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -398,13 +398,19 @@ export class Agent {
monitoredIPAddresses,
monitoredUserAgents,
userAgentDetails,
blockedSignatureAgents,
monitoredSignatureAgents,
} = await fetchBlockedLists(this.token);
this.serviceConfig.updateBlockedIPAddresses(blockedIPAddresses);
this.serviceConfig.updateBlockedUserAgents(blockedUserAgents);
this.serviceConfig.updateAllowedIPAddresses(allowedIPAddresses);
this.serviceConfig.updateMonitoredIPAddresses(monitoredIPAddresses);
this.serviceConfig.updateMonitoredUserAgents(monitoredUserAgents);
this.serviceConfig.updateUserAgentDetails(userAgentDetails);
this.serviceConfig.updateBlockedSignatureAgents(blockedSignatureAgents);
this.serviceConfig.updateMonitoredSignatureAgents(
monitoredSignatureAgents
);
} catch (error: any) {
console.error(`Aikido: Failed to update blocked lists: ${error.message}`);
}
Expand Down
43 changes: 43 additions & 0 deletions library/agent/ServiceConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ export class ServiceConfig {
private monitoredIPAddresses: { list: IPMatcher; key: string }[] = [];
private monitoredUserAgentRegex: RegExp | undefined;
private userAgentDetails: { pattern: RegExp; key: string }[] = [];
private blockedSignatureAgentRegex: RegExp | undefined;
private monitoredSignatureAgentRegex: RegExp | undefined;

constructor(
endpoints: EndpointConfig[],
Expand Down Expand Up @@ -278,4 +280,45 @@ export class ServiceConfig {
hasReceivedAnyStats() {
return this.receivedAnyStats;
}

updateBlockedSignatureAgents(blockedSignatureAgents: string) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line exceeds 120 characters with 'updateBlockedSignatureAgents(blockedSignatureAgents: string)' method signature More info

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

False positive

if (!blockedSignatureAgents) {
// If an empty string is passed, we want to set the regex to undefined
// e.g. new RegExp("").test("abc") == true
this.blockedSignatureAgentRegex = undefined;
return;
}
this.blockedSignatureAgentRegex = safeCreateRegExp(
blockedSignatureAgents,
"i"
);
}

/**
 * Replaces the regex used to recognise monitored signature agents.
 *
 * A falsy pattern (such as the empty string) clears the regex instead of
 * compiling it: an empty pattern matches every input, e.g.
 * new RegExp("").test("abc") == true.
 *
 * @param monitoredSignatureAgents Regex source; handed to safeCreateRegExp
 *   together with the "i" flag when it is non-empty.
 */
updateMonitoredSignatureAgents(monitoredSignatureAgents: string) {
  this.monitoredSignatureAgentRegex = monitoredSignatureAgents
    ? safeCreateRegExp(monitoredSignatureAgents, "i")
    : undefined;
}

/**
 * Tests a value against the blocked signature agent regex.
 *
 * @param ua The string to test.
 * @returns The result of the regex test, or false when no blocked
 *   signature agent regex is currently set.
 */
isSignatureAgentBlocked(ua: string): boolean {
  const blockedPattern = this.blockedSignatureAgentRegex;

  return blockedPattern ? blockedPattern.test(ua) : false;
}

/**
 * Tests a value against the monitored signature agent regex.
 *
 * @param ua The string to test.
 * @returns The result of the regex test, or false when no monitored
 *   signature agent regex is currently set.
 */
isSignatureAgentMonitored(ua: string): boolean {
  const monitoredPattern = this.monitoredSignatureAgentRegex;

  return monitoredPattern ? monitoredPattern.test(ua) : false;
}
}
12 changes: 12 additions & 0 deletions library/agent/api/fetchBlockedLists.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ export type Response = {
// If we want to collect stats about the individual user agents,
// we can loop through the userAgentDetails and match each pattern.
userAgentDetails: UserAgentDetails[];
blockedSignatureAgents: string;
monitoredSignatureAgents: string;
};

export async function fetchBlockedLists(token: Token): Promise<Response> {
Expand Down Expand Up @@ -56,6 +58,8 @@ export async function fetchBlockedLists(token: Token): Promise<Response> {
blockedUserAgents: string;
monitoredUserAgents: string;
userAgentDetails: UserAgentDetails[];
blockedSignatureAgents: string;
monitoredSignatureAgents: string;
} = JSON.parse(body);

return {
Expand Down Expand Up @@ -85,5 +89,13 @@ export async function fetchBlockedLists(token: Token): Promise<Response> {
result && Array.isArray(result.userAgentDetails)
? result.userAgentDetails
: [],
blockedSignatureAgents:
result && typeof result.blockedSignatureAgents === "string"
? result.blockedSignatureAgents
: "",
monitoredSignatureAgents:
result && typeof result.monitoredSignatureAgents === "string"
? result.monitoredSignatureAgents
: "",
};
}
19 changes: 18 additions & 1 deletion library/sources/HTTPServer.stats.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() {
key: "google_extended",
pattern: "Google-Extended",
},
{
key: "chatgpt_agent",
pattern: "chatgpt.com",
},
],
blockedSignatureAgents: "",
monitoredSignatureAgents: "chatgpt.com",
} satisfies Response;
};
});
Expand Down Expand Up @@ -95,17 +101,28 @@ t.test("it tracks monitored user agents", async () => {
},
timeoutInMS: 500,
}),
]).then(([response1, response2, response3]) => {
fetch({
url: new URL("http://localhost:3327/test"),
method: "GET",
headers: {
"Signature-Agent": "chatgpt.com",
},
timeoutInMS: 500,
}),
]).then(([response1, response2, response3, response4]) => {
t.equal(response1.statusCode, 200);
t.equal(response2.statusCode, 200);
t.equal(response3.statusCode, 200);
t.equal(response4.statusCode, 200);
const stats = agent.getInspectionStatistics().getStats();
t.same(stats.userAgents, {
breakdown: {
// eslint-disable-next-line camelcase
ai_data_scrapers: 1,
// eslint-disable-next-line camelcase
google_extended: 1,
// eslint-disable-next-line camelcase
chatgpt_agent: 1,
},
});
t.same(stats.ipAddresses, {
Expand Down
2 changes: 2 additions & 0 deletions library/sources/HTTPServer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() {
monitoredUserAgents: "",
monitoredIPAddresses: [],
userAgentDetails: [],
blockedSignatureAgents: "",
monitoredSignatureAgents: "",
} satisfies Response;
};
});
Expand Down
2 changes: 2 additions & 0 deletions library/sources/Hono.allowedIPAddresses.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ wrap(fetch, "fetch", function mock(original) {
],
monitoredIPAddresses: [],
monitoredUserAgents: "",
blockedSignatureAgents: "",
monitoredSignatureAgents: "",
} satisfies Response),
};
}
Expand Down
32 changes: 32 additions & 0 deletions library/sources/Hono.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ wrap(fetch, "fetch", function mock(original) {
pattern: "attacker",
},
],
blockedSignatureAgents: "chatgpt.com",
monitoredSignatureAgents: "",
} satisfies Response),
};
}
Expand Down Expand Up @@ -592,6 +594,36 @@ t.test("bypass list works", opts, async (t) => {
});
t.equal(response4.statusCode, 200);

// It does not block bypassed ip because of signature agent
const response5 = await fetch.fetch({
url: new URL("http://127.0.0.1:8769/"),
headers: {
"X-Forwarded-For": "123.1.2.254",
"Signature-Agent": "chatgpt.com",
},
});
t.equal(response5.statusCode, 200);

// It blocks non-bypassed ip because of signature agent
const response6 = await fetch.fetch({
url: new URL("http://127.0.0.1:8769/"),
headers: {
"X-Forwarded-For": "2.3.4.5",
"Signature-Agent": "chatgpt.com",
},
});
t.equal(response6.statusCode, 403);

// Not blocked signature agent
const response7 = await fetch.fetch({
url: new URL("http://127.0.0.1:8769/"),
headers: {
"X-Forwarded-For": "2.3.4.5",
"Signature-Agent": "some-other-agent",
},
});
t.equal(response7.statusCode, 200);

// Cleanup server
server.close();
});
Expand Down
54 changes: 43 additions & 11 deletions library/sources/http-server/checkIfRequestIsBlocked.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,9 @@ export function checkIfRequestIsBlocked(
? context.headers["user-agent"]
: undefined;

const isUserAgentBlocked = userAgent
? agent.getConfig().isUserAgentBlocked(userAgent)
: ({ blocked: false } as const);

if (userAgent) {
const isUserAgentBlocked = agent.getConfig().isUserAgentBlocked(userAgent);

const isMonitoredUserAgent = agent
.getConfig()
.isMonitoredUserAgent(userAgent);
Expand All @@ -134,17 +132,51 @@ export function checkIfRequestIsBlocked(
.getMatchingUserAgentKeys(userAgent);
agent.getInspectionStatistics().onUserAgentMatches(userAgentKeys);
}

if (isUserAgentBlocked.blocked) {
res.statusCode = 403;
res.setHeader("Content-Type", "text/plain");

res.end(
"You are not allowed to access this resource because you have been identified as a bot."
);

return true;
}
}

if (isUserAgentBlocked.blocked) {
res.statusCode = 403;
res.setHeader("Content-Type", "text/plain");
const signatureAgent =
context.headers && typeof context.headers["signature-agent"] === "string"
? context.headers["signature-agent"]
: undefined;

res.end(
"You are not allowed to access this resource because you have been identified as a bot."
);
if (signatureAgent) {
const isSignatureAgentBlocked = agent
.getConfig()
.isSignatureAgentBlocked(signatureAgent);

return true;
const isMonitoredSignatureAgent = agent
.getConfig()
.isSignatureAgentMonitored(signatureAgent);

if (isSignatureAgentBlocked || isMonitoredSignatureAgent) {
// Find all the matching user agent keys when it's a blocked or monitored signature agent
Copy link

@aikido-pr-checks aikido-pr-checks bot Aug 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line exceeds 120 characters limit making it harder to read and maintain More info

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

False detection

const userAgentKeys = agent
.getConfig()
.getMatchingUserAgentKeys(signatureAgent);
agent.getInspectionStatistics().onUserAgentMatches(userAgentKeys);
}

if (isSignatureAgentBlocked) {
res.statusCode = 403;
res.setHeader("Content-Type", "text/plain");

res.end(
"You are not allowed to access this resource because you have been identified as a bot."
);

return true;
}
}

return false;
Expand Down
Loading