Skip to content

Commit

Permalink
[server] Add support for proxies (#33)
Browse files Browse the repository at this point in the history
Closes #32

* [plugin] pass proxy to the server
* untested! the committed code may contain potential issues

* sort import block

* [debug] print proxies

* stringify proxies

* Server side implementation WIP

* use ydl.urlopen again

* remove redundant object.values

* remove ellipsis from the features tuple
it's copied from coletdjnz's demo

* [plugin] remove redundant assignment to rh.proxies
[sessionmanager] add a logger class
[version] use dirname/../package.json

* Add error handling for bgutils functions

* add support for proxies in process.env

* variable naming

* code formatting

* process undefined proxy

* Error handling for bgConfig fetch

* Add support for env ALL_PROXY

* code formatting

* Simplify retrieving proxy from env variables

* Add trailing comma for _SUPPORTED_FEATURES

* prioritise env HTTPS_PROXY over ALL_PROXY

* Select proxy for youtube

* switch to select_proxy with yt api hostname

* fix proxy type: bool->str

---------

Co-authored-by: Brian Le <[email protected]>
Co-authored-by: grqx_wsl <[email protected]>
  • Loading branch information
3 people authored Sep 19, 2024
1 parent dee0d89 commit 11b20a4
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 38 deletions.
23 changes: 18 additions & 5 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
if typing.TYPE_CHECKING:
from yt_dlp import YoutubeDL

from yt_dlp.networking.common import Request
from yt_dlp.networking._helper import select_proxy
from yt_dlp.networking.common import Features, Request
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest

try:
Expand All @@ -21,8 +22,12 @@
@register_provider
class BgUtilHTTPPotProviderRH(GetPOTProvider):
_PROVIDER_NAME = 'BgUtilHTTPPot'
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded',
'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
VERSION = __version__
_SUPPORTED_PROXY_SCHEMES = (
'http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)

def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
base_url = ydl.get_info_extractor('Youtube')._configuration_arg(
Expand All @@ -31,9 +36,11 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data
raise UnsupportedRequest(
'One of [data_sync_id, visitor_data] must be passed')
try:
response = ydl.urlopen(Request(f'{base_url}/ping', extensions={'timeout': 5.0}))
response = ydl.urlopen(Request(
f'{base_url}/ping', extensions={'timeout': 5.0}, proxies={'all': None}))
except Exception as e:
raise UnsupportedRequest(f'Error reaching GET /ping (caused by {e!s})') from e
raise UnsupportedRequest(
f'Error reaching GET /ping (caused by {e!s})') from e
try:
response = json.load(response)
except json.JSONDecodeError as e:
Expand All @@ -51,15 +58,21 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data

def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
self._logger.info('Generating POT via HTTP server')
if ((proxy := select_proxy('https://jnn-pa.googleapis.com', self.proxies))
!= select_proxy('https://youtube.com', self.proxies)):
self._logger.warning(
'Proxies for https://youtube.com and https://jnn-pa.googleapis.com are different. '
'This is likely to cause subsequent errors.')

try:
response = ydl.urlopen(Request(
f'{self.base_url}/get_pot', data=json.dumps({
'client': client,
'visitor_data': visitor_data,
'data_sync_id': data_sync_id,
'proxy': proxy,
}).encode(), headers={'Content-Type': 'application/json'},
extensions={'timeout': 12.5}))
extensions={'timeout': 12.5}, proxies={'all': None}))
except Exception as e:
raise RequestError(
f'Error reaching POST /get_pot (caused by {e!s})') from e
Expand Down
18 changes: 15 additions & 3 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

if typing.TYPE_CHECKING:
from yt_dlp import YoutubeDL
from yt_dlp.networking._helper import select_proxy
from yt_dlp.networking.common import Features
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest
from yt_dlp.utils import Popen, classproperty

Expand All @@ -23,8 +25,12 @@
@register_provider
class BgUtilScriptPotProviderRH(GetPOTProvider):
_PROVIDER_NAME = 'BgUtilScriptPot'
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded',
'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
VERSION = __version__
_SUPPORTED_PROXY_SCHEMES = (
'http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)

@classproperty(cache=True)
def _default_script_path(self):
Expand All @@ -51,8 +57,13 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data
def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
self._logger.info(
f'Generating POT via script: {self.script_path}')

command_args = ['node', self.script_path]
if proxy := select_proxy('https://jnn-pa.googleapis.com', self.proxies):
if proxy != select_proxy('https://youtube.com', self.proxies):
self._logger.warning(
'Proxies for https://youtube.com and https://jnn-pa.googleapis.com are different. '
'This is likely to cause subsequent errors.')
command_args.extend(['-p', proxy])
if data_sync_id:
command_args.extend(['-d', data_sync_id])
elif visitor_data:
Expand All @@ -75,7 +86,8 @@ def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=
msg += f'\nstderr:\n{stderr.strip()}'
self._logger.debug(msg)
if returncode:
raise RequestError(f'_get_pot_via_script failed with returncode {returncode}')
raise RequestError(
f'_get_pot_via_script failed with returncode {returncode}')

try:
# The JSON response is always the last line
Expand Down
3 changes: 3 additions & 0 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@
},
"dependencies": {
"@commander-js/extra-typings": "commander-js/extra-typings",
"axios": "^1.7.7",
"bgutils-js": "^1.1.0",
"body-parser": "^1.20.2",
"commander": "^12.1.0",
"express": "^4.19.2",
"https-proxy-agent": "^7.0.5",
"jsdom": "^25.0.0",
"socks-proxy-agent": "^8.0.4",
"youtubei.js": "^10.4.0"
},
"devDependencies": {
Expand Down
10 changes: 8 additions & 2 deletions server/src/generate_once.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ const CACHE_PATH = path.resolve(__dirname, "..", "cache.json");
const program = new Command()
.option("-v, --visitor-data <visitordata>")
.option("-d, --data-sync-id <data-sync-id>")
.option("-p, --proxy <proxy-all>")
.option("--verbose");

program.parse();
const options = program.opts();

(async () => {
const dataSyncId = options.dataSyncId;
const visitorData = options.visitorData;
const dataSyncId = options.dataSyncId;
const proxy = options.proxy || "";
const verbose = options.verbose || false;
let visitIdentifier: string;
const cache: YoutubeSessionDataCaches = {};
Expand Down Expand Up @@ -57,7 +59,11 @@ const options = program.opts();
visitIdentifier = generatedVisitorData;
}

const sessionData = await sessionManager.generatePoToken(visitIdentifier);
const sessionData = await sessionManager.generatePoToken(
visitIdentifier,
proxy,
);

try {
fs.writeFileSync(
CACHE_PATH,
Expand Down
7 changes: 5 additions & 2 deletions server/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const sessionManager = new SessionManager(options.verbose || false);
httpServer.post("/get_pot", async (request, response) => {
const visitorData = request.body.visitor_data as string;
const dataSyncId = request.body.data_sync_id as string;

const proxy: string = request.body.proxy;
let visitIdentifier: string;

// prioritize data sync id for authenticated requests, if passed
Expand All @@ -51,7 +51,10 @@ httpServer.post("/get_pot", async (request, response) => {
visitIdentifier = generatedVisitorData;
}

const sessionData = await sessionManager.generatePoToken(visitIdentifier);
const sessionData = await sessionManager.generatePoToken(
visitIdentifier,
proxy,
);
response.send({
po_token: sessionData.poToken,
visit_identifier: sessionData.visitIdentifier,
Expand Down
147 changes: 123 additions & 24 deletions server/src/session_manager.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import { BG } from "bgutils-js";
import { BG, BgConfig, DescrambledChallenge } from "bgutils-js";
import { JSDOM } from "jsdom";
import { Innertube } from "youtubei.js";
import { HttpsProxyAgent } from "https-proxy-agent";
import axios from "axios";
import { Agent } from "https";
import { SocksProxyAgent } from "socks-proxy-agent";

interface YoutubeSessionData {
poToken: string;
Expand All @@ -12,17 +16,40 @@ export interface YoutubeSessionDataCaches {
[visitIdentifier: string]: YoutubeSessionData;
}

export class SessionManager {
shouldLog: boolean;
/**
 * Small console logger that can be silenced as a whole.
 *
 * Each level (debug, log, warn, error) forwards the message to the
 * `console` method of the same name, but only when logging was enabled
 * at construction time.
 */
class Logger {
    private shouldLog: boolean;

    /**
     * @param shouldLog when false, every method of this logger is a no-op
     *   (defaults to true)
     */
    constructor(shouldLog = true) {
        this.shouldLog = shouldLog;
    }

    /** Write `msg` via `console.debug` if logging is enabled. */
    debug(msg: string) {
        this.emit("debug", msg);
    }

    /** Write `msg` via `console.log` if logging is enabled. */
    log(msg: string) {
        this.emit("log", msg);
    }

    /** Write `msg` via `console.warn` if logging is enabled. */
    warn(msg: string) {
        this.emit("warn", msg);
    }

    /** Write `msg` via `console.error` if logging is enabled. */
    error(msg: string) {
        this.emit("error", msg);
    }

    // Single place where the enabled flag is checked; the console method is
    // looked up at call time, exactly as a direct `console.<level>(msg)` would.
    private emit(level: "debug" | "log" | "warn" | "error", msg: string) {
        if (!this.shouldLog) return;
        console[level](msg);
    }
}

export class SessionManager {
private youtubeSessionDataCaches: YoutubeSessionDataCaches = {};
private TOKEN_TTL_HOURS: number;
private logger: Logger;

constructor(
shouldLog = true,
youtubeSessionDataCaches: YoutubeSessionDataCaches = {},
) {
this.shouldLog = shouldLog;
this.logger = new Logger(shouldLog);
this.setYoutubeSessionDataCaches(youtubeSessionDataCaches);
this.TOKEN_TTL_HOURS = process.env.TOKEN_TTL
? parseInt(process.env.TOKEN_TTL)
Expand Down Expand Up @@ -59,35 +86,62 @@ export class SessionManager {
this.youtubeSessionDataCaches = youtubeSessionData || {};
}

log(msg: string) {
if (this.shouldLog) console.log(msg);
}

/**
 * Obtain a fresh visitor data string from a newly created Innertube session.
 *
 * @returns the `visitorData` value found in the session's client context,
 *   or `null` (after reporting an error) when that value is missing.
 */
async generateVisitorData(): Promise<string | null> {
// The session is created with retrieve_player disabled.
const innertube = await Innertube.create({ retrieve_player: false });
const visitorData = innertube.session.context.client.visitorData;
if (!visitorData) {
// NOTE(review): two error calls appear back to back here; presumably this
// view shows the removed and the added line of a diff together - confirm
// against the real file.
console.error("Unable to generate visitor data via Innertube");
this.logger.error("Unable to generate visitor data via Innertube");
return null;
}

return visitorData;
}

/**
 * Build the agent used to send requests through the given proxy.
 *
 * @param proxy proxy address, e.g. `http://host:port` or
 *   `socks5://user:pass@host:port`. A value without a `scheme://` prefix
 *   (such as `localhost:8080`) is treated as an HTTP proxy.
 * @returns an `HttpsProxyAgent` for http/https proxies, a `SocksProxyAgent`
 *   for socks/socks4/socks4a/socks5/socks5h proxies, or `undefined` when no
 *   proxy was given or its protocol is not supported.
 * @throws Error when the proxy address cannot be parsed as a URL.
 */
getProxyDispatcher(proxy: string | undefined): Agent | undefined {
    if (!proxy) return undefined;

    // Do not rely on `new URL()` throwing to detect a missing scheme: WHATWG
    // URL parsing accepts "localhost:8080" or "user:pass@host:3128" as a URL
    // whose scheme is "localhost" / "user", which would then be rejected as
    // an unsupported protocol. Check for an explicit "scheme://" instead.
    if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(proxy)) {
        // assume http if no protocol was passed
        proxy = `http://${proxy}`;
    }

    let protocol: string;
    try {
        protocol = new URL(proxy).protocol.replace(":", "");
    } catch (e) {
        // The agent constructors would fail on this value anyway; fail here
        // with a message that names the offending proxy.
        throw new Error(`Invalid proxy URL: ${proxy} (caused by ${e})`);
    }

    switch (protocol) {
        case "http":
        case "https":
            this.logger.log(`Using HTTP/HTTPS proxy: ${proxy}`);
            return new HttpsProxyAgent(proxy);
        case "socks":
        case "socks4":
        case "socks4a":
        case "socks5":
        case "socks5h":
            this.logger.log(`Using SOCKS proxy: ${proxy}`);
            return new SocksProxyAgent(proxy);
        default:
            this.logger.warn(`Unsupported proxy protocol: ${proxy}`);
            return undefined;
    }
}
// mostly copied from https://github.com/LuanRT/BgUtils/tree/main/examples/node
async generatePoToken(
visitIdentifier: string,
proxy: string = "",
): Promise<YoutubeSessionData> {
this.cleanupCaches();
const sessionData = this.youtubeSessionDataCaches[visitIdentifier];
if (sessionData) {
this.log(
this.logger.log(
`POT for ${visitIdentifier} still fresh, returning cached token`,
);
return sessionData;
}

this.log(
this.logger.log(
`POT for ${visitIdentifier} stale or not yet generated, generating...`,
);

Expand All @@ -98,32 +152,77 @@ export class SessionManager {
globalThis.window = dom.window as any;
globalThis.document = dom.window.document;

const bgConfig = {
fetch: (url: any, options: any) => fetch(url, options),
let dispatcher: Agent | undefined;
if (proxy) {
dispatcher = this.getProxyDispatcher(proxy);
} else {
dispatcher = this.getProxyDispatcher(
process.env.HTTPS_PROXY ||
process.env.HTTP_PROXY ||
process.env.ALL_PROXY,
);
}

const bgConfig: BgConfig = {
fetch: async (url: any, options: any): Promise<any> => {
try {
const response = await axios.post(url, options.body, {
headers: options.headers,
httpsAgent: dispatcher,
});

return {
ok: true,
json: async () => {
return response.data;
},
};
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return {
ok: false,
json: async () => {
return null;
},
};
}
},
globalObj: globalThis,
identity: visitIdentifier,
requestKey,
};

const challenge = await BG.Challenge.create(bgConfig);

let challenge: DescrambledChallenge | undefined;
try {
challenge = await BG.Challenge.create(bgConfig);
} catch (e) {
throw new Error(
`Error while attempting to retrieve BG challenge. err = ${e}`,
);
}
if (!challenge) throw new Error("Could not get Botguard challenge");

if (challenge.script) {
const script = challenge.script.find((sc) => sc !== null);
if (script) new Function(script)();
} else {
this.log("Unable to load Botguard.");
this.logger.log("Unable to load Botguard.");
}

const poToken = await BG.PoToken.generate({
program: challenge.challenge,
globalName: challenge.globalName,
bgConfig,
});
let poToken: string | undefined;
try {
poToken = await BG.PoToken.generate({
program: challenge.challenge,
globalName: challenge.globalName,
bgConfig,
});
} catch (e) {
throw new Error(
`Error while trying to generate PO token. e = ${e}`,
);
}

this.log(`po_token: ${poToken}`);
this.log(`visit_identifier: ${visitIdentifier}`);
this.logger.log(`po_token: ${poToken}`);
this.logger.log(`visit_identifier: ${visitIdentifier}`);

if (!poToken) {
throw new Error("po_token unexpected undefined");
Expand Down
Loading

0 comments on commit 11b20a4

Please sign in to comment.