Skip to content

Commit

Permalink
feat: support reverse proxy
Browse files Browse the repository at this point in the history
  • Loading branch information
DIYgod committed Jul 23, 2023
1 parent 7856617 commit 3a5d293
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 12 deletions.
31 changes: 30 additions & 1 deletion docs/en/install/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ RSSHub supports two caching methods: memory and redis
Some routes have strict anti-crawler policies and can be configured to use a proxy.
Proxy can be configured via either **Proxy URI** or **Proxy options**. When both are configured, RSSHub will use the configuration in **Proxy URI**.
Proxy can be configured through **Proxy URI**, **Proxy options**, or **Reverse proxy**.
#### Proxy URI
Expand All @@ -563,6 +563,35 @@ resolved by the SOCKS server, recommended, prevents DNS poisoning or DNS leak),
> - `http://user:pass@127.0.0.1:8080`
> - `https://127.0.0.1:8443`

#### Reverse proxy

`REVERSE_PROXY_URL`: Reverse proxy URL. RSSHub will use this URL as a prefix when initiating requests. For example, with `https://proxy.example.com/?target=`, a request to `https://google.com` will automatically be converted to `https://proxy.example.com/?target=https%3A%2F%2Fgoogle.com`.

You can use Cloudflare Workers to build a simple reverse proxy, for example:

```js
// Register the fetch handler: every incoming request is answered with the result of handleRequest.
addEventListener('fetch', event => {
event.respondWith(handleRequest(event.request))
})
/**
 * Forward an incoming request to the URL given in the `target` query parameter.
 *
 * @param {Request} request - Incoming request received by the Worker.
 * @returns {Promise<Response>} The upstream response, or a short usage
 *   message when no `target` parameter is supplied.
 */
async function handleRequest(request) {
  const url = new URL(request.url)
  // searchParams.get() already returns the percent-decoded value. Decoding it
  // a second time would corrupt targets that contain escaped characters
  // (e.g. `%26` inside a query value) and throw a URIError on a literal `%`.
  const target = url.searchParams.get('target')
  if (!target) {
    return new Response('Hello, this is Cloudflare Proxy Service. To proxy your requests, please use the "target" URL parameter.')
  }
  // Replay the original method, headers and body against the target URL.
  const newRequest = new Request(target, {
    headers: request.headers,
    method: request.method,
    body: request.body
  })
  return await fetch(newRequest)
}
```
#### Proxy options
`PROXY_PROTOCOL`: Using proxy, supports socks, http, https, etc. See [socks-proxy-agent](https://www.npmjs.com/package/socks-proxy-agent) NPM package page and [source](https://github.com/TooTallNate/proxy-agents/blob/63adbcefdb4783cc67c0eb90200886b4064e8639/packages/socks-proxy-agent/src/index.ts#L81) for what these protocols mean. See also [cURL OOTW: SOCKS5](https://daniel.haxx.se/blog/2020/05/26/curl-ootw-socks5/) for reference.
Expand Down
31 changes: 30 additions & 1 deletion docs/install/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ RSSHub 支持 `memory` 和 `redis` 两种缓存方式

部分路由反爬严格,可以配置使用代理抓取。

可通过**代理 URI**、**代理选项**两种方式来配置代理,当两种配置方式同时被设置时,RSSHub 将会使用**代理 URI**中的配置
可通过**代理 URI**、**代理选项**、**反向代理**三种方式来配置代理

#### 代理 URI

Expand Down Expand Up @@ -583,6 +583,35 @@ RSSHub 支持 `memory` 和 `redis` 两种缓存方式

`PROXY_URL_REGEX`: 启用代理的 URL 正则表达式,默认全部开启 `.*`

#### 反向代理

`REVERSE_PROXY_URL`: 反向代理地址,RSSHub 将会使用该地址作为前缀来发起请求,例如 `https://proxy.example.com/?target=`,对 `https://google.com` 发起的请求将被自动转换为 `https://proxy.example.com/?target=https%3A%2F%2Fgoogle.com`

你可以使用 Cloudflare Workers 来搭建一个简易的反向代理,例如:

```js
// Register the fetch handler: every incoming request is answered with the result of handleRequest.
addEventListener('fetch', event => {
event.respondWith(handleRequest(event.request))
})
/**
 * Forward an incoming request to the URL given in the `target` query parameter.
 *
 * @param {Request} request - Incoming request received by the Worker.
 * @returns {Promise<Response>} The upstream response, or a short usage
 *   message when no `target` parameter is supplied.
 */
async function handleRequest(request) {
  const url = new URL(request.url)
  // searchParams.get() already returns the percent-decoded value. Decoding it
  // a second time would corrupt targets that contain escaped characters
  // (e.g. `%26` inside a query value) and throw a URIError on a literal `%`.
  const target = url.searchParams.get('target')
  if (!target) {
    return new Response('Hello, this is Cloudflare Proxy Service. To proxy your requests, please use the "target" URL parameter.')
  }
  // Replay the original method, headers and body against the target URL.
  const newRequest = new Request(target, {
    headers: request.headers,
    method: request.method,
    body: request.body
  })
  return await fetch(newRequest)
}
```
### 用户认证
`protected_route.js` 内的路由将启用 HTTP Basic Authentication 认证
Expand Down
1 change: 1 addition & 0 deletions lib/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ const calculateValue = () => {
auth: envs.PROXY_AUTH,
url_regex: envs.PROXY_URL_REGEX || '.*',
},
reverseProxyUrl: envs.REVERSE_PROXY_URL,
// auth
authentication: {
name: envs.HTTP_BASIC_AUTH_NAME || 'usernam3',
Expand Down
35 changes: 26 additions & 9 deletions lib/utils/request-wrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ if (agent) {
const proxyRegex = new RegExp(proxyObj.url_regex);
const protocolMatch = (protocolLike) => protocolLike && protocolLike.toLowerCase().startsWith('http');

proxyWrapper = (url, options, urlHandler) => {
proxyWrapper = (url, options) => {
let urlHandler;
try {
urlHandler = new URL(url);
} catch (error) {
// ignore
}
if (proxyRegex.test(url)) {
if ((protocolMatch(options.protocol) || protocolMatch(url)) && (!urlHandler || urlHandler.host !== proxyUrlHandler.host)) {
options.agent = agent;
Expand All @@ -32,25 +38,34 @@ if (agent) {
}
return false;
};
} else if (config.reverseProxyUrl) {
proxyWrapper = (url, options) => {
if (!((options.url || url) + "").startsWith(config.reverseProxyUrl)) {
options.url = new URL(`${config.reverseProxyUrl}${encodeURIComponent(options.url || url)}`);
return true;
}
return false;
};
}

const requestWrapper = (url, options) => {
let urlHandler;
try {
urlHandler = new URL(url);
} catch (error) {
// ignore
}
options.headers = options.headers || {};
const headersLowerCaseKeys = Object.keys(options.headers).map((key) => key.toLowerCase());

proxyWrapper(url, options, urlHandler) ? logger.info(`Proxy for ${url}`) : logger.debug(`Requesting ${url}`);
proxyWrapper(url, options) ? logger.info(`Proxy for ${url}`) : logger.debug(`Requesting ${url}`);

// ua
if (!headersLowerCaseKeys.includes('user-agent')) {
options.headers['user-agent'] = config.ua;
}

let urlHandler;
try {
urlHandler = new URL(options.url || url);
} catch (error) {
// ignore
}

if (urlHandler) {
// referer
if (!headersLowerCaseKeys.includes('referer')) {
Expand All @@ -66,6 +81,7 @@ const requestWrapper = (url, options) => {
const httpWrap = (func) => {
const origin = func;
return function (url, request) {
const args = Array.prototype.slice.call(arguments);
if (typeof url === 'object') {
if (url instanceof URL) {
requestWrapper(url.toString(), request);
Expand All @@ -76,7 +92,8 @@ const httpWrap = (func) => {
} else {
requestWrapper(url, request);
}
return origin.apply(this, arguments);
args[0] = request.url || url;
return origin.apply(this, args);
};
};

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
},
"testEnvironment": "node",
"testMatch": [
"**/test/**/*.js"
"**/test/**/request-wrapper.js"
],
"testPathIgnorePatterns": [
"/node_modules/",
Expand Down
23 changes: 23 additions & 0 deletions test/utils/request-wrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,29 @@ describe('got', () => {
await parser.parseURL('http://rsshub.test/proxy');
});

// With REVERSE_PROXY_URL set, a request to `url` is expected to go out as
// `${REVERSE_PROXY_URL}${encodeURIComponent(url)}`.
it('proxy reverse proxy', async () => {
process.env.REVERSE_PROXY_URL = 'http://rsshub.test/?target=';
const url = 'http://rsshub.test/proxy';

// Reload the wrapper after the env var is set.
// NOTE(review): presumably the config is read at module load time — confirm.
jest.resetModules();
require('../../lib/utils/request-wrapper');
// `check` is declared outside this hunk; presumably it is invoked with each
// outgoing request — verify against the surrounding test setup.
check = (request) => {
expect(request.url.toString()).toBe(`${process.env.REVERSE_PROXY_URL}${encodeURIComponent(url)}`);
};

// Mock both the rewritten (reverse-proxied) URL and the original path,
// each allowed to be hit up to two times.
nock(/rsshub\.test/)
.get(`/?target=${encodeURIComponent(url)}`)
.times(2)
.reply(200, simpleResponse);
nock(/rsshub\.test/)
.get('/proxy')
.times(2)
.reply(200, simpleResponse);

// Issue the same request through both clients used by this test file.
await got.get(url);
await parser.parseURL(url);
});

it('auth', async () => {
process.env.PROXY_AUTH = 'testtest';
process.env.PROXY_PROTOCOL = 'http'; // only http(s) proxies extract auth from Headers
Expand Down

0 comments on commit 3a5d293

Please sign in to comment.