From 835bfd4fbf4fab386d1051dc08ac410f31176458 Mon Sep 17 00:00:00 2001 From: Egor Ternovoy Date: Sun, 4 Aug 2024 23:35:56 +0400 Subject: [PATCH] services.json actualizer (#8) --- .github/workflows/update_data.yml | 52 ++ .gitignore | 1 + Cargo.lock | 727 +++++++++++++++++- README.md | 1 + config.yml | 11 + fastside-actualizer/Cargo.toml | 14 + fastside-actualizer/src/main.rs | 215 +++++- fastside-actualizer/src/serde_types.rs | 172 +++++ fastside-actualizer/src/services/default.rs | 51 ++ fastside-actualizer/src/services/mod.rs | 22 + fastside-actualizer/src/services/searx.rs | 66 ++ fastside-actualizer/src/types.rs | 27 + fastside-actualizer/src/utils/log_err.rs | 15 + fastside-actualizer/src/utils/mod.rs | 3 + fastside-actualizer/src/utils/normalize.rs | 11 + fastside-actualizer/src/utils/tags.rs | 188 +++++ fastside-shared/Cargo.toml | 9 + fastside-shared/src/client_builder.rs | 68 ++ {fastside => fastside-shared}/src/config.rs | 65 +- fastside-shared/src/errors.rs | 15 + fastside-shared/src/lib.rs | 7 + .../src/log_setup.rs | 0 fastside-shared/src/serde_types.rs | 72 +- fastside/Cargo.toml | 3 - fastside/src/crawler.rs | 67 +- fastside/src/errors.rs | 2 +- fastside/src/main.rs | 22 +- fastside/src/routes/api.rs | 2 +- fastside/src/routes/config.rs | 4 +- fastside/src/routes/redirect.rs | 9 +- fastside/src/search.rs | 5 +- fastside/src/types.rs | 7 +- fastside/src/utils/user_config.rs | 3 +- services.json | 15 +- 34 files changed, 1758 insertions(+), 193 deletions(-) create mode 100644 .github/workflows/update_data.yml create mode 100644 config.yml create mode 100644 fastside-actualizer/src/serde_types.rs create mode 100644 fastside-actualizer/src/services/default.rs create mode 100644 fastside-actualizer/src/services/mod.rs create mode 100644 fastside-actualizer/src/services/searx.rs create mode 100644 fastside-actualizer/src/types.rs create mode 100644 fastside-actualizer/src/utils/log_err.rs create mode 100644 fastside-actualizer/src/utils/mod.rs create mode 
100644 fastside-actualizer/src/utils/normalize.rs create mode 100644 fastside-actualizer/src/utils/tags.rs create mode 100644 fastside-shared/src/client_builder.rs rename {fastside => fastside-shared}/src/config.rs (54%) create mode 100644 fastside-shared/src/errors.rs rename {fastside => fastside-shared}/src/log_setup.rs (100%) diff --git a/.github/workflows/update_data.yml b/.github/workflows/update_data.yml new file mode 100644 index 0000000..25c04a7 --- /dev/null +++ b/.github/workflows/update_data.yml @@ -0,0 +1,52 @@ +name: Update Data + +on: + schedule: + # Run every hour + - cron: '0 * * * *' + workflow_dispatch: + +jobs: + update-data: + runs-on: ubuntu-latest + steps: + - name: Install tor and i2p + run: | + sudo apt-get update + sudo apt-get install tor i2pd -y + sudo systemctl start tor + sudo systemctl start i2pd + echo "Wait for tor and i2pd to start" + sleep 60 + + - name: Checkout the data branch + uses: actions/checkout@v4 + with: + ref: data + fetch-depth: 1 + + - name: Get services.json from master branch + run: git fetch origin master && git checkout origin/master -- services.json + + - name: Run Docker command + run: docker run --rm -e "FS__LOG=debug,reqwest=WARN,hyper_util=WARN,h2=WARN,rustls=WARN,hickory_proto=WARN,hickory_resolver=WARN" ghcr.io/cofob/fastside fastside-actualizer actualize services.json # no -it: Actions runners have no TTY + + - name: Commit and push changes to data branch + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git add data.json services.json + git diff --cached --quiet || git commit -m "Update data.json and services.json" # skip the commit when nothing is staged + git push origin data + + - name: Check current hour and commit to master if midnight + run: | + CURRENT_HOUR=$(date +'%H') + if [ "$CURRENT_HOUR" -eq 0 ]; then + git checkout master + git fetch origin data + git checkout origin/data -- services.json + git add services.json + git diff --cached --quiet || git commit -m "Update services.json from data branch" # skip the commit when nothing is staged + git push origin master + fi diff
--git a/.gitignore b/.gitignore index ff51ef8..6e6099b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /*.local* /debug/ /target/ +/data.json diff --git a/Cargo.lock b/Cargo.lock index c2105dd..c91c5d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,7 +8,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a" dependencies = [ - "bitflags", + "bitflags 2.6.0", "bytes", "futures-core", "futures-sink", @@ -31,7 +31,7 @@ dependencies = [ "actix-utils", "ahash", "base64 0.22.1", - "bitflags", + "bitflags 2.6.0", "bytes", "bytestring", "derive_more", @@ -100,7 +100,7 @@ dependencies = [ "futures-core", "futures-util", "mio 0.8.11", - "socket2", + "socket2 0.5.7", "tokio", "tracing", ] @@ -161,7 +161,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "smallvec", - "socket2", + "socket2 0.5.7", "time", "url", ] @@ -329,6 +329,199 @@ dependencies = [ "nom", ] +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + +[[package]] +name = "async-channel" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-executor" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7ebdfa2ebdab6b1760375fa7d6f382b9f486eac35fc994625a00e89280bdbb7" +dependencies = [ + "async-task", + "concurrent-queue", + "fastrand 2.1.0", + "futures-lite 2.3.0", + "slab", +] + +[[package]] +name = "async-global-executor" +version = "2.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c" +dependencies = [ + "async-channel 2.3.1", + "async-executor", + "async-io 2.3.3", + "async-lock 3.4.0", + "blocking", + "futures-lite 2.3.0", + "once_cell", +] + +[[package]] +name = "async-io" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +dependencies = [ + "async-lock 2.8.0", + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-lite 1.13.0", + "log", + "parking", + "polling 2.8.0", + "rustix 0.37.27", + "slab", + "socket2 0.4.10", + "waker-fn", +] + +[[package]] +name = "async-io" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6baa8f0178795da0e71bc42c9e5d13261aac7ee549853162e66a241ba17964" +dependencies = [ + "async-lock 3.4.0", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.3.0", + "parking", + "polling 3.7.2", + "rustix 0.38.34", + "slab", + "tracing", + "windows-sys 0.52.0", +] + +[[package]] +name = "async-lock" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" +dependencies = [ + "event-listener 2.5.3", +] + +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener 5.3.1", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-process" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6438ba0a08d81529c69b36700fa2f95837bfe3e776ab39cde9c14d9149da88" +dependencies = [ + "async-io 1.13.0", + "async-lock 2.8.0", + "async-signal", + "blocking", + "cfg-if", + "event-listener 
3.1.0", + "futures-lite 1.13.0", + "rustix 0.38.34", + "windows-sys 0.48.0", +] + +[[package]] +name = "async-signal" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfb3634b73397aa844481f814fad23bbf07fdb0eabec10f2eb95e58944b1ec32" +dependencies = [ + "async-io 2.3.3", + "async-lock 3.4.0", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + "rustix 0.38.34", + "signal-hook-registry", + "slab", + "windows-sys 0.52.0", +] + +[[package]] +name = "async-std" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" +dependencies = [ + "async-channel 1.9.0", + "async-global-executor", + "async-io 1.13.0", + "async-lock 2.8.0", + "async-process", + "crossbeam-utils", + "futures-channel", + "futures-core", + "futures-io", + "futures-lite 1.13.0", + "gloo-timers", + "kv-log-macro", + "log", + "memchr", + "once_cell", + "pin-project-lite", + "pin-utils", + "slab", + "wasm-bindgen-futures", +] + +[[package]] +name = "async-std-resolver" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc3b454643291f9a4a3bbdb35fa62efa4ba7be5ea13fe243e3be4352182ff4b8" +dependencies = [ + "async-std", + "async-trait", + "futures-io", + "futures-util", + "hickory-resolver", + "pin-utils", + "socket2 0.5.7", +] + +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.81" @@ -388,6 +581,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.6.0" @@ -406,6 +605,19 @@ 
dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703f41c54fc768e63e091340b424302bb1c29ef4aa0c7f10fe849dfb114d29ea" +dependencies = [ + "async-channel 2.3.1", + "async-task", + "futures-io", + "futures-lite 2.3.0", + "piper", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -487,7 +699,7 @@ version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -505,6 +717,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "config" version = "0.14.0" @@ -586,6 +807,12 @@ dependencies = [ "libc", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "crunchy" version = "0.2.2" @@ -602,6 +829,12 @@ dependencies = [ "typenum", ] +[[package]] +name = "data-encoding" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" + [[package]] name = "deranged" version = "0.3.11" @@ -658,6 +891,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = 
[ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "env_logger" version = "0.10.2" @@ -677,6 +922,69 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "event-listener" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d93877bcde0eb80ca09131a08d23f0a5c18a620b01db137dba666d18cd9b30c2" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener" +version = "5.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +dependencies = [ + "event-listener 5.3.1", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + [[package]] name = "fastside" version = "0.2.0" @@ -687,18 +995,15 @@ dependencies = [ "base64 0.22.1", "chrono", "clap", - "config", "fastside-shared", "futures", "log", "num_cpus", - "pretty_env_logger", "rand", "regex", "reqwest", "serde", "serde_json", - "serde_qs", "thiserror", "time", "tokio", @@ -710,15 +1015,33 @@ dependencies = [ name = "fastside-actualizer" version = "0.1.0" dependencies = [ + "anyhow", + "async-std-resolver", + "async-trait", + "chrono", + "clap", "fastside-shared", + "ipnet", + "log", + "regex", "reqwest", + "serde", + "serde_json", + "serde_yaml", + "tokio", + "url", ] [[package]] name = "fastside-shared" version = "0.1.0" dependencies = [ + "anyhow", "base64 0.22.1", + "config", + "log", + "pretty_env_logger", + "reqwest", "serde", "serde_json", "thiserror", @@ -788,6 +1111,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + +[[package]] +name = "futures-lite" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52527eb5074e35e9339c6b4e8d12600c7128b68fb25dcb9fa9dec18f7c25f3a5" +dependencies = [ + "fastrand 2.1.0", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.30" @@ -856,6 +1207,18 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +[[package]] +name = "gloo-timers" +version = "0.2.6" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.4.5" @@ -887,6 +1250,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -899,6 +1268,66 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + +[[package]] +name = "hickory-proto" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07698b8420e2f0d6447a436ba999ec85d8fbf2a398bbd737b82cac4a2e96e512" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna 0.4.0", + "ipnet", + "once_cell", + "rand", + "thiserror", + "tinyvec", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28757f23aa75c98f254cf0405e6d8c25b831b32921b050a66692427679b1f243" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot", + "rand", + "resolv-conf", + "smallvec", + "thiserror", + "tracing", +] + +[[package]] +name = "hostname" +version = "0.3.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" +dependencies = [ + "libc", + "match_cfg", + "winapi", +] + [[package]] name = "http" version = "0.2.12" @@ -1022,7 +1451,7 @@ dependencies = [ "http-body", "hyper", "pin-project-lite", - "socket2", + "socket2 0.5.7", "tokio", "tower", "tower-service", @@ -1052,6 +1481,16 @@ dependencies = [ "cc", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "idna" version = "0.5.0" @@ -1072,6 +1511,38 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.9", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "ipconfig" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +dependencies = [ + "socket2 0.5.7", + "widestring", + "windows-sys 0.48.0", + "winreg 0.50.0", +] + [[package]] name = "ipnet" version = "2.9.0" @@ -1084,7 +1555,7 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", "windows-sys 0.52.0", ] @@ -1121,6 +1592,15 @@ dependencies = [ "serde", ] +[[package]] +name = 
"kv-log-macro" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" +dependencies = [ + "log", +] + [[package]] name = "language-tags" version = "0.3.2" @@ -1151,6 +1631,18 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + [[package]] name = "local-channel" version = "0.1.5" @@ -1183,6 +1675,24 @@ name = "log" version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +dependencies = [ + "value-bag", +] + +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "match_cfg" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" [[package]] name = "memchr" @@ -1239,7 +1749,7 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", "wasi", "windows-sys 0.52.0", @@ -1276,7 +1786,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -1305,6 +1815,12 @@ dependencies = [ "hashbrown 0.13.2", ] +[[package]] +name = "parking" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" + [[package]] name = "parking_lot" version = "0.12.3" @@ -1423,6 +1939,48 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1d5c74c9876f070d3e8fd503d748c7d974c3e48da8f41350fa5222ef9b4391" +dependencies = [ + "atomic-waker", + "fastrand 2.1.0", + "futures-io", +] + +[[package]] +name = "polling" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "concurrent-queue", + "libc", + "log", + "pin-project-lite", + "windows-sys 0.48.0", +] + +[[package]] +name = "polling" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3ed00ed3fbf728b5816498ecd316d1716eecaced9c0c8d2c5a6740ca214985b" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi 0.4.0", + "pin-project-lite", + "rustix 0.38.34", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -1457,6 +2015,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quinn" version = "0.11.2" @@ -1499,7 +2063,7 @@ checksum = 
"8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" dependencies = [ "libc", "once_cell", - "socket2", + "socket2 0.5.7", "windows-sys 0.52.0", ] @@ -1548,7 +2112,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ - "bitflags", + "bitflags 2.6.0", ] [[package]] @@ -1627,7 +2191,17 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "webpki-roots", - "winreg", + "winreg 0.52.0", +] + +[[package]] +name = "resolv-conf" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" +dependencies = [ + "hostname", + "quick-error", ] [[package]] @@ -1652,7 +2226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64 0.21.7", - "bitflags", + "bitflags 2.6.0", "serde", "serde_derive", ] @@ -1688,6 +2262,33 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.37.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" +dependencies = [ + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys 0.4.14", + "windows-sys 0.52.0", +] + [[package]] name = "rustls" version = "0.23.12" @@ -1779,17 +2380,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_qs" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cd34f36fe4c5ba9654417139a9b3a20d2e1de6012ee678ad14d240c22c78d8d6" -dependencies = [ - "percent-encoding", - "serde", - "thiserror", -] - [[package]] name = "serde_spanned" version = "0.6.7" @@ -1811,6 +2401,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -1857,6 +2460,16 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "socket2" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "socket2" version = "0.5.7" @@ -1999,7 +2612,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.5.7", "tokio-macros", "windows-sys 0.52.0", ] @@ -2198,6 +2811,12 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -2211,7 +2830,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", - "idna", + "idna 0.5.0", "percent-encoding", "serde", ] @@ -2228,12 +2847,24 @@ version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "value-bag" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a84c137d37ab0142f0f2ddfe332651fdbf252e7b7dbb4e67b6c1f1b2e925101" + [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "want" version = "0.3.1" @@ -2334,6 +2965,28 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "widestring" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.8" @@ -2343,6 +2996,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.52.0" @@ -2500,6 +3159,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" 
+version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "winreg" version = "0.52.0" diff --git a/README.md b/README.md index 3620122..37cd26b 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ bottlenecks and rate-limiting. - [x] Anonymous and cached redirects via `/@cached/#{path}` routes. - [x] History redirects via `/_/{path}` routes. - [x] Fallback redirects. +- [x] Automatic update of services list. - [x] API. ## Demo diff --git a/config.yml b/config.yml new file mode 100644 index 0000000..368076f --- /dev/null +++ b/config.yml @@ -0,0 +1,11 @@ +--- +proxies: + tor: + url: socks5h://127.0.0.1:9050 + i2p: + url: http://127.0.0.1:4444 +default_user_config: + required_tags: + - clearnet + - https + - ipv4 diff --git a/fastside-actualizer/Cargo.toml b/fastside-actualizer/Cargo.toml index 62d9d56..7ba5b5b 100644 --- a/fastside-actualizer/Cargo.toml +++ b/fastside-actualizer/Cargo.toml @@ -10,3 +10,17 @@ reqwest = { version = "0.12.4", default-features = false, features = [ "rustls-tls", "http2", ] } # http client + +tokio = { version = "1.37.0", features = ["full"] } # async +async-std-resolver = "0.24.1" # async dns resolver +clap = { version = "4.5.4", features = ["derive"] } # cli +anyhow = "1.0.83" # error +serde = { version = "1.0.201", features = ["derive"] } # serialization +serde_yaml = "0.9.34" # serialization +url = { version = "2.5.0", features = ["serde"] } # url +serde_json = "1.0.117" # serialization +log = "0.4.21" # logging +async-trait = "0.1.81" # async trait +chrono = "0.4.38" # datetime +regex = "1.10.5" # regex +ipnet = "2.9.0" # ip utils diff --git a/fastside-actualizer/src/main.rs b/fastside-actualizer/src/main.rs index 707c72c..3a950ba 100644 --- a/fastside-actualizer/src/main.rs +++ b/fastside-actualizer/src/main.rs @@ -1,5 +1,214 @@ -/// Fastside 
services.json actualizer. +//! Fastside services.json actualizer. -fn main() { - println!("Hello, world!"); +use std::path::PathBuf; + +use anyhow::{anyhow, Context, Result}; +use clap::{Parser, Subcommand}; +use fastside_shared::{ + client_builder::build_client, + config::{load_config, CrawlerConfig, ProxyData}, + errors::CliError, + log_setup::configure_logging, + serde_types::{Service, StoredData}, +}; +use serde_types::ActualizerData; +use utils::{log_err::LogErrResult, normalize::normalize_instances, tags::update_instance_tags}; + +mod serde_types; +mod services; +mod types; +mod utils; + +#[macro_use] +extern crate log; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Cli { + #[command(subcommand)] + command: Option, + /// Path to the configuration file. + #[arg(short, long, default_value = None)] + config: Option, + /// Log level. Takes precedence over the FS__LOG env variable. Default is INFO. + #[arg(long, default_value = None)] + log_level: Option, +} +#[derive(Subcommand)] +enum Commands { + /// Actualize services.json. + Actualize { + /// Services file path. + #[arg(default_value = "services.json")] + services: PathBuf, + /// Output file path. Default is writing to services.json. + #[arg(short, long, default_value = None)] + output: Option, + /// Data file path. + #[arg(short, long, default_value = "data.json")] + data: PathBuf, + }, +} + +/// Update service instances by fetching new instances from the service update. 
+async fn update_service(service: &mut Service, client: reqwest::Client) { + let name = service.name.clone(); + info!("Updating service: {}", name); + match services::get_service_updater(&name) { + Some(updater) => { + let updated_instances_result = updater + .update(client, &service.instances) + .await + .context("failed to update service"); + match updated_instances_result { + Ok(updated_instances) => { + debug!("Updated instances: {:?}", updated_instances); + service.instances = normalize_instances(&updated_instances) + } + Err(e) => { + error!("Failed to update service {name}: {e}"); + service.instances = normalize_instances(&service.instances); + } + } + } + None => { + debug!("No updater found for service {}", name); + service.instances = normalize_instances(&service.instances); + } + }; +} + +/// Check instances for a service. +/// +/// This function will check all instances of a service and update their ping history. +async fn check_instances( + actualizer_data: &mut ActualizerData, + proxies: &ProxyData, + name: &str, + service: &mut Service, + config: &CrawlerConfig, +) -> Result<()> { + let checker = services::get_instance_checker(name); + let service_history = actualizer_data + .services + .entry(name.to_string()) + .or_default(); + let service_clone = service.clone(); + for instance in service.instances.iter_mut() { + info!("Checking instance: {}", instance.url); + let client = build_client(&service_clone, config, proxies, instance)?; + let is_alive = { + let res = checker + .check(client.clone(), &service_clone, instance) + .await; + match res { + Ok(is_alive) => is_alive, + Err(e) => { + error!("Failed to check instance {url}: {e}", url = instance.url); + false + } + } + }; + debug!("Instance is alive: {}", is_alive); + + let instance_history = match service_history.get_instance_mut(&instance.url) { + Some(instance_history) => instance_history, + None => { + service_history.add_instance(&instance.clone()); + 
service_history.get_instance_mut(&instance.url).unwrap() + } + }; + instance_history.ping_history.cleanup(); + instance_history.ping_history.push_ping(is_alive); + + instance.tags = update_instance_tags(client, instance.url.clone(), &instance.tags).await; + } + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<()> { + let cli = Cli::parse(); + + configure_logging(&cli.log_level).ok(); + + match &cli.command { + Some(Commands::Actualize { + services, + output, + data, + }) => { + let config = load_config(&cli.config).context("failed to load config")?; + + let output = output.as_ref().unwrap_or(services); + debug!("Output file: {:?}", output); + + debug!("Reading data file: {:?}", data); + let mut actualizer_data: ActualizerData = { + if !data.is_file() { + warn!("Data file does not exist, creating new data"); + ActualizerData::new() + } else { + let data_content = + std::fs::read_to_string(data).context("failed to read data file")?; + serde_json::from_str(&data_content).context("failed to parse data file")? + } + }; + + debug!("Reading services file: {:?}", services); + let stored_data: StoredData = { + if !services.is_file() { + return Err(anyhow!("services file does not exist")); + } + let data_content = + std::fs::read_to_string(services).context("failed to read services file")?; + serde_json::from_str(&data_content).context("failed to parse services file")? 
+ }; + let mut services_data = stored_data + .services + .into_iter() + .map(|service| (service.name.clone(), service)) + .collect(); + + let start = std::time::Instant::now(); + + actualizer_data.remove_removed_services(&services_data); + actualizer_data.remove_removed_instances(&services_data); + + let update_service_client = reqwest::Client::new(); + for (name, service) in services_data.iter_mut() { + update_service(service, update_service_client.clone()).await; + check_instances( + &mut actualizer_data, + &config.proxies, + name, + service, + &config.crawler, + ) + .await + .log_err(&format!("failed to check instances for service {name}")) + .ok(); + } + + actualizer_data.remove_dead_instances(&mut services_data); + + let elapsed = start.elapsed(); + info!("Elapsed time: {:?}", elapsed); + + // Write data back to file + let data_content = serde_json::to_string_pretty(&actualizer_data) + .context("failed to serialize data")?; + std::fs::write(data, data_content).context("failed to write data file")?; + let stored_data = StoredData { + services: services_data.into_values().collect(), + }; + let services_content = serde_json::to_string_pretty(&stored_data) + .context("failed to serialize services")?; + std::fs::write(output, services_content).context("failed to write services file")?; + } + None => Err(CliError::NoSubcommand) + .context("no subcommand was used. Pass --help to view available commands")?, + } + + Ok(()) } diff --git a/fastside-actualizer/src/serde_types.rs b/fastside-actualizer/src/serde_types.rs new file mode 100644 index 0000000..63d7cf5 --- /dev/null +++ b/fastside-actualizer/src/serde_types.rs @@ -0,0 +1,172 @@ +use std::collections::HashMap; + +use fastside_shared::serde_types::{Instance, Service, ServicesData}; +use serde::{Deserialize, Serialize}; +use url::Url; + +/// SingleInstancePing is a single ping for a instance in history. 
+#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct SingleInstancePing { + pub timestamp: u64, + pub success: bool, +} + +impl SingleInstancePing { + /// Create a new ping with given timestamp. + pub fn new(timestamp: u64, success: bool) -> Self { + Self { timestamp, success } + } + + /// Create a new ping with current timestamp. + pub fn now(success: bool) -> Self { + Self::new(chrono::Utc::now().timestamp() as u64, success) + } +} + +impl From for SingleInstancePing { + fn from(success: bool) -> Self { + Self::now(success) + } +} + +/// PingHistory is a list of pings for a single instance +/// +/// It is used to calculate uptime. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct PingHistory(Vec); + +impl PingHistory { + pub fn cleanup(&mut self) { + // Remove pings older than 7 days + let min_timestamp = (chrono::Utc::now() - chrono::Duration::days(7)).timestamp() as u64; + self.0.retain(|p| p.timestamp > min_timestamp); + } + + pub fn uptime(&self) -> u8 { + let total_pings = self.0.len(); + let successful_pings = self.0.iter().filter(|p| p.success).count(); + if total_pings == 0 { + 100 + } else { + ((successful_pings as f64 / total_pings as f64 * 100.0) as u8).clamp(0, 100) + } + } + + pub fn push_ping(&mut self, ping: impl Into) { + self.0.push(ping.into()); + } + + /// Check if PingHistory have enough pings to be considered ready + pub fn is_ready(&self) -> bool { + self.0.len() >= 50 + } +} + +/// InstanceHistory is a history of pings for an instance. 
+#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct InstanceHistory { + pub url: Url, + pub ping_history: PingHistory, +} + +impl From<&Instance> for InstanceHistory { + fn from(instance: &Instance) -> Self { + Self { + url: instance.url.clone(), + ping_history: PingHistory(Vec::new()), + } + } +} + +impl From for InstanceHistory { + fn from(url: Url) -> Self { + Self { + url, + ping_history: PingHistory(Vec::new()), + } + } +} + +/// ServiceHistory is a history of instances for a service. +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +pub struct ServiceHistory { + #[serde(default)] + pub instances: Vec, +} + +const MIN_UPTIME: u8 = 30; + +impl ServiceHistory { + pub fn get_instance(&self, url: &Url) -> Option<&InstanceHistory> { + self.instances.iter().find(|i| &i.url == url) + } + + pub fn get_instance_mut(&mut self, url: &Url) -> Option<&mut InstanceHistory> { + self.instances.iter_mut().find(|i| &i.url == url) + } + + pub fn add_instance(&mut self, instance: impl Into) { + self.instances.push(instance.into()); + } + + /// Remove instances that are not in the list + pub fn remove_removed_instances(&mut self, instances: &[Instance]) { + self.instances + .retain(|i| instances.iter().any(|instance| i.url == instance.url)); + } + + /// Remove instances with uptime lower than 30% + pub fn remove_dead_instances(&self, service: &mut Service) { + let mut dead_instances = Vec::new(); + for instance in &self.instances { + if instance.ping_history.is_ready() && instance.ping_history.uptime() < MIN_UPTIME { + debug!("Removing dead instance: {}", instance.url); + dead_instances.push(instance.url.clone()); + } + } + service + .instances + .retain(|i| !dead_instances.contains(&i.url)); + } +} + +/// ActualizerData is a history of services availability. 
+#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ActualizerData { + pub services: HashMap, +} + +impl ActualizerData { + pub fn new() -> Self { + Self { + services: HashMap::new(), + } + } + + pub fn remove_removed_services(&mut self, services: &ServicesData) { + let service_names: Vec = services.keys().cloned().collect(); + self.services.retain(|name, _| service_names.contains(name)); + } + + pub fn remove_removed_instances(&mut self, services: &ServicesData) { + for (name, service) in services { + if let Some(service_history) = self.services.get_mut(name) { + service_history.remove_removed_instances(&service.instances); + } + } + } + + pub fn remove_dead_instances(&self, services: &mut ServicesData) { + for (name, service) in services { + if let Some(service_history) = self.services.get(name) { + service_history.remove_dead_instances(service); + } + } + } +} + +impl Default for ActualizerData { + fn default() -> Self { + Self::new() + } +} diff --git a/fastside-actualizer/src/services/default.rs b/fastside-actualizer/src/services/default.rs new file mode 100644 index 0000000..b10a77c --- /dev/null +++ b/fastside-actualizer/src/services/default.rs @@ -0,0 +1,51 @@ +use async_trait::async_trait; +use fastside_shared::serde_types::{HttpCodeRanges, Instance, Service}; +use reqwest::Client; + +use crate::types::InstanceChecker; + +/// Default instance checker. +/// +/// Implements same logic as fastside crawler. 
+pub struct DefaultInstanceChecker; + +impl DefaultInstanceChecker { + pub fn new() -> Self { + Self + } +} + +impl Default for DefaultInstanceChecker { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl InstanceChecker for DefaultInstanceChecker { + async fn check( + &self, + client: Client, + service: &Service, + instance: &Instance, + ) -> anyhow::Result { + let response = client.get(instance.url.to_string()).send().await?; + let status_code = response.status().as_u16(); + if service.allowed_http_codes.is_allowed(status_code) { + if let Some(search_string) = &service.search_string { + let body = response.text().await?; + if body.contains(search_string) { + Ok(true) + } else { + debug!("Search string not found: {}", search_string); + Ok(false) + } + } else { + Ok(true) + } + } else { + debug!("Invalid status code: {}", status_code); + Ok(false) + } + } +} diff --git a/fastside-actualizer/src/services/mod.rs b/fastside-actualizer/src/services/mod.rs new file mode 100644 index 0000000..3104b71 --- /dev/null +++ b/fastside-actualizer/src/services/mod.rs @@ -0,0 +1,22 @@ +mod default; +mod searx; + +use crate::types::ServiceUpdater; + +pub use default::DefaultInstanceChecker; + +/// Get a service updater by name. +pub fn get_service_updater(name: &str) -> Option> { + match name { + "searx" => Some(Box::new(searx::SearxUpdater::new())), + _ => None, + } +} + +/// Get an instance checker by name. 
+pub fn get_instance_checker(name: &str) -> Box { + match name { + "searx" => Box::new(searx::SearxUpdater::new()), + _ => Box::new(DefaultInstanceChecker::new()), + } +} diff --git a/fastside-actualizer/src/services/searx.rs b/fastside-actualizer/src/services/searx.rs new file mode 100644 index 0000000..3f22156 --- /dev/null +++ b/fastside-actualizer/src/services/searx.rs @@ -0,0 +1,66 @@ +use std::collections::HashMap; + +use crate::types::{InstanceChecker, ServiceUpdater}; +use async_trait::async_trait; +use fastside_shared::serde_types::{Instance, Service}; +use serde::Deserialize; +use url::Url; + +pub struct SearxUpdater { + pub instances_url: String, +} + +impl SearxUpdater { + pub fn new() -> Self { + Self { + instances_url: "https://raw.githubusercontent.com/searx/searx-instances/master/searxinstances/instances.yml".to_string(), + } + } +} + +impl Default for SearxUpdater { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug, Deserialize)] +struct InstancesResponse(HashMap); + +#[async_trait] +impl ServiceUpdater for SearxUpdater { + async fn update( + &self, + client: reqwest::Client, + current_instances: &[Instance], + ) -> anyhow::Result> { + let response = client.get(&self.instances_url).send().await?; + let response_str = response.text().await?; + let parsed: InstancesResponse = serde_yaml::from_str(&response_str)?; + + let mut instances = current_instances.to_vec(); + + for url in parsed.0.keys() { + if current_instances.iter().any(|i| &i.url == url) { + continue; + } + + instances.push(Instance::from(url.clone())); + } + + Ok(instances) + } +} + +#[async_trait] +impl InstanceChecker for SearxUpdater { + async fn check( + &self, + client: reqwest::Client, + _service: &Service, + instance: &Instance, + ) -> anyhow::Result { + let response = client.get(instance.url.clone()).send().await?; + Ok(response.status().is_success()) + } +} diff --git a/fastside-actualizer/src/types.rs b/fastside-actualizer/src/types.rs new file mode 100644 index 
0000000..1031612 --- /dev/null +++ b/fastside-actualizer/src/types.rs @@ -0,0 +1,27 @@ +use anyhow::Result; +use async_trait::async_trait; +use fastside_shared::serde_types::{Instance, Service}; +use reqwest::Client; + +#[async_trait] +pub trait ServiceUpdater { + /// Update the list of instances. + /// + /// Checks public list of instances and adds new entries to the list. + /// + /// # Arguments + /// + /// * `current_instances` - The current list of instances. + /// + /// # Returns + /// + /// The updated list of instances. + async fn update(&self, client: Client, current_instances: &[Instance]) + -> Result>; +} + +#[async_trait] +pub trait InstanceChecker { + /// Check single instance. + async fn check(&self, client: Client, service: &Service, instance: &Instance) -> Result; +} diff --git a/fastside-actualizer/src/utils/log_err.rs b/fastside-actualizer/src/utils/log_err.rs new file mode 100644 index 0000000..9c75402 --- /dev/null +++ b/fastside-actualizer/src/utils/log_err.rs @@ -0,0 +1,15 @@ +use std::fmt::Debug; + +pub trait LogErrResult { + /// Log an error message and return the original error. + fn log_err(self, msg: &str) -> Result; +} + +impl LogErrResult for Result { + fn log_err(self, msg: &str) -> Result { + self.map_err(|e| { + error!("{}: {:?}", msg, e); + e + }) + } +} diff --git a/fastside-actualizer/src/utils/mod.rs b/fastside-actualizer/src/utils/mod.rs new file mode 100644 index 0000000..5355ca0 --- /dev/null +++ b/fastside-actualizer/src/utils/mod.rs @@ -0,0 +1,3 @@ +pub mod log_err; +pub mod normalize; +pub mod tags; diff --git a/fastside-actualizer/src/utils/normalize.rs b/fastside-actualizer/src/utils/normalize.rs new file mode 100644 index 0000000..4cc5ac9 --- /dev/null +++ b/fastside-actualizer/src/utils/normalize.rs @@ -0,0 +1,11 @@ +use std::collections::HashSet; + +use fastside_shared::serde_types::Instance; + +/// Normalize instances by removing duplicates and sorting them. 
+pub fn normalize_instances(instances: &[Instance]) -> Vec { + let set: HashSet = instances.iter().cloned().collect(); + let mut vec: Vec = set.into_iter().collect(); + vec.sort(); + vec +} diff --git a/fastside-actualizer/src/utils/tags.rs b/fastside-actualizer/src/utils/tags.rs new file mode 100644 index 0000000..8defbc2 --- /dev/null +++ b/fastside-actualizer/src/utils/tags.rs @@ -0,0 +1,188 @@ +use anyhow::Result; +use async_std_resolver::{ + config, + proto::rr::{RData, RecordType}, + resolver, +}; +use ipnet::Ipv6Net; +use reqwest::Client; +use url::Url; + +use super::log_err::LogErrResult; + +const AUTO_TAGS: [&str; 9] = [ + "ipv4", + "ipv6", + "https", + "http", + "tor", + "i2p", + "ygg", + "alfis", + "cloudflare", +]; +const HIDDEN_DOMAINS: [&str; 2] = [".onion", ".i2p"]; + +fn remove_auto_tags(tags: &mut Vec) { + tags.retain(|tag| !AUTO_TAGS.contains(&tag.as_str())); +} + +async fn get_network_tags(client: Client, url: Url) -> Result> { + let is_hidden = if let Some(domain) = url.domain() { + HIDDEN_DOMAINS.iter().any(|d| domain.ends_with(d)) + } else { + false + }; + + let mut tags = Vec::new(); + let response = client.get(url).send().await?; + let headers = response.headers(); + if let Some(header) = headers.get("Server") { + let header_str = header.to_str()?; + if !is_hidden && header_str.contains("cloudflare") { + tags.push("cloudflare".to_string()); + } + } + Ok(tags) +} + +fn is_ygg(ip: &std::net::Ipv6Addr) -> bool { + let ygg_net: Ipv6Net = "200::/7".parse().unwrap(); + ygg_net.contains(ip) +} + +async fn get_dns_tags(url: Url) -> Result> { + let domain = match url.domain() { + Some(domain) => domain, + None => return Ok(Vec::new()), + }; + if HIDDEN_DOMAINS.iter().any(|d| domain.ends_with(d)) { + debug!("Skipping hidden domain DNS: {}", domain); + return Ok(Vec::new()); + } + + let mut tags = Vec::new(); + + let resolver = resolver( + config::ResolverConfig::default(), + config::ResolverOpts::default(), + ) + .await; + + // This shit is ugly as 
shit + // Simplest method to support CNAMEs with depth 1 + let mut lookup_domain = domain.to_string(); + let mut records: Vec = Vec::new(); + match resolver.lookup(lookup_domain.clone(), RecordType::A).await { + Ok(l) => records.extend(l.iter().cloned()), + Err(e) => { + debug!("Failed to lookup A record for {}: {}", lookup_domain, e); + } + }; + // Find CNAME records in response + for rdata in records.iter() { + if let RData::CNAME(_) = rdata { + lookup_domain = rdata.to_string(); + // Resolve A again + records.clear(); + match resolver.lookup(lookup_domain.clone(), RecordType::A).await { + Ok(l) => records.extend(l.iter().cloned()), + Err(e) => { + debug!("Failed to lookup A record for {}: {}", lookup_domain, e); + } + }; + break; + } + } + match resolver + .lookup(lookup_domain.clone(), RecordType::AAAA) + .await + { + Ok(l) => records.extend(l.iter().cloned()), + Err(e) => { + debug!("Failed to lookup AAAA record for {}: {}", lookup_domain, e); + } + }; + for rdata in records.iter() { + let ip = match rdata.ip_addr() { + Some(ip) => ip, + None => { + warn!("Get something other than IP: {:?}", rdata); + continue; + } + }; + match ip { + std::net::IpAddr::V4(_) => tags.push("ipv4".to_string()), + std::net::IpAddr::V6(ip) => { + tags.push("ipv6".to_string()); + if is_ygg(&ip) { + tags.push("ygg".to_string()); + } + } + } + } + + Ok(tags) +} + +fn get_url_tags(url: &Url) -> Vec { + let mut tags = Vec::new(); + if url.scheme() == "https" { + tags.push("https".to_string()); + } else { + tags.push("http".to_string()); + } + if let Some(host) = url.host_str() { + if host.ends_with(".onion") { + tags.push("tor".to_string()); + } + if host.ends_with(".i2p") { + tags.push("i2p".to_string()); + } + if host.ends_with(".ygg") { + tags.push("ygg".to_string()); + tags.push("alfis".to_string()); + } + if url.domain().is_none() { + if let Ok(ip) = host.parse::() { + match ip { + std::net::IpAddr::V4(_) => tags.push("ipv4".to_string()), + std::net::IpAddr::V6(ip) => { + 
tags.push("ipv6".to_string()); + if is_ygg(&ip) { + tags.push("ygg".to_string()); + } + } + } + } + } + } + tags +} + +/// Update instance tags. +/// +/// This function updates instance tags based on URL, network and DNS information. +pub async fn update_instance_tags(client: Client, url: Url, tags: &[String]) -> Vec { + let mut tags = tags.to_owned(); + // Remove auto tags + remove_auto_tags(&mut tags); + // Actualize auto tags + tags.extend(get_url_tags(&url)); + tags.extend( + get_network_tags(client, url.clone()) + .await + .log_err("Failed to get network tags") + .unwrap_or_default(), + ); + tags.extend( + get_dns_tags(url) + .await + .log_err("Failed to get DNS tags") + .unwrap_or_default(), + ); + // Remove duplicates and sort + tags.sort(); + tags.dedup(); + tags +} diff --git a/fastside-shared/Cargo.toml b/fastside-shared/Cargo.toml index f388184..5c23fc8 100644 --- a/fastside-shared/Cargo.toml +++ b/fastside-shared/Cargo.toml @@ -4,8 +4,17 @@ version = "0.1.0" edition = "2021" [dependencies] +reqwest = { version = "0.12.4", default-features = false, features = [ + "rustls-tls", + "http2", +] } # http client + serde = { version = "1.0.201", features = ["derive"] } # serialization serde_json = "1.0.117" # serialization url = { version = "2.5.0", features = ["serde"] } # url base64 = "0.22.1" # base64 thiserror = "1.0.60" # error +config = "0.14.0" # config +anyhow = "1.0.83" # error +log = "0.4.21" # logging +pretty_env_logger = "0.5.0" # logging diff --git a/fastside-shared/src/client_builder.rs b/fastside-shared/src/client_builder.rs new file mode 100644 index 0000000..678f4a5 --- /dev/null +++ b/fastside-shared/src/client_builder.rs @@ -0,0 +1,68 @@ +use reqwest::Client; + +use crate::{ + config::{CrawlerConfig, ProxyData}, + serde_types::{Instance, Service}, +}; + +fn default_headers() -> reqwest::header::HeaderMap { + let mut headers = reqwest::header::HeaderMap::new(); + headers.insert( + reqwest::header::USER_AGENT, + 
reqwest::header::HeaderValue::from_static( + "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", + ), + ); + headers.insert(reqwest::header::ACCEPT, reqwest::header::HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8s")); + headers.insert( + reqwest::header::ACCEPT_LANGUAGE, + reqwest::header::HeaderValue::from_static("en-US,en;q=0.5"), + ); + headers.insert( + "X-Is-Fastside", + reqwest::header::HeaderValue::from_static("true"), + ); + headers +} + +pub fn build_client( + service: &Service, + config: &CrawlerConfig, + proxies: &ProxyData, + instance: &Instance, +) -> Result { + let redirect_policy = if service.follow_redirects { + reqwest::redirect::Policy::default() + } else { + reqwest::redirect::Policy::none() + }; + let mut client_builder = Client::builder() + .connect_timeout(config.request_timeout) + .read_timeout(config.request_timeout) + .default_headers(default_headers()) + .redirect(redirect_policy); + + let proxy_name: Option = { + let mut val: Option = None; + for proxy in proxies.keys() { + if instance.tags.contains(proxy) { + val = Some(proxy.clone()); + break; + } + } + val + }; + if let Some(proxy_name) = proxy_name { + let proxy_config = proxies.get(&proxy_name).unwrap(); + let proxy = { + let mut builder = reqwest::Proxy::all(&proxy_config.url)?; + if let Some(auth) = &proxy_config.auth { + builder = builder.basic_auth(&auth.username, &auth.password); + } + builder + }; + client_builder = client_builder.proxy(proxy); + } + + client_builder.build() +} diff --git a/fastside/src/config.rs b/fastside-shared/src/config.rs similarity index 54% rename from fastside/src/config.rs rename to fastside-shared/src/config.rs index 93c8578..3a02a2e 100644 --- a/fastside/src/config.rs +++ b/fastside-shared/src/config.rs @@ -1,11 +1,13 @@ //! Application configuration. 
-use std::{path::PathBuf, time::Duration}; +use std::{collections::HashMap, path::PathBuf, time::Duration}; use anyhow::{Context, Result}; use config::Config; use serde::{Deserialize, Serialize}; +use crate::errors::UserConfigError; + const fn default_ping_interval() -> Duration { // Every 5 minutes Duration::from_secs(60 * 5) @@ -40,11 +42,72 @@ impl Default for CrawlerConfig { } } +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct ProxyAuth { + pub username: String, + pub password: String, +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct Proxy { + pub url: String, + #[serde(default)] + pub auth: Option, +} + +pub type ProxyData = HashMap; + +#[derive(Deserialize, Serialize, Debug, Clone, Default, PartialEq)] +pub enum SelectMethod { + #[default] + Random, + LowPing, +} + +fn default_required_tags() -> Vec { + vec![ + "clearnet".to_string(), + "https".to_string(), + "ipv4".to_string(), + ] +} + +#[derive(Deserialize, Serialize, Debug, Clone, Default)] +pub struct UserConfig { + #[serde(default = "default_required_tags")] + pub required_tags: Vec, + #[serde(default)] + pub forbidden_tags: Vec, + #[serde(default)] + pub select_method: SelectMethod, + #[serde(default)] + pub ignore_fallback_warning: bool, +} + +impl UserConfig { + pub fn to_config_string(&self) -> Result { + use base64::prelude::*; + let json: String = serde_json::to_string(&self).map_err(UserConfigError::Serialization)?; + Ok(BASE64_STANDARD.encode(json.as_bytes())) + } + + pub fn from_config_string(data: &str) -> Result { + use base64::prelude::*; + let decoded = BASE64_STANDARD.decode(data.as_bytes())?; + let json = String::from_utf8(decoded).unwrap(); + serde_json::from_str(&json).map_err(UserConfigError::from) + } +} + /// Application configuration. 
#[derive(Serialize, Deserialize, Debug, Clone)] pub struct AppConfig { #[serde(default)] pub crawler: CrawlerConfig, + #[serde(default)] + pub proxies: ProxyData, + #[serde(default)] + pub default_user_config: UserConfig, } /// Load application configuration. diff --git a/fastside-shared/src/errors.rs b/fastside-shared/src/errors.rs new file mode 100644 index 0000000..babe057 --- /dev/null +++ b/fastside-shared/src/errors.rs @@ -0,0 +1,15 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum CliError { + #[error("no subcommand was used")] + NoSubcommand, +} + +#[derive(Error, Debug)] +pub enum UserConfigError { + #[error("serialization error: `{0}`")] + Serialization(#[from] serde_json::Error), + #[error("urlencode error: `{0}`")] + Base64Decode(#[from] base64::DecodeError), +} diff --git a/fastside-shared/src/lib.rs b/fastside-shared/src/lib.rs index a2d060c..728e261 100644 --- a/fastside-shared/src/lib.rs +++ b/fastside-shared/src/lib.rs @@ -1 +1,8 @@ +pub mod client_builder; +pub mod config; +pub mod errors; +pub mod log_setup; pub mod serde_types; + +#[macro_use] +extern crate log; diff --git a/fastside/src/log_setup.rs b/fastside-shared/src/log_setup.rs similarity index 100% rename from fastside/src/log_setup.rs rename to fastside-shared/src/log_setup.rs diff --git a/fastside-shared/src/serde_types.rs b/fastside-shared/src/serde_types.rs index 91db68e..31823b2 100644 --- a/fastside-shared/src/serde_types.rs +++ b/fastside-shared/src/serde_types.rs @@ -4,15 +4,23 @@ use serde::{ de::{self, Visitor}, Deserialize, Deserializer, Serialize, Serializer, }; -use thiserror::Error; use url::Url; -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct Instance { pub url: Url, pub tags: Vec, } +impl From for Instance { + fn from(url: Url) -> Self { + Instance { + url, + tags: Vec::new(), + } + } +} + fn default_test_url() -> String { "/".to_string() } @@ -170,67 +178,7 @@ 
pub struct Service { pub type ServicesData = HashMap; -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct ProxyAuth { - pub username: String, - pub password: String, -} - -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct Proxy { - pub url: String, - #[serde(default)] - pub auth: Option, -} - -pub type ProxyData = HashMap; - -#[derive(Deserialize, Serialize, Debug, Clone, Default, PartialEq)] -pub enum SelectMethod { - #[default] - Random, - LowPing, -} - -#[derive(Deserialize, Serialize, Debug, Clone, Default)] -pub struct UserConfig { - #[serde(default)] - pub required_tags: Vec, - #[serde(default)] - pub forbidden_tags: Vec, - #[serde(default)] - pub select_method: SelectMethod, - #[serde(default)] - pub ignore_fallback_warning: bool, -} - -#[derive(Error, Debug)] -pub enum UserConfigError { - #[error("serialization error: `{0}`")] - Serialization(#[from] serde_json::Error), - #[error("urlencode error: `{0}`")] - Base64Decode(#[from] base64::DecodeError), -} - -impl UserConfig { - pub fn to_config_string(&self) -> Result { - use base64::prelude::*; - let json: String = serde_json::to_string(&self).map_err(UserConfigError::Serialization)?; - Ok(BASE64_STANDARD.encode(json.as_bytes())) - } - - pub fn from_config_string(data: &str) -> Result { - use base64::prelude::*; - let decoded = BASE64_STANDARD.decode(data.as_bytes())?; - let json = String::from_utf8(decoded).unwrap(); - serde_json::from_str(&json).map_err(UserConfigError::from) - } -} - #[derive(Deserialize, Serialize, Debug, Clone)] pub struct StoredData { pub services: Vec, - pub proxies: ProxyData, - #[serde(default)] - pub default_settings: UserConfig, } diff --git a/fastside/Cargo.toml b/fastside/Cargo.toml index a5d711c..f390df0 100644 --- a/fastside/Cargo.toml +++ b/fastside/Cargo.toml @@ -20,13 +20,10 @@ reqwest = { version = "0.12.4", default-features = false, features = [ ] } # http client clap = { version = "4.5.4", features = ["derive"] } # cli -config = "0.14.0" # config serde 
= { version = "1.0.201", features = ["derive"] } # serialization serde_json = "1.0.117" # serialization -serde_qs = "0.13.0" # serialization url = { version = "2.5.0", features = ["serde"] } # url log = "0.4.21" # logging -pretty_env_logger = "0.5.0" # logging anyhow = "1.0.83" # error thiserror = "1.0.60" # error tokio = { version = "1.37.0", features = ["full"] } # async diff --git a/fastside/src/crawler.rs b/fastside/src/crawler.rs index a822e4b..af1832b 100644 --- a/fastside/src/crawler.rs +++ b/fastside/src/crawler.rs @@ -5,33 +5,16 @@ use std::{ }; use chrono::{DateTime, Utc}; -use reqwest::{Client, StatusCode}; +use reqwest::StatusCode; use thiserror::Error; use tokio::{sync::RwLock, time::sleep}; use url::Url; use crate::{config::CrawlerConfig, types::LoadedData, utils::parallel::Parallelise}; -use fastside_shared::serde_types::{HttpCodeRanges, Instance, Service}; - -fn default_headers() -> reqwest::header::HeaderMap { - let mut headers = reqwest::header::HeaderMap::new(); - headers.insert( - reqwest::header::USER_AGENT, - reqwest::header::HeaderValue::from_static( - "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", - ), - ); - headers.insert(reqwest::header::ACCEPT, reqwest::header::HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8s")); - headers.insert( - reqwest::header::ACCEPT_LANGUAGE, - reqwest::header::HeaderValue::from_static("en-US,en;q=0.5"), - ); - headers.insert( - "X-Is-Fastside", - reqwest::header::HeaderValue::from_static("true"), - ); - headers -} +use fastside_shared::{ + client_builder::build_client, + serde_types::{HttpCodeRanges, Instance, Service}, +}; #[derive(Error, Debug)] pub enum CrawlerError { @@ -128,40 +111,12 @@ impl Crawler { service: Arc, instance: Instance, ) -> Result<(CrawledInstance, String), CrawlerError> { - let redirect_policy = if service.follow_redirects { - reqwest::redirect::Policy::default() - } else { - 
reqwest::redirect::Policy::none() - }; - let mut client_builder = Client::builder() - .connect_timeout(config.request_timeout) - .read_timeout(config.request_timeout) - .default_headers(default_headers()) - .redirect(redirect_policy); - - let proxy_name: Option = { - let mut val: Option = None; - for proxy in loaded_data.proxies.keys() { - if instance.tags.contains(proxy) { - val = Some(proxy.clone()); - break; - } - } - val - }; - if let Some(proxy_name) = proxy_name { - let proxy_config = loaded_data.proxies.get(&proxy_name).unwrap(); - let proxy = { - let mut builder = reqwest::Proxy::all(&proxy_config.url)?; - if let Some(auth) = &proxy_config.auth { - builder = builder.basic_auth(&auth.username, &auth.password); - } - builder - }; - client_builder = client_builder.proxy(proxy); - } - - let client = client_builder.build().unwrap(); + let client = build_client( + service.as_ref(), + config.as_ref(), + &loaded_data.proxies, + &instance, + )?; let test_url = instance.url.join(&service.test_url)?; let start = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); diff --git a/fastside/src/errors.rs b/fastside/src/errors.rs index b2fabf0..51f9ee4 100644 --- a/fastside/src/errors.rs +++ b/fastside/src/errors.rs @@ -68,7 +68,7 @@ pub enum RedirectError { #[error("url parse error: `{0}`")] UrlParse(#[from] url::ParseError), #[error("user config error: `{0}`")] - UserConfig(#[from] fastside_shared::serde_types::UserConfigError), + UserConfig(#[from] fastside_shared::errors::UserConfigError), } impl_template_error!(RedirectError, diff --git a/fastside/src/main.rs b/fastside/src/main.rs index 9f9c023..27268b0 100644 --- a/fastside/src/main.rs +++ b/fastside/src/main.rs @@ -1,9 +1,7 @@ -/// Fastside API server. -mod config; +//! Fastside API server. 
mod crawler; mod errors; mod filters; -mod log_setup; mod routes; mod search; mod types; @@ -14,7 +12,12 @@ use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; use config::load_config; use crawler::Crawler; -use fastside_shared::serde_types::{ServicesData, StoredData}; +use fastside_shared::{ + config, + errors::CliError, + log_setup, + serde_types::{ServicesData, StoredData}, +}; use log_setup::configure_logging; use regex::Regex; use routes::main_scope; @@ -25,7 +28,6 @@ use std::{ str::FromStr, sync::Arc, }; -use thiserror::Error; use types::{CompiledRegexSearch, LoadedData}; #[deny(unused_imports)] @@ -66,12 +68,6 @@ enum Commands { }, } -#[derive(Error, Debug)] -pub enum CliError { - #[error("no subcommand was used")] - NoSubcommand, -} - // This function is needed to take ownership over cloned reference to crawler. async fn crawler_loop(crawler: Arc) { crawler.crawler_loop().await @@ -115,8 +111,8 @@ async fn main() -> Result<()> { .collect(); let data = LoadedData { services: services_data, - proxies: stored_data.proxies, - default_settings: stored_data.default_settings, + proxies: config.proxies.clone(), + default_user_config: config.default_user_config.clone(), }; Arc::new(data) diff --git a/fastside/src/routes/api.rs b/fastside/src/routes/api.rs index 7c386ce..aedf5df 100644 --- a/fastside/src/routes/api.rs +++ b/fastside/src/routes/api.rs @@ -1,4 +1,5 @@ use actix_web::{post, web, Responder, Scope}; +use fastside_shared::config::UserConfig; use serde::{Deserialize, Serialize}; use crate::{ @@ -7,7 +8,6 @@ use crate::{ errors::{RedirectApiError, RedirectError}, types::{LoadedData, Regexes}, }; -use fastside_shared::serde_types::UserConfig; pub fn scope(_config: &AppConfig) -> Scope { web::scope("/api/v1") diff --git a/fastside/src/routes/config.rs b/fastside/src/routes/config.rs index 38790ce..9d45ee6 100644 --- a/fastside/src/routes/config.rs +++ b/fastside/src/routes/config.rs @@ -1,11 +1,11 @@ use actix_web::{cookie::Cookie, get, 
http::header::LOCATION, web, HttpRequest, Responder, Scope}; use askama::Template; +use fastside_shared::config::UserConfig; use crate::{ config::AppConfig, errors::RedirectError, types::LoadedData, utils::user_config::load_settings_cookie, }; -use fastside_shared::serde_types::UserConfig; pub fn scope(_config: &AppConfig) -> Scope { web::scope("/configure") @@ -24,7 +24,7 @@ async fn configure_page( req: HttpRequest, loaded_data: web::Data, ) -> actix_web::Result { - let user_config = load_settings_cookie(&req, &loaded_data.default_settings); + let user_config = load_settings_cookie(&req, &loaded_data.default_user_config); let template = ConfigureTemplate { current_config: &user_config diff --git a/fastside/src/routes/redirect.rs b/fastside/src/routes/redirect.rs index 166cf8e..c0547f7 100644 --- a/fastside/src/routes/redirect.rs +++ b/fastside/src/routes/redirect.rs @@ -16,7 +16,10 @@ use crate::{ types::{LoadedData, Regexes}, utils::user_config::load_settings_cookie, }; -use fastside_shared::serde_types::{SelectMethod, Service, UserConfig}; +use fastside_shared::{ + config::{SelectMethod, UserConfig}, + serde_types::Service, +}; pub fn scope(_config: &AppConfig) -> Scope { web::scope("") @@ -43,7 +46,7 @@ async fn cached_redirect( ) -> actix_web::Result { let (service_name, _) = path.into_inner(); - let user_config = load_settings_cookie(&req, &loaded_data.default_settings); + let user_config = load_settings_cookie(&req, &loaded_data.default_user_config); let guard = crawler.read().await; let (crawled_service, _) = @@ -168,7 +171,7 @@ async fn base_redirect( ) -> actix_web::Result { let path = path.into_inner(); - let user_config = load_settings_cookie(&req, &loaded_data.default_settings); + let user_config = load_settings_cookie(&req, &loaded_data.default_user_config); let (mut url, is_fallback) = find_redirect( crawler.get_ref(), diff --git a/fastside/src/search.rs b/fastside/src/search.rs index 7943cb2..b2ae85f 100644 --- a/fastside/src/search.rs +++ 
b/fastside/src/search.rs @@ -7,7 +7,10 @@ use crate::{ crawler::{CrawledInstance, CrawledInstanceStatus, CrawledService, CrawledServices}, types::Regexes, }; -use fastside_shared::serde_types::{SelectMethod, Service, ServicesData, UserConfig}; +use fastside_shared::{ + config::{SelectMethod, UserConfig}, + serde_types::{Service, ServicesData}, +}; use rand::seq::SliceRandom; use thiserror::Error; diff --git a/fastside/src/types.rs b/fastside/src/types.rs index a6daed9..9f9e603 100644 --- a/fastside/src/types.rs +++ b/fastside/src/types.rs @@ -1,6 +1,9 @@ use std::collections::HashMap; -use fastside_shared::serde_types::{ProxyData, ServicesData, UserConfig}; +use fastside_shared::{ + config::{ProxyData, UserConfig}, + serde_types::ServicesData, +}; pub struct CompiledRegexSearch { pub regex: regex::Regex, @@ -13,5 +16,5 @@ pub type Regexes = HashMap>; pub struct LoadedData { pub services: ServicesData, pub proxies: ProxyData, - pub default_settings: UserConfig, + pub default_user_config: UserConfig, } diff --git a/fastside/src/utils/user_config.rs b/fastside/src/utils/user_config.rs index 577ee44..e48148d 100644 --- a/fastside/src/utils/user_config.rs +++ b/fastside/src/utils/user_config.rs @@ -1,6 +1,5 @@ use actix_web::HttpRequest; - -use fastside_shared::serde_types::UserConfig; +use fastside_shared::config::UserConfig; pub fn load_settings_cookie(req: &HttpRequest, default: &UserConfig) -> UserConfig { let cookie = match req.cookie("config") { diff --git a/services.json b/services.json index a9db9b0..79b4257 100644 --- a/services.json +++ b/services.json @@ -9251,18 +9251,5 @@ } ] } - ], - "proxies": { - "tor": { - "url": "socks5h://127.0.0.1:9050" - }, - "i2p": { - "url": "http://127.0.0.1:4444" - } - }, - "default_settings": { - "required_tags": [ - "clearnet" - ] - } + ] }