Skip to content

Commit

Permalink
Cleanup and overhaul
Browse files Browse the repository at this point in the history
  • Loading branch information
Scripter17 committed Sep 7, 2024
1 parent 1e0ffbb commit 25cae38
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 76 deletions.
39 changes: 26 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,40 @@ A very basic HTTP server and userscript to allow automatically applying [URL Cle
It binds to `0.0.0.0:9149` by default and `http://localhost:9149/clean` takes a simple job of the following form

```Rust
#[derive(Debug, Clone, Serialize, Deserialize)]
struct Job {
urls: Vec<String>,
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BulkJob {
#[serde(alias = "urls", alias = "configs")]
pub job_configs: Vec<JobConfig>,
#[serde(default)]
params_diff: Option<url_cleaner::types::ParamsDiff>
pub params_diff: Option<ParamsDiff>
}
```

and returns a response of the following form
and returns a response of type `Result<CleaningSuccess, CleaningError>`, whose types are defined as

```Rust
#[derive(Debug, Clone, Serialize, Deserialize)]
struct JobResponse {
urls: Vec<Result<Url, JobError>>
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CleaningSuccess {
pub urls: Vec<Result<Url, JobError>>
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct JobError {
r#type: String,
source_url: String,
error: String
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct JobError {
pub r#type: JobErrorType,
pub message: String,
pub variant: String
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum JobErrorType {
GetJobError,
DoJobError
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CleaningError {
status: u16,
reason: Option<&'static str>
}
```

Expand Down
106 changes: 58 additions & 48 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#[macro_use] extern crate rocket;
use rocket::serde::json::Json;
use rocket::http::Header;
use rocket::{Request, Response};
use rocket::fairing::{Fairing, Info, Kind};
use rocket::http::Status;
use rocket::Request;
use rocket::data::Limits;

use std::net::IpAddr;
Expand Down Expand Up @@ -75,96 +74,107 @@ fn rocket() -> _ {
limits: Limits::default().limit("json", args.max_size),
..rocket::Config::default()
})
.mount("/", routes![index])
.mount("/", routes![index, clean, get_max_json_size, get_config])
.mount("/clean", routes![clean])
.register("/clean", catchers![clean_error])
.mount("/get-max-json-size", routes![get_max_json_size])
.mount("/get-config", routes![get_config])
.attach(Anarcors)
}

#[get("/")]
fn index() -> &'static str {
r#"Both URL Cleaner Site and URL Cleaner are licensed under the Affero General Public License V3 or later (SPDX: AGPL-3.0-or-later).
https://en.wikipedia.org/wiki/GNU_Affero_General_Public_License
https://www.gnu.org/licenses/agpl-3.0.html
The original source code of URL Cleaner: https://github.com/Scripter17/url-cleaner
The original source code of URL Cleaner Site: https://github.com/Scripter17/url-cleaner-site
The original source code of URL Cleaner: https://github.com/Scripter17/url-cleaner
The modified source code of URL Cleaner (if applicable):
The modified source code of URL Cleaner Site (if applicable): "#
The modified source code of URL Cleaner Site (if applicable):
The modified source code of URL Cleaner (if applicable):"#
}

#[get("/")]
#[get("/get_config")]
fn get_config() -> &'static str {
CONFIG_STRING.get().unwrap()
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct BulkJob {
jobs: Vec<JobConfig>,
pub struct BulkJob {
#[serde(alias = "urls", alias = "configs")]
pub job_configs: Vec<JobConfig>,
#[serde(default)]
params_diff: Option<ParamsDiff>
pub params_diff: Option<ParamsDiff>
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct JobError {
pub r#type: JobErrorType,
pub message: String,
pub variant: String
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct JobError {
r#type: &'static str,
error: String
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum JobErrorType {
GetJobError,
DoJobError
}

impl From<GetJobError> for JobError {
fn from(value: GetJobError) -> Self {
Self {
r#type: "GetJobError",
error: value.to_string()
r#type: JobErrorType::GetJobError,
message: value.to_string(),
variant: format!("{value:?}")
}
}
}

impl From<DoJobError> for JobError {
fn from(value: DoJobError) -> Self {
Self {
r#type: "DoJobError",
error: value.to_string()
r#type: JobErrorType::DoJobError,
message: value.to_string(),
variant: format!("{value:?}")
}
}
}

#[post("/", data="<bulk_job>")]
fn clean(bulk_job: Json<BulkJob>) -> Json<Vec<Result<Url, JobError>>> {
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CleaningSuccess {
pub urls: Vec<Result<Url, JobError>>
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CleaningError {
status: u16,
reason: Option<&'static str>
}

#[post("/clean", data="<bulk_job>")]
fn clean(bulk_job: Json<BulkJob>) -> Json<Result<CleaningSuccess, ()>> {
let bulk_job = bulk_job.0;
let mut config = Cow::Borrowed(CONFIG.get().unwrap());
if let Some(params_diff) = bulk_job.params_diff {
params_diff.apply(&mut config.to_mut().params);
}
Json(Jobs {
config,
cache_handler: CACHE_HANDLER.get().unwrap().clone(),
job_source: Box::new(bulk_job.jobs.into_iter().map(Ok))
}.r#do().into_iter().map(|job_result| Ok(job_result??)).collect())
Json(Ok(CleaningSuccess {
urls: Jobs {
config,
cache_handler: CACHE_HANDLER.get().unwrap().clone(), // It's a newtype around an Arc, so cloning is O(1).
configs_source: Box::new(bulk_job.job_configs.into_iter().map(Ok))
}.r#do().into_iter().map(|job_result| Ok(job_result??)).collect()
}))
}

#[get("/")]
fn get_max_json_size() -> String {
MAX_JSON_SIZE.get().unwrap().as_u64().to_string()
#[catch(default)]
fn clean_error(status: Status, _request: &Request) -> Json<Result<(), CleaningError>> {
Json(Err(CleaningError {
status: status.code,
reason: status.reason()
}))
}

struct Anarcors;

#[rocket::async_trait]
impl Fairing for Anarcors {
fn info(&self) -> Info {
Info {
name: "Add CORS headers to response",
kind: Kind::Response
}
}

async fn on_response<'r>(&self, _request: &'r Request<'_>, response: &mut Response<'r>) {
response.set_header(Header::new("Access-Control-Allow-Origin", "*"));
response.set_header(Header::new("Access-Control-Allow-Methods", "POST, PATCH, PUT, DELETE, HEAD, OPTIONS, GET"));
response.set_header(Header::new("Access-Control-Allow-Headers", "*"));
response.set_header(Header::new("Access-Control-Allow-Credentials", "true"));
}
#[get("/get_max_json_size")]
fn get_max_json_size() -> String {
MAX_JSON_SIZE.get().unwrap().as_u64().to_string()
}
35 changes: 20 additions & 15 deletions userscript.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ window.PARAMS_DIFF = {"vars": {"SOURCE_URL": window.location.href, "SOURCE_HOST"
})})();

function elements_to_bulk_job(elements) {
return {jobs: elements.map(x => element_to_job_config(x)), params_diff: window.PARAMS_DIFF};
return {urls: elements.map(x => element_to_job_config(x)), params_diff: window.PARAMS_DIFF};
}

function element_to_job_config(element) {
Expand All @@ -43,7 +43,7 @@ function element_to_job_config(element) {
async function clean_all_urls_on_page() {
var elements = [...document.getElementsByTagName("a")]
.filter(e => e.href.startsWith("http") && // Relative URLs are replaced with absolute URLs when getting the `href` property. Also cleaning "javascript:void(0)" returns an error for some reason.
e.getAttribute("url-cleaned") == null);
e.getAttribute("url-cleaner") == null);
a: if (elements.length > 0) {
// Limit total size of request. Repeated iterations will get all link elements.
while (JSON.stringify(elements_to_bulk_job(elements)).length > window.MAX_JSON_SIZE) {
Expand All @@ -67,21 +67,26 @@ async function clean_all_urls_on_page() {
method: "POST",
data: JSON.stringify(elements_to_bulk_job(elements)),
onload: function(response) {
JSON.parse(response.responseText).forEach(function (cleaning_result, index) {
if (cleaning_result.Err == null) {
if (elements[index].href != cleaning_result.Ok) {
elements[index].href = cleaning_result.Ok;
elements[index].setAttribute("url-cleaned", "success");
let result = JSON.parse(response.responseText);
if (result.Ok !== null) {
result.Ok.urls.forEach(function (cleaning_result, index) {
if (cleaning_result.Err == null) {
if (elements[index].href != cleaning_result.Ok) {
elements[index].href = cleaning_result.Ok;
elements[index].setAttribute("url-cleaner", "success");
} else {
elements[index].setAttribute("url-cleaner", "unchanged");
}
} else {
elements[index].setAttribute("url-cleaned", "unchanged");
console.error("URL Cleaner job error:", cleaning_result, index, elements[index]);
elements[index].setAttribute("url-cleaner", "error");
elements[index].setAttribute("url-cleaner-error", JSON.stringify(cleaning_result.Err));
elements[index].style.color = "red";
}
} else {
console.error("URL Cleaner error:", cleaning_result, index, elements[index]);
elements[index].setAttribute("url-cleaned", "error");
elements[index].setAttribute("url-cleaner-error", cleaning_result.Err);
elements[index].style.color = "red";
}
})
})
} else {
console.error("URL Cleaner bulk job error", result);
}
}
});
}
Expand Down

0 comments on commit 25cae38

Please sign in to comment.