From 8ef8e402f585309e9f4647aa9a4169638e514aab Mon Sep 17 00:00:00 2001
From: Alex Ioannidis
Date: Mon, 11 Nov 2024 14:15:31 +0100
Subject: [PATCH] moderation: add CLI for link domains management

* Adds a "moderation domains add" command that registers a banned/safe
  link domain, either from "--domain" or in bulk from a CSV file given
  with "--file". The CSV needs a header row with a "domain" column;
  "notes", "score" and "status" columns are optional.
* Points the "moderation" entry point at "moderation_cli" and names the
  "queries" group explicitly.
* Makes the query helpers private and lets their errors propagate
  instead of printing them.
* Adds "LinkDomain.__repr__".
* Reworks the links rule scoring: the previous "safe" branch compared
  the domain object itself, not its status, to "LinkDomainStatus.SAFE".

---
 site/setup.cfg                       |  2 +-
 site/zenodo_rdm/cli.py               | 97 +++++++++++++++++++++++-----
 site/zenodo_rdm/moderation/models.py |  4 ++
 site/zenodo_rdm/moderation/rules.py  | 21 ++++----
 4 files changed, 98 insertions(+), 26 deletions(-)

diff --git a/site/setup.cfg b/site/setup.cfg
index 784ee087..7bd0290c 100644
--- a/site/setup.cfg
+++ b/site/setup.cfg
@@ -35,7 +35,7 @@ tests =
 [options.entry_points]
 flask.commands =
     zenodo-admin = zenodo_rdm.cli:zenodo_admin
-    moderation = zenodo_rdm.cli:moderation
+    moderation = zenodo_rdm.cli:moderation_cli
 invenio_base.blueprints =
     zenodo_rdm_legacy = zenodo_rdm.legacy.views:blueprint
     zenodo_rdm_support = zenodo_rdm.views:create_blueprint
diff --git a/site/zenodo_rdm/cli.py b/site/zenodo_rdm/cli.py
index 971db893..1d27e71f 100644
--- a/site/zenodo_rdm/cli.py
+++ b/site/zenodo_rdm/cli.py
@@ -29,7 +29,7 @@ from invenio_requests.records.models import RequestMetadata
 
 
 from zenodo_rdm.api import ZenodoRDMRecord
-from zenodo_rdm.moderation.models import ModerationQuery
+from zenodo_rdm.moderation.models import LinkDomain, LinkDomainStatus, ModerationQuery
 from zenodo_rdm.moderation.percolator import (
     create_percolator_index,
     get_percolator_index,
@@ -264,7 +264,7 @@ def moderation_cli():
     """Moderation commands."""
 
 
-@moderation_cli.group()
+@moderation_cli.group("queries")
 def queries_cli():
     """Moderation queries commands."""
 
@@ -333,18 +333,14 @@ def add_query(record_cls, query_string, notes, score, active, file):
     """Command to add a moderation query from CSV or directly and index it."""
     record_cls = ZenodoRDMRecord if record_cls == "records" else Community
 
-    try:
-        if file:
-            add_queries_from_csv(file, record_cls)
-        else:
-            create_and_index_query(record_cls, query_string, notes, score, active)
+    if file:
+        _add_queries_from_csv(file, record_cls)
+    else:
+        _create_and_index_query(record_cls, query_string, notes, score, active)
+    click.secho("Queries added and indexed successfully.", fg="green")
 
-        click.secho("Queries added and indexed successfully.")
-    except Exception as e:
-        click.secho(f"Error adding or indexing query: {e}")
-
 
-def add_queries_from_csv(file_path, record_cls=ZenodoRDMRecord):
+def _add_queries_from_csv(file_path, record_cls=ZenodoRDMRecord):
     """Load queries from a CSV file, add them to the database, and index them."""
     with open(file_path, mode="r", newline="", encoding="utf-8") as csvfile:
         csvreader = csv.reader(csvfile)
@@ -360,12 +356,12 @@ def add_queries_from_csv(file_path, record_cls=ZenodoRDMRecord):
 
                 # Ensure to add query only if there's a query string
                 if query_string:
-                    create_and_index_query(
+                    _create_and_index_query(
                         record_cls, query_string, notes, score, active
                     )
 
 
-def create_and_index_query(record_cls, query_string, notes, score, active):
+def _create_and_index_query(record_cls, query_string, notes, score, active):
     """Create and index a single moderation query."""
     query = ModerationQuery.create(
         query_string=query_string, notes=notes, score=score, active=active
@@ -373,3 +369,76 @@ def create_and_index_query(record_cls, query_string, notes, score, active):
     db.session.commit()
 
     index_percolate_query(record_cls, query.id, query_string, active, score, notes)
+
+
+@moderation_cli.group("domains")
+def domains_cli():
+    """Moderation domains commands."""
+
+
+@domains_cli.command("add")
+@click.option("-d", "--domain", help="The domain to add.")
+@click.option("-n", "--notes", help="Additional notes for the domain.")
+@click.option(
+    "-s",
+    "--status",
+    type=click.Choice(["banned", "safe"], case_sensitive=False),
+    help="The status for the domain.",
+    default="banned",
+)
+@click.option("--score", type=int, help="The score for the domain.")
+@click.option(
+    "-f",
+    "--file",
+    type=click.Path(exists=True, readable=True),
+    help="Path to CSV file containing domains.",
+)
+@with_appcontext
+def add_domain(domain, notes, status, score, file):
+    """Command to add moderated links domains, directly or from a CSV file."""
+    if file:
+        _add_domains_from_csv(file)
+    elif domain:
+        _create_domain(domain, notes, score, status)
+    else:
+        # "--domain" is not a required option so that "--file" works on its own
+        raise click.UsageError("Either --domain or --file must be provided.")
+
+
+def _create_domain(domain, notes, score, status):
+    """Create a moderated links domain.
+
+    :param domain: The domain to add.
+    :param notes: Additional notes for the domain (may be ``None``).
+    :param score: The score for the domain (may be ``None``).
+    :param status: ``"banned"`` or ``"safe"``, case-insensitive; empty is banned.
+    """
+    status = (status or "banned").strip().lower()
+    if status not in ("banned", "safe"):
+        # Never silently treat a mistyped or unknown status as "safe"
+        raise click.BadParameter(f"Unknown domain status: {status!r}")
+    status = LinkDomainStatus.BANNED if status == "banned" else LinkDomainStatus.SAFE
+    LinkDomain.create(domain, status, score, notes)
+    db.session.commit()
+    click.secho(f"Domain {domain} added successfully.", fg="green")
+
+
+def _add_domains_from_csv(file_path):
+    """Load domains from a CSV file, add them to the database.
+
+    The file needs a header row with a ``domain`` column; ``notes``, ``score``
+    and ``status`` columns are optional. Each row is committed on its own.
+    """
+    with open(file_path, mode="r", newline="", encoding="utf-8") as csvfile:
+        reader = csv.DictReader(csvfile)
+        for entry in reader:
+            domain = (entry["domain"] or "").strip()
+            if not domain:
+                # Skip rows that carry no domain value
+                continue
+            notes = entry.get("notes") or None
+            # CSV cells are strings: convert the score, empty cells mean "unset"
+            score = (entry.get("score") or "").strip()
+            score = int(score) if score else None
+            status = entry.get("status") or "banned"
+            _create_domain(domain, notes, score, status)
diff --git a/site/zenodo_rdm/moderation/models.py b/site/zenodo_rdm/moderation/models.py
index 57f325b2..dc515d63 100644
--- a/site/zenodo_rdm/moderation/models.py
+++ b/site/zenodo_rdm/moderation/models.py
@@ -74,6 +74,10 @@ def lookup_domain(cls, url):
             .scalar()
         )
 
+    def __repr__(self):
+        """Get a string representation of the link domain."""
+        return f"<LinkDomain {self.domain} ({self.status})>"
+
 
 class ModerationQuery(db.Model):
     """Moderation queries model."""
diff --git a/site/zenodo_rdm/moderation/rules.py b/site/zenodo_rdm/moderation/rules.py
index 8e6b8462..5ce69d47 100644
--- a/site/zenodo_rdm/moderation/rules.py
+++ b/site/zenodo_rdm/moderation/rules.py
@@ -11,7 +11,6 @@
 
 from flask import current_app
 from invenio_search import current_search_client
-from invenio_search.utils import build_alias_name
 
 from .models import LinkDomain, LinkDomainStatus
 from .percolator import get_percolator_index
@@ -70,14 +69,14 @@ def links_rule(identity, draft=None, record=None):
         domain = LinkDomain.lookup_domain(link)
         if domain is None:
             continue
-        if domain.status == LinkDomainStatus.BANNED:
-            if domain.score is not None:
-                score += domain.score
-            else:
-                score += current_scores.spam_link
-        elif domain == LinkDomainStatus.SAFE:
-            if domain.score is not None:
-                score += domain.score
-            else:
-                score += current_scores.ham_link
+        default_score = (
+            current_scores.ham_link
+            if domain.status == LinkDomainStatus.SAFE
+            else current_scores.spam_link
+        )
+        if domain.score is not None:
+            score += domain.score
+        else:
+            score += default_score
+
     return score