Skip to content

Commit

Permalink
Restart from Italy
Browse files (browse the repository at this point in the history)
  • Loading branch information
wenhwang97 committed Feb 13, 2025
1 parent 3bc1394 commit 24d053a
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions scraper/app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -175,9 +175,6 @@ def scrape_country(country, latest_date, keywords):
if contype == "政策":
continue

for script in article.find(id="zoom").find_all("script"):
script.decompose()

if article.find(id="artitle") is not None:
title = article.find(id="artitle").text.strip()
else:
Expand Down Expand Up @@ -235,7 +232,15 @@ def scrape_country(country, latest_date, keywords):
def scrape():
print("[MOF Scraper] Sraping started at " + datetime.now().isoformat() + "\n")
ignore = ["CN", "HK", "MO", "TW"] # ignore Mainland China, Hong Kong, Macau, and Taiwan
start_scraping = False
start_point = "IT"
for country in pycountry.countries:
if country.alpha_2 == start_point:
start_scraping = True

if not start_scraping:
continue

if country.alpha_2 not in ignore:
url = os.getenv("NOCO_DB_URL")
headers = {"xc-token": os.getenv("NOCO_XC_TOKEN")}
Expand Down

0 comments on commit 24d053a

Please sign in to comment.