-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaroof_scraper.py
32 lines (25 loc) · 1.07 KB
/
maroof_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Scraping data from Maroof.sa
# This file contains the functions needed to extract merchandise lists
# Import the required libraries
from os import link
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.microsoft import EdgeChromiumDriverManager
# Default listing page opened by get_links(). The query string asks for
# bid=51 ordered by sortProperty=BestRating with DESC=True
# (presumably business-type id 51, best rated first -- confirm against the site).
URL = "https://maroof.sa/BusinessType/BusinessesByTypeList?bid=51&sortProperty=BestRating&DESC=True"
def get_links(url=URL, pagenation=5):
    """Collect the ``href`` values of the entries shown on a listing page.

    Opens ``url`` in an Edge browser, clicks the "Load more" button
    ``pagenation`` times so that more entries are rendered, then reads the
    ``href`` attribute of every element whose class is ``tab-pro-container``.
    A short summary (count and first five values) is printed.

    Args:
        url: Page to open. Defaults to the module-level ``URL``.
        pagenation: How many times the "Load more" button is clicked.

    Returns:
        list: One item per matched element -- the value of its ``href``
        attribute as reported by Selenium.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the
            "Load more" button cannot be located for one of the clicks.
    """
    # NOTE(review): passing the driver path positionally is the Selenium 3
    # calling style; newer Selenium 4 releases expect a Service object.
    # Confirm against the installed Selenium version.
    driver = webdriver.Edge(EdgeChromiumDriverManager().install())
    try:
        driver.get(url)

        # Each click on "Load more" appends another batch of entries to the
        # page; the short pause gives the previous batch time to render.
        for _ in range(pagenation):
            time.sleep(0.5)
            driver.find_element(By.XPATH, '//*[@id="loadMore"]/button').click()

        # Read the attributes while the browser session is still alive:
        # WebElements cannot be queried after the driver has quit.
        elements = driver.find_elements(By.CLASS_NAME, "tab-pro-container")
        links = [element.get_attribute("href") for element in elements]
    finally:
        # Always shut the browser down, even if a lookup or click failed,
        # so repeated calls do not leak Edge/driver processes.
        driver.quit()

    print(f"{len(links)} data was scraped successfully")
    print(links[:5])
    return links
## -------------------------------------