Added some feature requests from #16
- Split Vinted and Depop into separate tables in the SQLite db.

- [Depop] Added option `-n` to disable file download and only scrape product info, as suggested in #16.

- [Depop] More product info is now scraped (Address, discountedPriceAmount, dateUpdated).
Gertje823 committed Nov 13, 2022
1 parent 6fcd321 commit 9e7f8f8
Showing 2 changed files with 141 additions and 52 deletions.
15 changes: 10 additions & 5 deletions README.md
@@ -13,9 +13,12 @@ The script will download all the images and videos and put it in the downloads folder.
The data will be stored in the SQLite database.

## Arguments
`-p` scrape all images from your private messages. (requires `-s` to login and `-u` to set your userid)
`-s "your_vinted_fr_session"` to login to your account. [how to get sessionid?](https://github.com/Gertje823/Vinted-Scraper/wiki/How-to-get-Vinted-sessionID%3F)
`-u` set your userid
`-p` [Vinted] scrape all images from your private messages. (requires `-s` to login and `-u` to set your userid)
`-s "your_vinted_fr_session"` [Vinted] to login to your account. [how to get sessionid?](https://github.com/Gertje823/Vinted-Scraper/wiki/How-to-get-Vinted-sessionID%3F)
`-u` [Vinted] set your userid
`-n` [Depop] Disable file download (Only scrape product info)
`-g` [Depop] Also download sold items
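
For example, a hypothetical Depop run that only records product info (including sold items) and skips all file downloads could combine the new flags as `python scraper.py -n -g`; how target users are supplied to the script is unchanged by this commit.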


### Example:
Download all images from private messages from your Vinted account
@@ -90,9 +93,11 @@ All the info will be stored in the sqlite db in the following tables:
`Brand`
`Colors`
`Price`
`Images`
`Image`
`Description`
`Title`
`Platform`

`Address`
`discountedPriceAmount`
`dateUpdated`
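
As a quick illustration of what the split schema enables (a sketch, not code from the repo; the database filename is an assumption, since the diff does not show the `sqlite3.connect()` call):

```python
import sqlite3

# Sketch: read back the new Depop-specific columns.
# ASSUMPTION: 'vinted_scraper.db' stands in for whatever filename
# scraper.py actually passes to sqlite3.connect().
conn = sqlite3.connect('vinted_scraper.db')
c = conn.cursor()

# Depop items now live in their own Depop_Data table, separate from Vinted.
for row in c.execute(
        "SELECT Title, Price, discountedPriceAmount, dateUpdated, Address "
        "FROM Depop_Data"):
    print(row)

conn.close()
```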
If you have any feature requests, don't hesitate to open an issue :)
178 changes: 131 additions & 47 deletions scraper.py
@@ -13,6 +13,8 @@
parser.add_argument('--private_msg','-p',dest='priv_msg', action='store_true', help='Download images from private messages from Vinted')
parser.add_argument('--user_id','-u',dest='user_id', action='store', help='Your own userid', required=False)
parser.add_argument('--session_id','-s',dest='session_id', action='store', help='Session id cookie for Vinted', required=False)
parser.add_argument('--disable-file-download','-n',dest='disable_file_download', action='store_true', help='Disable file download (Currently only working for depop)', required=False)
parser.add_argument('--sold_items','-g',dest='sold_items', action='store_true', help='Also download sold items (depop)', required=False)
args = parser.parse_args()

# create downloads folders
@@ -32,6 +34,8 @@
# Create Data table if not exists
c.execute('''CREATE TABLE IF NOT EXISTS Data
(ID, User_id, Sold, Gender, Category, subcategory, size, State, Brand, Colors, Price, Image, Images, Description, Title, Platform)''')
c.execute('''CREATE TABLE IF NOT EXISTS Depop_Data
(ID UNIQUE, User_id, Sold, Gender, Category, subcategory, size, State, Brand, Colors, Price, Image, Description, Title, Platform, Address, discountedPriceAmount, dateUpdated)''')
# Create Users table if not exists
c.execute('''CREATE TABLE IF NOT EXISTS Users
(Username, User_id, Gender, Given_item_count, Taken_item_count, Followers_count, Following_count, Positive_feedback_count, Negative_feedback_count, Feedback_reputation, Avatar, Created_at, Last_loged_on_ts, City_id, City, Country_title, Verification_email, Verification_facebook, Verification_google, Verification_phone, Platform)''')
@@ -284,6 +288,29 @@ def download_vinted_data(userids, s):
print(f"User {USER_ID} does not exists")
conn.close()

def get_all_depop_items(data, baseurl, slugs):
    # Collect product slugs from Depop's paginated shop endpoint,
    # following meta['last_offset_id'] until meta['end'] is reached.
    for i in data['products']:
        slugs.append(i['slug'])
    while True:
        url = baseurl + f"&offset_id={data['meta']['last_offset_id']}"
        print(url)
        try:
            data = requests.get(url).json()
            # print(data)
        except:
            print(requests.get(url).text)
            exit()
        for i in data['products']:
            # Prevent duplicates
            if not i['slug'] in slugs:
                slugs.append(i['slug'])
        if data['meta']['end'] == True:
            break
    return slugs

def download_depop_data(userids):
Platform = "Depop"
for userid in userids:
@@ -339,25 +366,14 @@ def download_depop_data(userids):
baseurl = f"https://webapi.depop.com/api/v1/shop/{id}/products/?limit=200"
data = requests.get(baseurl).json()
print("Fetching all produts...")
for i in data['products']:
slugs.append(i['slug'])
while True:
slugs = get_all_depop_items(data, baseurl, slugs)

url = baseurl + f"&offset_id={data['meta']['last_offset_id']}"
if args.sold_items:
baseurl = f"https://webapi.depop.com/api/v1/shop/{id}/filteredProducts/sold?limit=200"
data = requests.get(baseurl).json()
get_all_depop_items(data, baseurl, slugs)

print(url)
try:
data = requests.get(url).json()
#print(data)
except:
print(requests.get(url).text)
exit()
break
for i in data['products']:
slugs.append(i['slug'])
if data['meta']['end'] == True:
print("Got all products. Start Downloading...")
break
print("Got all products. Start Downloading...")
print(len(slugs))
path = "downloads/" + str(userid) + '/'
try:
@@ -366,7 +382,7 @@
print("Creation of the directory %s failed or the folder already exists " % path)
for slug in slugs:
url = f"https://webapi.depop.com/api/v2/product/{slug}"
#print(url)
print(slug)
try:
product_data = requests.get(url).json()
except ValueError:
@@ -378,7 +394,20 @@
Gender = product_data['gender']
except KeyError:
Gender = None
Category = product_data['categoryId']
try:
Category = product_data['group']
except KeyError:
Category = product_data['categoryId']
try:
subcategory = product_data['productType']
except KeyError:
subcategory = None
address = product_data['address']
dateUpdated = product_data['dateUpdated']
try:
State = product_data['condition']['name']
except KeyError:
@@ -390,6 +419,12 @@
title = slug.replace("-"," ")

Colors = []
# Get discountedPriceAmount if available
try:
discountedPriceAmount = product_data['price']['discountedPriceAmount']
except KeyError:
discountedPriceAmount = None
# Get colors if available
try:
for color in product_data['colour']:
@@ -416,45 +451,94 @@
for i in images:
full_size_url = i['url']
img_name = i['id']
print(img_name)

filepath = 'downloads/' + str(userid) + '/' + str(img_name) + '.jpg'
if not os.path.isfile(filepath):
print(full_size_url)
req = requests.get(full_size_url)
params = (
product_id, Sold, id, Gender, Category, ','.join(sizes), State, Brand, ','.join(Colors), Price, filepath, description, title, Platform)
if not args.disable_file_download:
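# Downloads enabled: save the image file and insert or update the matching Depop_Data row.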
if not os.path.isfile(filepath):
c.execute(
f"SELECT ID FROM Depop_Data WHERE ID = {product_id}")
result = c.fetchone()
if result:
# Already exists
c.execute('''UPDATE Depop_Data SET Image = ? WHERE ID = ?''', (filepath, product_id))
conn.commit()
req = requests.get(full_size_url)
with open(filepath, 'wb') as f:
f.write(req.content)
print(f"Image saved to {filepath}")
else:
print(img_name)
print(full_size_url)
req = requests.get(full_size_url)
params = (
product_id, Sold, id, Gender, Category, subcategory, ','.join(sizes), State, Brand, ','.join(Colors), Price, filepath, description, title, Platform, address, discountedPriceAmount, dateUpdated)
c.execute(
"INSERT OR IGNORE INTO Depop_Data(ID, User_id, Sold, Gender, Category, subcategory, size, State, Brand, Colors, Price, Image, Description, Title, Platform, Address, discountedPriceAmount, dateUpdated)VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
params)
conn.commit()
with open(filepath, 'wb') as f:
f.write(req.content)
print(f"Image saved to {filepath}")
else:
print('File already exists, skipped.')
elif args.disable_file_download:
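# -n was given: skip the file download and only record the product info.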
c.execute(
"INSERT OR IGNORE INTO Data(ID, Sold, User_id, Gender, Category, size, State, Brand, Colors, Price, Images, description, title, Platform)VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
params)
conn.commit()
with open(filepath, 'wb') as f:
f.write(req.content)
print(f"Image saved to {filepath}")
else:
print('File already exists, skipped.')
f"SELECT ID FROM Depop_Data WHERE ID = {product_id}")
result = c.fetchone()
if result:
#Already exists
continue
else:
params = (
product_id, Sold, id, Gender, Category, subcategory, ','.join(sizes), State, Brand, ','.join(Colors),
Price, description, title, Platform, address, discountedPriceAmount, dateUpdated)
c.execute(
"INSERT OR IGNORE INTO Depop_Data(ID, Sold, User_id, Gender, Category, subcategory, size, State, Brand, Colors, Price, description, title, Platform, Address, discountedPriceAmount, dateUpdated)VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
params)
conn.commit()

# Download videos
if len(product_data['videos']) > 0:
for x in product_data['videos']:
for source in x['sources']:
if source['format'] == 'MP4':
video_url = source['url']
print(video_url)
file_name = video_url.split('/')[5]
filepath = 'downloads/' + str(userid) + '/' + str(file_name)
if not os.path.isfile(filepath):
req = requests.get(video_url)
params = (
product_id, Sold, id, Gender, Category, ','.join(sizes), State, Brand,
','.join(Colors), Price, filepath, description, title, Platform)
if not args.disable_file_download:
if not os.path.isfile(filepath):
req = requests.get(video_url)
print(video_url)
params = (
product_id, Sold, id, Gender, Category, subcategory, ','.join(sizes), State, Brand,
','.join(Colors), Price, filepath, description, title, Platform, address, discountedPriceAmount, dateUpdated)
c.execute(
"INSERT OR IGNORE INTO Depop_Data(ID, Sold, User_id, Gender, Category, subcategory, size, State, Brand, Colors, Price, Images, description, title, Platform, Address, discountedPriceAmount, dateUpdated)VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
params)
conn.commit()
with open(filepath, 'wb') as f:
f.write(req.content)
print(f"Video saved to {filepath}")
else:
if not args.disable_file_download:
print('File already exists, skipped.')
elif args.disable_file_download:
c.execute(
"INSERT OR IGNORE INTO Data(ID, Sold, User_id, Gender, Category, size, State, Brand, Colors, Price, Images, description, title, Platform)VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
params)
conn.commit()
with open(filepath, 'wb') as f:
f.write(req.content)
print(f"Video saved to {filepath}")
else:
print('File already exists, skipped.')
f"SELECT ID FROM Depop_Data WHERE ID = {product_id}")
result = c.fetchone()
if result:
# Already exists
continue
else:
params = (
product_id, Sold, id, Gender, Category, subcategory, ','.join(sizes), State,
Brand, ','.join(Colors),
Price, description, title, Platform, address, discountedPriceAmount, dateUpdated)
c.execute(
"INSERT OR IGNORE INTO Depop_Data(ID, Sold, User_id, Gender, Category, subcategory, size, State, Brand, Colors, Price, description, title, Platform, Address, discountedPriceAmount, dateUpdated)VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
params)
conn.commit()



