-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsteam_wishlist_extractor.py
243 lines (183 loc) · 7.35 KB
/
steam_wishlist_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
### Grabs prices from the specified steam wishlist and logs them in a db daily.
import platform
import json
import datetime
import sqlite3
import importlib.util
import requests
from bs4 import BeautifulSoup
# example query for monitoring
dqdqdqdqdqd = """
select date, count(*)
from games
where date >= date('now', '-8 day')
group by date
order by date asc"""
# generating a date range. watch out for off by one errors.
q_date_range = """
WITH RECURSIVE dates(date) AS (
VALUES(date('now', '-8 day'))
UNION ALL
SELECT date(date, '+1 day')
FROM dates
WHERE date < date('now')
)
SELECT date FROM dates;
"""
# finding discounts
disc_query = """
select games.*, max_price_30d, avg_price_30d,
round(100*(1-sale_price/avg_price_30d),0) as discount_30d
from games
join (select app_id, avg(orig_price) as avg_price_30d, max(orig_price) as max_price_30d
from games
where date > date('now', '-30 day')
group by app_id) ga on games.app_id=ga.app_id
where games.date = (select max(date) from games)
and (games.orig_price < avg_price_30d-0.05 or games.sale_price < avg_price_30d-0.05)
"""
# ------------------------------------------
# CONFIG
steam_wishlist_url = "put your public wishlist URL here"
# used to construct store page links in conjunction with app IDs
store_base_url = "https://store.steampowered.com/app/"
# sqlite db where we'll store this
dbfile = "steam_wishlist.db"
verbose = 0
win_dblib_location = "C:/pylib/sqlitelib.py"
linux_dblib_location = "/mnt/c/pylib/sqlitelib.py"
# ------------------------------------------
# custom path for local library imports
platform = platform.system()
if platform == 'Linux':
dblib_location = linux_dblib_location
else:
dblib_Location = win_dblib_location
spec = importlib.util.spec_from_file_location("sqlitelib", dblib_location)
dblib = importlib.util.module_from_spec(spec)
spec.loader.exec_module(dblib)
db = dblib.sqlitelib(dbfile)
# ------------------------------------------
def extract_wishlist_items(items_json):
# given wishlist items parsed from json, extract prices etc.
game_ids = list(items_json.keys())
if len(game_ids) < 1:
print("error: no wishlist items returned")
return None
else:
# price extraction logic:
# skip if free, denoted by 'free' = 1
# skip if pre-release denoted by 'prerelease' = 1
wishlist = []
for i in game_ids:
row = {}
wish_item = items_json[i]
row["id"] = i
row["name"] = wish_item["name"]
row["store_link"] = store_base_url + i
# there's an 'id' field in the subs dict. seems to be an ID of a particular item for the "app",
# for when there might be multiple.
# subs: {discount_block(orig & final price), discount_pct, id, price}
# sometimes there's more than one sub, each with its own id.
# I'm *assuming* that the first sub is always the base item. this may not be a correct assumption,
# but I'm not seeing anything that doesn't adhere to this at the moment.
# also assuming I want the base game, not any add-ons or whatever.
if "free" in wish_item:
if wish_item["free"] == 1:
continue
elif "prerelease" in wish_item:
if wish_item["prerelease"] == 1:
continue
try:
if "discount_block" in wish_item["subs"][0]:
# assumption described above is here
discblk = wish_item["subs"][0]["discount_block"]
try:
bsoup = BeautifulSoup(discblk, "html.parser")
if "discount_original_price" in discblk:
# item is on sale
orig_price = float(
bsoup.select_one(
".discount_original_price"
).text.replace("$", "")
)
sale_price = float(
bsoup.select_one(".discount_final_price").text.replace(
"$", ""
)
)
disc_pct = round(
1 - (orig_price - sale_price) / orig_price, 3
)
else:
# not on sale
orig_price = float(
bsoup.select_one(".discount_final_price").text.replace(
"$", ""
)
)
sale_price = None
disc_pct = None
except:
print("error parsing discount block for row:\n", row)
print(discblk, "\n")
row["orig_price"] = orig_price
row["sale_price"] = sale_price
row["disc_pct"] = disc_pct
else:
print("-- NO DISCOUT BLOCK FOUND --\n", row)
pprint(wish_item) # ['subs'])
print("")
except:
print("err323423")
print(wish_item)
# pprint(ws_arr[i]['subs'])
# print()
wishlist.append(row)
return wishlist
def get_wishlist_page():
# gets wishlist items, names, etc. from the wishlist page.
wishlist_html = requests.get(steam_wishlist_url)
soup = BeautifulSoup(wishlist_html.text, "html.parser")
# items = soup.find_all('div', attrs={'class': 'wishlistRowItem'}) # old
items_json = soup.find_all("script")
if len(items_json) > 1:
# get json array items for wishlist.
ws_arr = None
for n, i in enumerate(items_json):
element = str(i)
if "g_rgAppInfo" in element:
e1 = element.split("var ")
for i in e1:
if "g_rgAppInfo" in i:
e2 = i.split("g_rgAppInfo = ")[1]
e3 = e2.rstrip().rstrip(";")
if len(e3) > 0:
ws_arr = json.loads(e3)
break
return extract_wishlist_items(ws_arr)
def insert_items(wishlist):
# Inserts items into the sqlite db.
# add date to wishlist
curdate = datetime.date.today()
for i in range(len(wishlist)):
wishlist[i]["date"] = curdate
sqlite_table_ddl = """CREATE TABLE IF NOT EXISTS games (
date text NOT NULL,
app_id integer NOT NULL,
name text NOT NULL,
orig_price REAL NOT NULL,
sale_price REAL,
discount REAL,
store_link text NOT NULL,
PRIMARY KEY (date, app_id)
)"""
db.execute(sqlite_table_ddl)
insert_query = """insert or ignore into games(date, app_id, name,
orig_price, sale_price, discount, store_link)
values (:date, :id, :name, :orig_price, :sale_price, :disc_pct, :store_link)
""" # ON CONFLICT IGNORE"""
db.executemany(insert_query, wishlist)
### MAIN:
wishlist = get_wishlist_page()
insert_items(wishlist)