Handling missing content
The script will now tell you if it couldn't find all the content it was supposed to, and will retry automatically.

While scraping Belle Delphine, it took 15 retries to finally get the first 100 posts. The wait for a successful retry was roughly under 6 minutes.
SecretShell committed Sep 13, 2020
1 parent 5417cab commit 424588b
Showing 2 changed files with 28 additions and 31 deletions.
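
The fix boils down to: scrape in batches, detect batches whose results came back empty, and re-request only those until nothing is missing. A minimal sketch of that pattern, with illustrative names (scrape_all, scrape_batches, and pending are not the script's actual API):

# Hypothetical sketch of the retry-until-complete pattern this commit adds.
def scrape_all(pending, scrape_batches):
    results = []
    while pending:
        batch_results = scrape_batches(pending)
        results.extend(res for res in batch_results if res)
        # Keep only the requests whose results came back empty, then retry.
        pending = [req for req, res in zip(pending, batch_results) if not res]
        if pending:
            print("Missing " + str(len(pending)) + " batches... Retrying...")
    return results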
helpers/main_helper.py (8 additions, 27 deletions)
@@ -310,33 +310,14 @@ def json_request(session, link, method="GET", stream=False, json_format=True, da
     return result
 
 
-# def restore_missing_data(sessions, media_set):
-#     count = 0
-#     set_count = len(media_set)
-#     for item in media_set:
-#         if not item:
-#             negative_count = count-1
-#             positive_count = count+1
-#             if negative_count > 0 and negative_count < set_count:
-#                 print
-#             elif positive_count > 0 and positive_count < set_count:
-#                 media_item = media_set[positive_count]
-#                 s = [x["valid"] for x in media_item]
-#                 a = list(chain(*s))
-#                 a.sort(key=lambda x: x["post_id"])
-#                 q = a[0]
-#                 date_object = datetime.strptime(
-#                     q["postedAt"], "%d-%m-%Y %H:%M:%S")
-#                 postedAt = str(date_object.timestamp())
-#                 print(postedAt)
-#                 new_link = "ok"
-#                 r = json_request(sessions[0], new_link)
-#                 print
-#             else:
-#                 print
-#             print
-#         print
-#         count += 1
+def restore_missing_data(master_set2,media_set):
+    count = 0
+    new_set = []
+    for item in media_set:
+        if not item:
+            new_set.append(master_set2[count])
+        count += 1
+    return new_set
 
 
 def get_config(config_path):
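The rewritten restore_missing_data relies on the two lists being index-aligned: media_set[i] is the scrape result for master_set2[i], so every falsy result selects its request set for the next retry pass. A hypothetical usage (the link values are made up):

master_set2 = [["link_a"], ["link_b"], ["link_c"]]  # assumed request sets
media_set = [[{"id": 1}], [], []]                   # [] marks a failed scrape
retry_set = restore_missing_data(master_set2, media_set)
print(retry_set)  # [['link_b'], ['link_c']] -- only the failed batches remain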
modules/onlyfans.py (20 additions, 4 deletions)
@@ -327,7 +327,7 @@ def prepare_scraper(sessions, site_name, item):
         profile_scraper(link, sessions[0], directory, username)
         return
     if api_type == "Posts":
-        num = 50
+        num = 100
         link = link.replace("limit=0", "limit="+str(num))
         original_link = link
         ceil = math.ceil(api_count / num)
@@ -433,9 +433,25 @@ def process_mass_messages(message, limit):
                 str(item["id"])+"?app-token="+app_token+""
             master_set.append(link2)
     master_set2 = main_helper.assign_session(master_set, len(sessions))
-    media_set = pool.starmap(media_scraper, product(
-        master_set2, [sessions], [directories], [username], [api_type]))
-    # media_set = main_helper.restore_missing_data(sessions, media_set)
+    media_set = []
+    count = len(master_set2)
+    while True:
+        media_set2 = pool.starmap(media_scraper, product(
+            master_set2, [sessions], [directories], [username], [api_type]))
+        media_set.extend(media_set2)
+        if count > 1:
+            faulty = [x for x in media_set2 if not x]
+            if not faulty:
+                print("Found: "+api_type)
+                break
+            else:
+                num = len(faulty)*100
+                print("Missing "+str(num)+" Posts... Retrying...")
+                master_set2 = main_helper.restore_missing_data(
+                    master_set2, media_set2)
+        else:
+            print("No "+api_type+" Found.")
+            break
     media_set = main_helper.format_media_set(media_set)
     seen = set()

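Since the page size was raised from 50 to 100, each element of media_set2 holds the results of one 100-post request, so every empty result stands for up to 100 missing posts; that is where len(faulty)*100 in the progress message comes from. A hypothetical illustration (the post data is made up):

media_set2 = [[{"post_id": 1}], [], [{"post_id": 201}], []]  # two failed pages
faulty = [x for x in media_set2 if not x]
print("Missing " + str(len(faulty) * 100) + " Posts... Retrying...")
# prints: Missing 200 Posts... Retrying...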
