Decreased the number of posts fetched at once
The script will now grab 50 posts per request instead of 100.
SecretShell committed Sep 13, 2020
1 parent 24b33ee commit 5417cab
Showing 3 changed files with 38 additions and 6 deletions.
1 change: 1 addition & 0 deletions datascraper/main_datascraper.py
@@ -201,6 +201,7 @@ def start_datascraper():
         if names:
             print("Names: Username = username | "+subscription_array[1])
             if not auto_scrape_names:
+                value = "2"
                 value = input().strip()
             if value.isdigit():
                 if value == "0":
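Note: as committed, the added assignment is immediately overwritten by input(), so its purpose is not visible in this hunk alone. The likely intent (an assumption here, not confirmed by the diff) is a default menu choice of "2" that automated runs keep while interactive runs override. A minimal sketch of that pattern, with names taken from the hunk above:

def choose_menu_value(auto_scrape_names, default="2"):
    # Seed a default choice; interactive runs override it from stdin.
    value = default
    if not auto_scrape_names:
        value = input().strip()
    return value

# choose_menu_value(True) -> "2" without ever prompting.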
40 changes: 35 additions & 5 deletions helpers/main_helper.py
@@ -262,10 +262,10 @@ def session_retry_rules(r, link):
     return boolean


-def json_request(session, link, method="GET", stream=False, json_format=True, data={}, sleep=True, timeout=10):
+def json_request(session, link, method="GET", stream=False, json_format=True, data={}, sleep=True, timeout=20):
     session = session_rules(session, link)
     count = 0
-    sleep_number = random.randint(2, 5)
+    sleep_number = 0.5
     result = {}
     while count < 11:
         try:
@@ -302,13 +302,43 @@ def json_request(session, link, method="GET", stream=False, json_format=True, da
         except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError, requests.exceptions.ReadTimeout, socket.timeout) as e:
             if sleep:
                 time.sleep(sleep_number)
+                sleep_number += 0.5
             continue
         except Exception as e:
             log_error.exception(e)
             continue
     return result


+# def restore_missing_data(sessions, media_set):
+#     count = 0
+#     set_count = len(media_set)
+#     for item in media_set:
+#         if not item:
+#             negative_count = count-1
+#             positive_count = count+1
+#             if negative_count > 0 and negative_count < set_count:
+#                 print
+#             elif positive_count > 0 and positive_count < set_count:
+#                 media_item = media_set[positive_count]
+#                 s = [x["valid"] for x in media_item]
+#                 a = list(chain(*s))
+#                 a.sort(key=lambda x: x["post_id"])
+#                 q = a[0]
+#                 date_object = datetime.strptime(
+#                     q["postedAt"], "%d-%m-%Y %H:%M:%S")
+#                 postedAt = str(date_object.timestamp())
+#                 print(postedAt)
+#                 new_link = "ok"
+#                 r = json_request(sessions[0], new_link)
+#                 print
+#             else:
+#                 print
+#             print
+#         print
+#         count += 1


 def get_config(config_path):
     if os.path.isfile(config_path):
         if os.stat(config_path).st_size > 0:
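Taken together, the json_request edits replace a fixed random pause (2-5s via random.randint) with a linear backoff: the first retry waits 0.5s and every further connection or timeout error adds another 0.5s, across up to 11 attempts. A minimal standalone sketch of that schedule, with request_once as a hypothetical stand-in for the real session call:

import time

def retry_with_linear_backoff(request_once, max_attempts=11, sleep=True):
    # Approximates json_request's loop: waits 0.5s, 1.0s, 1.5s, ...
    # between failed attempts and returns {} if every attempt fails.
    sleep_number = 0.5
    for _ in range(max_attempts):
        try:
            return request_once()
        except (ConnectionError, TimeoutError):
            if sleep:
                time.sleep(sleep_number)
                sleep_number += 0.5  # the increment added in this commit
    return {}

Assuming 11 failed attempts, the total wait is roughly 0.5 + 1.0 + ... + 5.5 ≈ 33s, versus 2-5s per retry (22-55s total) before this change, so transient errors now retry much faster.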
@@ -413,13 +443,13 @@ def grouper(n, iterable, fillvalue=None):
     return list(zip_longest(fillvalue=fillvalue, *args))


-def assign_session(medias, number):
+def assign_session(medias, number, key_one="link", key_two="count"):
     count = 0
     medias2 = []
     for auth in medias:
         media2 = {}
-        media2["link"] = auth
-        media2["count"] = count
+        media2[key_one] = auth
+        media2[key_two] = count
         medias2.append(media2)
         count += 1
         if count == number:
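The new key_one/key_two parameters keep the old dict shape as the default while letting other call sites rename the fields. The visible loop tags each item with a session index that wraps at number, i.e. round-robin assignment; the wrap-around reset and return fall below the truncated hunk, so they are assumed in this sketch:

def assign_session(medias, number, key_one="link", key_two="count"):
    # Round-robin: item i is tagged with session index i % number.
    count = 0
    medias2 = []
    for auth in medias:
        media2 = {key_one: auth, key_two: count}
        medias2.append(media2)
        count += 1
        if count == number:
            count = 0  # assumed: wrap back to the first session
    return medias2  # assumed: below the truncated hunk

# assign_session(["a", "b", "c"], 2) ->
# [{"link": "a", "count": 0}, {"link": "b", "count": 1}, {"link": "c", "count": 0}]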
3 changes: 2 additions & 1 deletion modules/onlyfans.py
@@ -327,7 +327,7 @@ def prepare_scraper(sessions, site_name, item):
         profile_scraper(link, sessions[0], directory, username)
         return
     if api_type == "Posts":
-        num = 100
+        num = 50
         link = link.replace("limit=0", "limit="+str(num))
         original_link = link
         ceil = math.ceil(api_count / num)
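With num halved to 50, the scraper still covers the same api_count posts; it simply issues twice as many, smaller requests: limit=0 in the template link becomes limit=50, and ceil(api_count / num) pages are fetched. A sketch of that arithmetic with a made-up link shape (the real offset handling sits below the truncated hunk):

import math

def build_page_links(original_link, api_count, num=50):
    # e.g. original_link = "https://example.invalid/posts?limit=0&offset=0"
    link = original_link.replace("limit=0", "limit=" + str(num))
    pages = math.ceil(api_count / num)  # e.g. 120 posts -> 3 pages of 50
    # Assumed offset scheme: one request per page of `num` posts.
    return [link.replace("offset=0", "offset=" + str(page * num))
            for page in range(pages)]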
@@ -435,6 +435,7 @@ def process_mass_messages(message, limit):
     master_set2 = main_helper.assign_session(master_set, len(sessions))
     media_set = pool.starmap(media_scraper, product(
         master_set2, [sessions], [directories], [username], [api_type]))
+    # media_set = main_helper.restore_missing_data(sessions, media_set)
     media_set = main_helper.format_media_set(media_set)
     seen = set()

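For reference, the pool.starmap/product pairing above fans media_scraper out over every entry of master_set2 while passing the same sessions, directories, username, and api_type to each call. A toy reproduction of that call shape (all names and data below are placeholders, not the real scraper):

from itertools import product
from multiprocessing.dummy import Pool  # thread-backed Pool as a stand-in

def media_scraper(task, sessions, directories, username, api_type):
    return (task["link"], api_type)  # placeholder body

tasks = [{"link": "a", "count": 0}, {"link": "b", "count": 1}]
pool = Pool(2)
# product(tasks, [sessions], ...) yields one argument tuple per task;
# wrapping the shared arguments in one-element lists keeps them constant.
media_set = pool.starmap(media_scraper, product(
    tasks, [["session0"]], [["downloads"]], ["username"], ["Posts"]))
print(media_set)  # [('a', 'Posts'), ('b', 'Posts')]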
