Skip to content


Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
SecretShell committed Jun 24, 2019
0 parents commit c159a11
Show file tree
Hide file tree
Showing 7 changed files with 809 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

Binary file added Examples/1.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

116 changes: 116 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import requests
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
import os
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing import current_process, Pool
import multiprocessing
from itertools import product
import re
import json

# You MUST fill all of this in for the script to work :D
# settings.json is read once at start-up; the context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('settings.json') as settings_file:
    json_data = json.load(settings_file)
# Base folder for downloads; the per-user folders live below "<directory>/Users/".
j_directory = json_data['directory']+"/Users/"
app_token = json_data['app-token']
sess = json_data['sess']
user_agent = json_data['user-agent']

# You don't have to fill anything else in below this line :)
# Keyword arguments describing the "sess" cookie that carries the session value
# from settings.json.
# NOTE(review): 'domain' is blank here - confirm the intended cookie domain.
auth_cookie = {
    'domain': '',
    'expires': None,
    'name': 'sess',
    'path': '/',
    'value': sess,
    'version': 0,
}

# One shared HTTP session used by every request the script makes.
session = requests.Session()
# Install the cookie on the session; previously the dict was built but never
# used, so requests were sent without the "sess" value.
session.cookies.set(**auth_cookie)
# NOTE(review): 'Referer' is blank here - confirm the intended value.
session.headers = {
    'User-Agent': user_agent, 'Referer': ''}

def link_check(link):
    """Fetch a profile page and return the user id embedded in it.

    Args:
        link: URL of the profile page, requested through the shared ``session``.

    Returns:
        The ``data-user`` attribute of the first ``<a>`` carrying that
        attribute inside ``<div class="b-users">``, or ``False`` when the page
        has no such container or anchor.
    """
    r = session.get(link)
    html = BeautifulSoup(r.content, 'html.parser')

    users_block = html.find("div", {"class": "b-users"})
    if users_block is None:
        # A page without the container used to raise AttributeError on the
        # chained .find(); report "not found" like every other miss instead.
        return False

    # attrs has to be a dict mapping attribute name -> filter. The previous
    # {"data-user", True} was a set literal, not an attribute filter.
    user_anchor = users_block.find("a", attrs={"data-user": True})
    if user_anchor:
        return user_anchor["data-user"]
    return False

def scrape_choice():
    """Ask which media types to scrape and run the matching scraper(s).

    Reads one line from stdin: ``a`` scrapes images then videos, ``b`` images
    only, ``c`` videos only. Any other answer does nothing. Uses the
    module-level ``user_id`` and ``j_directory``.
    """
    print('Scrape: a = Everything | b = Images | c = Videos')
    # Normalize so answers like "A" or " b " are accepted too.
    input_choice = input().strip().lower()

    # Each assignment used to end in a dangling "\" line continuation, which
    # spliced it onto the next statement and made the file unparsable.
    # NOTE(review): the endpoint prefix is an empty string and the query ends
    # at "publish_date_" - both look truncated; restore the full URLs.
    image_api = ""+user_id+"/posts/photos?limit=1000&offset=0&order=publish_date_"
    video_api = ""+user_id+"/posts/videos?limit=1000&offset=0&order=publish_date_"

    # The choices are mutually exclusive, hence an if/elif chain.
    if input_choice == "a":
        location = "/Images/"
        media_scraper(image_api, location, j_directory)
        print("Photos Finished")
        location = "/Videos/"
        media_scraper(video_api, location, j_directory)
        print("Videos Finished")
    elif input_choice == "b":
        location = "/Images/"
        media_scraper(image_api, location, j_directory)
    elif input_choice == "c":
        location = "/Videos/"
        media_scraper(video_api, location, j_directory)

def media_scraper(link, location, directory):
    """Download every media file listed by one API endpoint.

    Args:
        link: API URL whose JSON response is a list of posts, each with a
            ``"media"`` list whose items expose ``["source"]["source"]``.
        location: Sub-folder suffix for this media type, e.g. ``"/Images/"``.
        directory: Base users folder. The bare value ``"/Users/"`` means no
            directory was configured, so files go next to this script.

    Side effects:
        Creates the target folder if needed and downloads all files
        concurrently via ``download_media``. Reads the module-level
        ``username``.
    """
    r = session.get(link)
    posts = json.loads(r.text)

    # download_media expects (index, {"link": url}) pairs, so keep the
    # index-keyed dict shape while collecting the links.
    media_set = {}
    for post in posts:
        for media in post["media"]:
            media_set[len(media_set)] = {"link": media["source"]["source"]}

    if "/Users/" == directory:
        # No directory configured: fall back to the folder holding this script.
        directory = os.path.dirname(os.path.realpath(__file__))+"/Users/"+username+location
    else:
        # Without this else the user/location suffix was appended a second
        # time to the fallback path built above.
        directory = directory+username+location

    print("DIRECTORY - " + directory)
    # The existence check used to have no body, so the folder was never
    # created; exist_ok also covers a concurrent creation.
    os.makedirs(directory, exist_ok=True)

    max_threads = multiprocessing.cpu_count()
    # starmap blocks until every download has finished; the context manager
    # then releases the worker threads instead of leaking the pool.
    with ThreadPool(max_threads) as pool:
        pool.starmap(download_media, product(media_set.items(), [directory]))

def download_media(media, directory):
    """Download one media file into ``directory``.

    Args:
        media: An ``(index, {"link": url})`` pair; only the link is used.
        directory: Destination folder for the file.

    The file is saved under the last path segment of the URL.
    """
    from urllib.parse import urlsplit

    link = media[1]["link"]
    # Take the name from the URL *path* only, so a query string such as
    # "?token=..." no longer ends up inside the file name on disk.
    file_name = urlsplit(link).path.rsplit('/', 1)[-1]
    # os.path.join tolerates a directory with or without a trailing slash.
    urlretrieve(link, os.path.join(directory, file_name))

# Interactive entry point: keep asking for profiles until the process is stopped.
while True:
    print('Input a username or profile link')
    input_link = input().strip()
    # Accept either a bare username or a profile link. Trailing slashes are
    # dropped first so ".../name/" still yields "name", not an empty string.
    username = input_link.rstrip('/').rsplit('/', 1)[-1]
    # NOTE(review): the base URL prefix is an empty string here - it looks
    # truncated; restore the site's profile URL prefix.
    input_link = ''+username
    user_id = link_check(input_link)
    if not user_id:
        print("User Not Found")
        print("First time? Did you forget to edit your settings.json file?")
        continue
    # The profile resolved to an id, so let the user pick what to download.
    # Previously the loop never called the scraper.
    scrape_choice()
14 changes: 14 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Open settings.json


// Directory - Leave directory empty if you want files to be downloaded in the script folder

// Directory - If you do fill it in, remember to use forward slashes ("/") only


// It is MANDATORY to fill in everything else; otherwise the script won't work

// After logging in to the site in your browser, check the Examples folder to see how to get the app-token, sess and user-agent values
1 change: 1 addition & 0 deletions settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

0 comments on commit c159a11

Please sign in to comment.