-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmklist-filmchest-com
executable file
·73 lines (67 loc) · 2.17 KB
/
mklist-filmchest-com
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python
"""
Fetch the list of public domain movies distributed by Film Chest.

When --imdblookup is given, also look up the IMDB title ID of each movie
by searching IMDB for its title and year.
"""
import argparse
import lxml.html
import movielib
import urllib2
import urlparse
def fetch_movie_list(args, list, url):
    """Scrape one Film Chest listing page into the dictionary ``list``.

    The page at ``url`` is downloaded with movielib.http_get_read() and
    parsed with lxml.  Every ``div.browse-item`` element yields one entry
    holding the title, year, availability text and the absolute URL of
    the movie's own page.

    args -- parsed command line options; only ``args.imdblookup`` is read.
    list -- dictionary updated in place.  Entries are keyed by the movie
            page URL, or by the value returned from
            movielib.imdb_find_one() (presumably an IMDB title ID) when
            ``args.imdblookup`` is set and the lookup returns something
            truthy.
    url  -- address of the listing page; also used as the base when
            resolving the link of each movie.

    Returns ``list``, or None when the download raises urllib2.HTTPError.
    """
    try:
        html = movielib.http_get_read(url)
        page = lxml.html.fromstring(html)
    except urllib2.HTTPError:
        return None

    for item in page.cssselect("div.browse-item"):
        heading = item.cssselect("div.con-holder a h2")[0]
        name = heading.text_content()

        # Keep only the text before the first '-' and, of that, the text
        # before the first '/', and read it as the year.
        yeartext = item.cssselect("div.con-holder h3")[0].text_content()
        firstyear = int(yeartext.split('-')[0].split('/')[0])

        where = item.cssselect("div.act-holder dl dd")[0].text_content()

        # The parent element of the heading carries the link to the
        # movie's own page; resolve it against the listing URL.
        href = heading.getparent().attrib['href']
        movieurl = urlparse.urljoin(url, href)

        entry = {
            'status': 'free',
            'freenessurl': movieurl,
            'year': firstyear,
            'title': name,
            'availability': where,
        }

        # Default to the movie page URL as key; a successful IMDB lookup
        # replaces it and is recorded in the entry.
        key = movieurl
        if args.imdblookup:
            imdbid = movielib.imdb_find_one(name, firstyear)
        else:
            imdbid = None
        if imdbid:
            key = imdbid
            entry['imdblookup'] = '%s %d' % (name, firstyear)

        # Progress output: the three values separated by single spaces.
        print("%s %s %s" % (name, firstyear, movieurl))
        list[key] = entry
        print(list[key])
    return list
def fetch_movie_lists(args):
    """Collect the movies from every Film Chest "films by date" section.

    Builds one listing URL per hard-coded section name, prints it, and
    passes it to fetch_movie_list() together with a shared dictionary that
    accumulates the entries of all sections.

    args -- parsed command line options, handed through unchanged to
            fetch_movie_list().

    Returns the dictionary of collected entries.  The result is always a
    dictionary (possibly empty), also when some sections could not be
    fetched.
    """
    # FIXME should get this list of categories from the web
    sections = [
        "1930-and-Older",
        "1930s",
        "1940s",
        "1950s",
        "1960s",
        "1970s",
        "1980s",
    ]
    movies = {}
    for section in sections:
        url = "http://www.filmchest.com/film-chest/films-by-date/filmlibrary/%s/?count=1000&start=1" % section
        print(url)
        fetched = fetch_movie_list(args, movies, url)
        if fetched is None:
            # fetch_movie_list() returns None when the download fails with
            # an HTTP error.  Keep the entries gathered so far instead of
            # replacing the accumulator with None, which would discard
            # them and make the next section fail with a TypeError.
            print("warning: unable to fetch %s, skipping section" % url)
            continue
        movies = fetched
    return movies
def main():
    """Parse the command line, fetch the movie lists and save the result.

    The only option is --imdblookup, a flag that is off by default.  The
    collected entries are handed to movielib.savelist() under the name
    'free-movies-filmchest-com.json'.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '--imdblookup',
        action='store_true',
        default=False,
        help='also find title IDs by searching for title/year in IMDB',
    )
    options = argparser.parse_args()

    movies = fetch_movie_lists(options)
    movielib.savelist(movies, name='free-movies-filmchest-com.json')
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()