-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmklist-desertislandfilms-xls
executable file
·127 lines (113 loc) · 4.29 KB
/
mklist-desertislandfilms-xls
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Based on info from
# https://blogs.harvard.edu/rprasad/2014/06/16/reading-excel-with-python-xlrd/
import argparse
import xlrd
import movielib
def dump_all(sh):
    """Debug helper: print every cell of sheet *sh*, row by row.

    Each row starts with a separator line and its row index, followed by
    one line per column showing the column index and the cell object
    returned by ``sh.cell(row, col)``.
    """
    column_count = sh.ncols
    separator = '-' * 40
    for r in range(sh.nrows):
        print(separator)
        print('Row: %s' % r)
        for c in range(column_count):
            print('Column: [%s] cell_obj: [%s]' % (c, sh.cell(r, c)))
def _find_header_row(sh, text_type):
    """Return the index of the header row of sheet *sh*, or None.

    Only the first two rows are examined (fewer if the sheet is shorter).
    A row counts as the header when one of its cells holds the text 'year'
    (case-insensitive).  If both rows qualify, the later one wins.
    """
    headerrow = None
    for hrow in range(min(2, sh.nrows)):
        for hcol in range(sh.ncols):
            value = sh.cell(hrow, hcol).value
            if isinstance(value, text_type) and value.lower() == 'year':
                headerrow = hrow
    return headerrow


def _read_entry(sh, row_idx, hfield, fieldmap, text_type):
    """Collect the non-empty cells of row *row_idx* into a dict.

    *hfield* lists the lower-cased header text per column.  *fieldmap*
    renames a header, or drops the column when it maps to None.  Typographic
    single quotes in text values are replaced by a plain apostrophe.
    """
    entry = {}
    for hcol, fieldname in enumerate(hfield):
        fieldname = fieldmap.get(fieldname, fieldname)
        v = sh.cell(row_idx, hcol).value
        # Skip dropped/unnamed columns and empty cells.
        if fieldname and v:
            if isinstance(v, text_type):
                v = v.replace(u"\u2018", "'").replace(u"\u2019", "'")
            entry[fieldname] = v
    return entry


def main():
    """Build the Desert Island Films movie list from the local spreadsheet.

    Opens ``Desert-Island-Films-List-R.xlsx`` in the current directory,
    turns every row that has a title into an entry dict (tagged with the
    tab it came from and status 'free') and passes the collected entries to
    ``movielib.savelist``.

    With ``--imdblookup`` every title/year is also passed to
    ``movielib.imdb_find_one``; a hit replaces the synthetic
    ``entry-<tab>-<row>`` key.  When a key is already present, the existing
    entry is kept and only the tab name is added to its category list.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--imdblookup', action='store_true', default=False,
                        help='also find title IDs by searching for title/year in IMDB')
    args = parser.parse_args()

    # ``unicode`` only exists on Python 2; fall back to ``str`` so the text
    # checks below also work where ``unicode`` is not defined.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    entries = {}

    # Source of the spreadsheet ("Film List"):
    # http://www.desertislandfilms.com/httpwww-desertislandfilms-comwp-contentuploads201211desert-island-films-list-u-1-xlsx-downloadall-viewermicrosoft/
    # http://www.desertislandfilms.com/wp-content/uploads/2012/11/Desert-Island-Films-List-R.xlsx

    # Ignore tabs without movies or with movies without year.
    # The commented-out names are tabs that are NOT ignored.
    ignore = (
        'Table of Contents',
        'Programming Guide',
        'Adult',
        'Cartoons',
        'Christmas Cartoons',
        # 'Documentary',
        # 'Drama-Comedy',
        # 'HI DEF',
        # 'Horror-SCI-FI',
        # 'Martial Arts',
        # 'New Titles',
        # 'Serial',
        # 'Short Subject',
        'Spanish Dubbed',
        # 'TV Show',
        # 'Western',
        # 'Jackie Chan Collection',
        'Sheet2',
    )

    # Header texts needing special treatment: None drops the column,
    # anything else renames it.
    fieldmap = {
        '<--back': None,
        'hi def titles, from 35mm': 'title',
    }

    book = xlrd.open_workbook("Desert-Island-Films-List-R.xlsx")
    for sheetname in book.sheet_names():
        if sheetname in ignore:
            continue
        sh = book.sheet_by_name(sheetname)
        print("Tab '%s' %d x %d" % (sheetname, sh.nrows, sh.ncols))
        #dump_all(sh)

        # Locate header.  Without this check a tab lacking a 'year' header
        # would silently reuse the header row of the previous tab (or fail
        # with NameError on the first tab).
        headerrow = _find_header_row(sh, text_type)
        if headerrow is None:
            print("Tab '%s' has no 'year' header, skipping" % sheetname)
            continue
        print(headerrow)

        hfield = [sh.cell(headerrow, col_idx).value.lower()
                  for col_idx in range(sh.ncols)]
        print(hfield)

        for row_idx in range(headerrow + 1, sh.nrows):
            entry = _read_entry(sh, row_idx, hfield, fieldmap, text_type)
            if 'title' not in entry:
                continue
            entry['category'] = [sheetname]
            entry['status'] = 'free'
            if 'year' in entry:
                year = int(entry['year'])
                entry['year'] = year
            else:
                year = None
            if 'min' in entry:
                entry['min'] = int(entry['min'])

            # Synthetic key, unique per tab and row; replaced below when
            # the IMDB lookup finds a match.
            imdb = "entry-%s-%s" % (sheetname, row_idx)
            if args.imdblookup:
                title = entry['title']
                print("IMDB search for %s %s" % (title, year))
                found = movielib.imdb_find_one(title, year)
                if found:
                    # '%d' raises TypeError for a missing year, so record
                    # only the title in that case.
                    if year is None:
                        entry['imdblookup'] = '%s' % title
                    else:
                        entry['imdblookup'] = '%s %d' % (title, year)
                    imdb = found

            if imdb in entries:
                if sheetname not in entries[imdb]['category']:
                    entries[imdb]['category'].append(sheetname)
            else:
                entries[imdb] = entry
            # Log every processed row, including rows merged into an
            # already existing entry.
            print('"%s": %s' % (imdb, entry))

    movielib.savelist(entries, 'free-movies-desertislandfilms-xsd.json')
# Run the conversion only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == '__main__':
    main()