Skip to content

Commit

Permalink
Add spider for next day video
Browse files Browse the repository at this point in the history
  • Loading branch information
shaneaevans committed Apr 14, 2014
1 parent 73d2d10 commit 06cf424
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions pycon_speakers/spiders/nextdayvideo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import json, re
from scrapy.spider import Spider
from pycon_speakers.items import Speaker


class NextDayVideoSpider(Spider):
    """Collect speakers from the veyepar.nextdayvideo.com JSON API.

    The response body is decoded as JSON and iterated as a sequence of
    conference objects. Each conference carries a ``name`` and a
    ``show_set``; each show carries a ``name`` and an ``episode_set``.
    One ``Speaker`` item is yielded for every episode whose ``authors``
    value is present and non-empty.
    """

    name = 'nextdayvideo.com'
    start_urls = ['http://veyepar.nextdayvideo.com/api/csp/?format=json']

    # Raw string: in a plain literal ``\d`` is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
    # Compiled once here instead of on every show inside the loops.
    _YEAR_RE = re.compile(r'20\d\d')

    def parse(self, response):
        """Yield a ``Speaker`` for each episode with authors in the feed.

        The conference year is taken from the first ``20xx`` substring
        found in the show name; shows whose name contains no such
        substring are logged and skipped entirely.

        :param response: the response for the API URL; its body is read
            via ``body_as_unicode()`` and parsed as JSON.
        :returns: a generator of ``Speaker`` items.
        """
        for conference in json.loads(response.body_as_unicode()):
            conference_name = conference['name']
            for show_set in conference['show_set']:
                set_name = show_set['name']
                year_match = self._YEAR_RE.search(set_name)
                if not year_match:
                    self.log("skipping %s, set %s: missing year" %
                             (conference_name, set_name))
                    continue
                year = year_match.group()
                for episode in show_set['episode_set']:
                    authors = episode.get('authors')
                    if not authors:
                        # lightning talks, panels, etc.
                        continue
                    # NOTE(review): ``authors`` is passed through verbatim;
                    # presumably it can hold several names in one string --
                    # confirm against the API before splitting it.
                    yield Speaker(
                        name=authors,
                        conference=conference_name,
                        year=year,
                    )

0 comments on commit 06cf424

Please sign in to comment.