Skip to content

Commit

Permalink
confreak.com spider added
Browse files Browse the repository at this point in the history
  • Loading branch information
pgayane committed Apr 14, 2014
1 parent d1326be commit 0d39d8a
Showing 1 changed file with 39 additions and 0 deletions.
39 changes: 39 additions & 0 deletions pycon_speakers/spiders/confreaks_com.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@


from urlparse import urljoin

from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.http import Request

from pycon_speakers.loaders import SpeakerLoader


class PyConSpider(Spider):
    """Spider that collects speaker names from confreaks.com event pages.

    The crawl has two steps:

    1. ``_parse_events`` reads the event index and, for every event whose
       title ends in a four-digit year, requests that event's page.
    2. ``_parse_video`` reads the presenter links on an event page and
       emits one loaded ``SpeakerLoader`` item per presenter.
    """

    # NOTE(review): the spelling 'confreak.com' (no "s") is kept on purpose;
    # this value is the spider's public identifier and renaming it would
    # break existing invocations.
    name = 'confreak.com'
    base_url = "http://confreaks.com/"

    def start_requests(self):
        """Yield the single seed request for the event index page."""
        url = "http://confreaks.com/events"
        yield Request(url, callback=self._parse_events)

    def _parse_events(self, response):
        """Yield one request per event whose title ends with a year.

        The last four characters of the event title are taken as the year
        and the remainder as the conference name; both are forwarded to
        ``_parse_video`` through ``Request.meta``. Events whose title does
        not end in four digits, or that lack a title or a link, are skipped.
        """
        for event in Selector(response).xpath("//div[@class = 'event-box-inner']"):
            titles = event.xpath('./span/strong/a/text()').extract()
            if not titles:
                # A box without a title must not abort the whole page.
                continue
            eventname = titles[0]
            year = eventname[-4:]
            if not year.isdigit():
                continue
            links = event.xpath('./a/@href').extract()
            if not links:
                continue
            conf_name = eventname[:-4]
            meta = {'year': year, 'conference': conf_name}
            # urljoin resolves root-relative, relative and absolute hrefs
            # correctly; plain concatenation with base_url (which ends in
            # "/") produced "http://confreaks.com//..." for "/..." hrefs.
            yield Request(urljoin(response.url, links[0]), meta=meta,
                          callback=self._parse_video)

    def _parse_video(self, response):
        """Yield one speaker item per presenter link on an event page.

        The conference name and year come from ``response.meta`` as set by
        ``_parse_events``.
        """
        conference = response.meta['conference']
        year = response.meta['year']
        for section in Selector(response).xpath("//div[@class = 'videos']//div[@class = 'presenters']/a"):
            il = SpeakerLoader(selector=section)
            il.add_xpath('name', ".")
            # Values are passed through unchanged: wrapping unicode text in
            # str() raises UnicodeEncodeError on Python 2 for non-ASCII names.
            il.add_value('conference', conference)
            il.add_value('year', year)
            yield il.load_item()

0 comments on commit 0d39d8a

Please sign in to comment.