Skip to content

Commit

Permalink
Merge pull request #24 from bertinatto/pythonbrazil
Browse files Browse the repository at this point in the history
Add Python Brazil spider
  • Loading branch information
pablohoffman committed Apr 18, 2014
2 parents 46eea45 + dbba121 commit b0acd1c
Showing 1 changed file with 47 additions and 0 deletions.
47 changes: 47 additions & 0 deletions pycon_speakers/spiders/pythonbrazil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from scrapy.spider import Spider
from scrapy.http import Request
from scrapy.selector import Selector

from pycon_speakers.items import Speaker


class PythonBrazilSpider(Spider):
    """Scrape speaker names from the Python Brazil confirmed-talks pages.

    Each entry in ``self.conferences`` describes one edition of the
    conference: the listing URL to start from, the callbacks used for the
    listing page and for each individual talk page, and the metadata that
    is copied onto every ``Speaker`` item produced for that edition.
    """

    name = 'pythonbrazil'

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and register the known editions.

        All positional and keyword arguments are forwarded untouched to
        ``Spider.__init__`` so the base-class initialisation runs and
        arguments handed to the constructor no longer raise ``TypeError``.
        """
        super(PythonBrazilSpider, self).__init__(*args, **kwargs)
        # Keyed by edition number; bound methods are stored so the request
        # flow can be driven entirely from this table.
        self.conferences = {
            '9': {
                'conference_name': 'Python Brazil [9]',
                'url': 'http://2013.pythonbrasil.org.br/program/confirmed-talks',
                'callback': self.parse_2013,
                'callback_talk': self.parse_talk_2013,
                'year': '2013',
            },
        }

    def start_requests(self):
        """Yield one listing-page ``Request`` per registered edition.

        The edition's description dict travels with the request in
        ``meta['conference']`` so later callbacks can read it back.
        """
        for conference in self.conferences.values():
            yield Request(
                conference['url'],
                meta={'conference': conference},
                callback=conference['callback'],
            )

    def parse_2013(self, response):
        """Follow every talk link found in the 2013 confirmed-talks table.

        Yields one ``Request`` per anchor in the first column of the
        ``listing`` table, handled by the edition's ``callback_talk``.
        Hrefs are resolved against ``response.url`` so relative links work,
        and anchors without an href are skipped instead of producing an
        invalid request.
        """
        # Imported here (rather than at module level) so the method works on
        # both Python 2 and Python 3 without touching the file's imports.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        conference = response.meta['conference']
        anchors = Selector(response).xpath(
            '//table[contains(@class, "listing")]/tbody/'
            'tr/td[1]/a'
        )
        for anchor in anchors:
            href = ''.join(anchor.xpath('./@href').extract()).strip()
            if not href:
                # Request() rejects URLs without a scheme; nothing to follow.
                continue
            yield Request(
                urljoin(response.url, href),
                meta={'conference': conference},
                callback=conference['callback_talk'],
            )

    def parse_talk_2013(self, response):
        """Build a ``Speaker`` item from a 2013 talk page.

        The name is the concatenation of every text node found directly
        inside elements whose class contains ``speaker_name``; conference
        name and year come from the edition dict carried in
        ``response.meta['conference']``.
        """
        conference = response.meta['conference']
        name_parts = Selector(response).xpath(
            '//span[contains(@class,'
            '"speaker_name")]/text()'
        ).extract()

        speaker = Speaker()
        speaker['name'] = ''.join(name_parts)
        speaker['conference'] = conference['conference_name']
        speaker['year'] = conference['year']
        yield speaker

0 comments on commit b0acd1c

Please sign in to comment.