Skip to content

Commit

Permalink
only speakers: added 2011 arg pycon
Browse files (browse the repository at this point in the history)
  • Loading branch information
eLRuLL committed Apr 21, 2014
1 parent 3c26fba commit 837e3c0
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions pycon_speakers/spiders/ar_pycon.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,19 +13,26 @@ class ArPyconSpider(Spider):
     """
     name = 'ar.pycon.org'
     from_year = 2011
-    base_url = 'http://ar.pycon.org/{year}/stats/attendees'
+    base_url = 'http://ar.pycon.org/{year}/schedule/index'

     def start_requests(self):
-        current_year = date.today().year
+        current_year = date.today().year - 2
         for year in range(self.from_year, current_year):
             url = self.base_url.format(year=year)
             yield Request(url)

     def parse(self, response):
         selector = Selector(response)
         year = re.search(r'/(\d+)/', response.url).group(1)
+
+        speakers = []
+        for i, bad_name in enumerate(selector.xpath('//div[@style]//span[position()=1]/text()').extract()):
+            if i % 2 != 0:
+                name = reversed([a.strip() for a in bad_name.split(",")])
+                speaker = " ".join(name)
+                speakers.append(speaker)
+
         return [Speaker(name=speaker,
                         conference=self.name,
                         year=year)
-                for speaker in selector.xpath('//table[position()>1]'
-                                              '//tr[position()>1]//td[position()=1]//text()').extract()]
+                for speaker in speakers]

0 comments on commit 837e3c0

Please sign in to comment.