Skip to content

Commit

Permalink
added ar.pycon.org spider for Pycon in Argentina
Browse files Browse the repository at this point in the history
  • Loading branch information
eLRuLL committed Apr 20, 2014
1 parent a21e314 commit 3c26fba
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
.scrapy
project.egg-info/
build/

venv/
.idea/
*.json
31 changes: 31 additions & 0 deletions pycon_speakers/spiders/ar_pycon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from scrapy.spider import Spider
from scrapy.http import Request
from scrapy.selector import Selector

from pycon_speakers.items import Speaker

from datetime import date
import re


class ArPyconSpider(Spider):
    """Spider that collects names from the PyCon Argentina attendee pages.

    One request is issued per conference year, and every name found in the
    page's tables is emitted as a ``Speaker`` item tagged with this spider's
    ``name`` and the year taken from the response URL.
    """
    name = 'ar.pycon.org'
    # First conference year to request.
    from_year = 2011
    # URL template; ``{year}`` is filled in by ``start_requests``.
    base_url = 'http://ar.pycon.org/{year}/stats/attendees'

    def start_requests(self):
        """Yield one ``Request`` per year from ``from_year`` onwards.

        The current calendar year is the exclusive upper bound of the range.
        """
        # NOTE(review): ``range`` stops before the current year, so this
        # year's page is never requested -- presumably intentional because
        # the conference may not have happened yet; confirm.
        current_year = date.today().year
        for year in range(self.from_year, current_year):
            yield Request(self.base_url.format(year=year))

    def parse(self, response):
        """Turn an attendee page into a list of ``Speaker`` items.

        The year is read from the first ``/<digits>/`` segment of the
        response URL. If the URL has no such segment (e.g. after an
        unexpected redirect), the page is logged and skipped instead of
        raising ``AttributeError``.

        Returns:
            A list of ``Speaker`` items, one per non-blank name found.
        """
        year_match = re.search(r'/(\d+)/', response.url)
        if year_match is None:
            self.log('No year found in URL %s; skipping page' % response.url)
            return []
        year = year_match.group(1)

        # The position()>1 predicates skip the first table among its
        # siblings and the first row of each remaining table (presumably
        # header rows -- TODO confirm against the live page); only the
        # first cell of every remaining row is read.
        cell_texts = Selector(response).xpath(
            '//table[position()>1]'
            '//tr[position()>1]//td[position()=1]//text()').extract()

        speakers = []
        for text in cell_texts:
            # ``//text()`` also returns whitespace-only nodes between tags;
            # drop those and trim padding so names are clean.
            speaker = text.strip()
            if not speaker:
                continue
            speakers.append(Speaker(name=speaker,
                                    conference=self.name,
                                    year=year))
        return speakers

0 comments on commit 3c26fba

Please sign in to comment.