Skip to content

Commit

Permalink
developerweek.com added
Browse files Browse the repository at this point in the history
  • Loading branch information
pgayane committed Apr 14, 2014
1 parent 0d39d8a commit d009ddd
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
1 change: 1 addition & 0 deletions pycon_speakers/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def _cleanup_name(name):
>>> _cleanup_name(u'Ivan Krstic / Harvard University (presently..)')
u'Ivan Krstic'
"""
name = name.replace('\t', ' ')
name = _STRIPRE1.sub(u'', name, re.I)
return _STRIPRE2.sub(u'', name, re.I)

Expand Down
41 changes: 41 additions & 0 deletions pycon_speakers/spiders/developerweek_com.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@


from urlparse import urljoin

from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.http import Request

from pycon_speakers.loaders import SpeakerLoader


class PyConSpider(Spider):
    """Spider that collects speaker names from developerweek.com listings.

    Only the 2013 "all speakers" page is requested by ``start_requests``.
    A callback for the 2014 sched.org speaker directory is defined as
    well, but no request is currently issued for it.
    """

    name = 'developerweek.com'
    # NOTE(review): unused within this class and points at confreaks.com
    # rather than developerweek.com -- presumably carried over from
    # another spider; confirm before relying on it.
    base_url = "http://confreaks.com/"

    def start_requests(self):
        """Yield the initial request(s), tagging each with year/conference."""
        # The 2014 directory lives at
        #   http://developerweek2014conferenceexpo.sched.org/directory/speakers
        # and would be routed to ``_parse_2014`` with year '2014'; that
        # request is intentionally not issued at the moment.
        yield Request(
            "http://www.developerweek.com/2013-sf/index/allspeakers",
            meta={'year': '2013', 'conference': self.name},
            callback=self._parse_2013
        )

    def _iter_speakers(self, response, nodes_xpath, name_xpath):
        """Yield one loaded item per node matched by ``nodes_xpath``.

        ``name_xpath`` is evaluated relative to each matched node to fill
        the ``name`` field; ``conference`` and ``year`` are copied from
        ``response.meta`` as strings.
        """
        for node in Selector(response).xpath(nodes_xpath):
            loader = SpeakerLoader(selector=node)
            loader.add_xpath('name', name_xpath)
            loader.add_value('conference', str(response.meta['conference']))
            loader.add_value('year', str(response.meta['year']))
            yield loader.load_item()

    def _parse_2013(self, response):
        """Extract speakers from the 2013 "all speakers" page."""
        # The matched node is the speaker link itself, hence the "." xpath.
        speakers = self._iter_speakers(
            response, "//div[@class='data-mid2']/h2[1]/a[1]", ".")
        for speaker in speakers:
            yield speaker

    def _parse_2014(self, response):
        """Extract speakers from the 2014 sched.org speaker directory."""
        speakers = self._iter_speakers(
            response, "//div[@class='sched-person']", "./h2/a")
        for speaker in speakers:
            yield speaker

0 comments on commit d009ddd

Please sign in to comment.