diff --git a/README.md b/README.md index c404785..0b43728 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ Other tasks: 2. Review crawled data and fix spiders when the data is incorrect 3. Chart results +Here is what we have for now: +![Gender Bar Chart](/gender_plot.png) Running the Scrapy Code ----------------------- diff --git a/gender_plot.png b/gender_plot.png new file mode 100644 index 0000000..b162a53 Binary files /dev/null and b/gender_plot.png differ diff --git a/pycon_speakers/loaders.py b/pycon_speakers/loaders.py index bdb2085..2c97ade 100644 --- a/pycon_speakers/loaders.py +++ b/pycon_speakers/loaders.py @@ -23,8 +23,8 @@ def _cleanup_name(name): name = _STRIPRE1.sub(u'', name, re.I) return _STRIPRE2.sub(u'', name, re.I) -_STRIPRE1 = re.compile(ur'\s*(\(.*\))?( bio)?( -)?( \.)?$') -_STRIPRE2 = re.compile(ur'\s*(/.+)$') +_STRIPRE1 = re.compile(ur'\s*(\(.*\))?( bio)?( -)?( \.)?$', re.DOTALL) +_STRIPRE2 = re.compile(ur'\s*(/.+)$', re.DOTALL) class SpeakerLoader(ItemLoader):