From 8183bac6daa4eb5256172e692ac7d0ce193ca1db Mon Sep 17 00:00:00 2001
From: Pablo Castellano
Date: Sun, 31 Jan 2016 00:50:12 +0100
Subject: [PATCH] elasticsearch: asciifolding (issue #10)

---
 borme/search_backends.py | 65 ++++++++++++++++++++++++++++++++++++++++
 libreborme/settings.py   |  4 ++--
 2 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 borme/search_backends.py

diff --git a/borme/search_backends.py b/borme/search_backends.py
new file mode 100644
index 0000000..5605a28
--- /dev/null
+++ b/borme/search_backends.py
@@ -0,0 +1,65 @@
+"""Haystack Elasticsearch backend that folds accented characters to ASCII."""
+import copy
+
+from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend
+from haystack.backends.elasticsearch_backend import ElasticsearchSearchEngine
+
+
+class AsciifoldingElasticBackend(ElasticsearchSearchBackend):
+    """Elasticsearch backend whose analyzers include the asciifolding filter."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the parent backend, then install the custom analyzers."""
+        super(AsciifoldingElasticBackend, self).__init__(*args, **kwargs)
+        analyzer = {
+            "ascii_analyser": {
+                "tokenizer": "standard",
+                "filter": ["standard", "asciifolding", "lowercase"]
+            },
+            "ngram_analyzer": {
+                "type": "custom",
+                "tokenizer": "lowercase",
+                "filter": ["haystack_ngram", "asciifolding"]
+            },
+            "edgengram_analyzer": {
+                "type": "custom",
+                "tokenizer": "lowercase",
+                "filter": ["haystack_edgengram", "asciifolding"]
+            }
+        }
+        # Work on a private copy: DEFAULT_SETTINGS is inherited from the
+        # haystack backend, and assigning into it in place would also change
+        # the settings of every other backend that shares that dict.
+        settings = copy.deepcopy(self.DEFAULT_SETTINGS)
+        settings['settings']['analysis']['analyzer'] = analyzer
+        self.DEFAULT_SETTINGS = settings
+
+    def build_schema(self, fields):
+        """Build the index mapping, using "ascii_analyser" for plain text.
+
+        The analyzer is set on every indexed field whose mapping type is
+        'string', except facet fields (those with a ``facet_for`` attribute)
+        and 'ngram' / 'edge_ngram' fields.
+
+        Returns the ``(content_field_name, mapping)`` tuple produced by the
+        parent class, with the mapping adjusted in place.
+        """
+        content_field_name, mapping = super(AsciifoldingElasticBackend, self).build_schema(fields)
+
+        for field_class in fields.values():
+            field_mapping = mapping[field_class.index_fieldname]
+
+            if field_mapping['type'] != 'string' or not field_class.indexed:
+                continue
+            if hasattr(field_class, 'facet_for'):
+                continue
+            if field_class.field_type in ('ngram', 'edge_ngram'):
+                continue
+            field_mapping['analyzer'] = "ascii_analyser"
+
+        return (content_field_name, mapping)
+
+
+class AsciifoldingElasticSearchEngine(ElasticsearchSearchEngine):
+    """Search engine that uses AsciifoldingElasticBackend as its backend."""
+    backend = AsciifoldingElasticBackend
diff --git a/libreborme/settings.py b/libreborme/settings.py
index b6f53d0..c6f456a 100644
--- a/libreborme/settings.py
+++ b/libreborme/settings.py
@@ -109,9 +109,9 @@
 # haystack search using elasticsearch
 HAYSTACK_CONNECTIONS = {
     'default': {
-        'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
+        'ENGINE': 'borme.search_backends.AsciifoldingElasticSearchEngine',
         'URL': 'http://127.0.0.1:9200/',
-        'INDEX_NAME': 'haystack',
+        'INDEX_NAME': 'libreborme',
     },
 }
 