diff --git a/README.md b/README.md index c368d09..bcd2e4e 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,9 @@ To generate a new version of the schema.json file: prmd combine --meta schema/meta.json schema/voters.json > schema.json ``` -## Ruby client for the Brigade repo +## Ruby client -To create the ruby client for Brigade: +To create the Ruby client: ``` heroics-generate VerifierAPI schema.json http://127.0.0.1:10012 > verifier_api.rb diff --git a/index.py b/index.py index e3ac4ab..8adfbc4 100644 --- a/index.py +++ b/index.py @@ -4,8 +4,8 @@ from pyelasticsearch import ElasticSearch from progressbar import ProgressBar, Percentage, Bar, ETA, RotatingMarker, Counter -***REMOVED*** -***REMOVED*** +from voter_verifier.config import ES_HOSTS, TIMEOUT, RETRIES, INDEX +from voter_verifier.indexing import index_voters, ensure_mapping_exists, aliased_index PII_map = {} # key: voterbase_id, value: the row @@ -91,7 +91,7 @@ def input_mapping(row, header_map): 'party': row[header_map[]] } -***REMOVED*** + data.update(address_mapping_address(row, header_map)) return data diff --git a/voter_verifier/indexing.py b/voter_verifier/indexing.py index dafd82d..006156e 100644 --- a/voter_verifier/indexing.py +++ b/voter_verifier/indexing.py @@ -6,8 +6,8 @@ from more_itertools import chunked from pyelasticsearch import ElasticHttpNotFoundError from verifier_date_utils import day_of_year -***REMOVED*** -***REMOVED*** +from voter_verifier.config import DOC_TYPE +from voter_verifier.synonyms import FIRST_NAME_SYNONYMS, ADDRESS_SYNONYMS logger = getLogger(__name__) @@ -198,7 +198,7 @@ def ensure_mapping_exists(index_name, es_client, force_delete=False, should_upda # Votizen disabled _source to save space. We might want to do that # too if performance is not sufficient or disk space is too # out-of-control. They claimed it dropped index size from 240 GB -> - # 45 GB. 
For the sample 50,000 records, enabling this # results in an increase from 3.5 Kb -> 11.6 Mb. # # If we disable _source, we will need to find a new key-value store diff --git a/voter_verifier/matching.py b/voter_verifier/matching.py index 97b7c6d..67ddfef 100644 --- a/voter_verifier/matching.py +++ b/voter_verifier/matching.py @@ -6,9 +6,9 @@ from pyelasticsearch import ElasticSearch from datadog.dogstatsd import statsd -***REMOVED*** +from voter_verifier.zip_to_lat_lng import ZipToLatLng from verifier_date_utils import years_ago, NullableDate -***REMOVED*** +from voter_verifier.config import (ES_HOSTS, TIMEOUT, RETRIES, INDEX, DOC_TYPE, VERIFIER_MAX_RESULTS, STATSD_HOST, STATSD_PORT, DEFAULT_SEARCH_TYPE, SEARCH_TYPE_DISCOVER, SEARCH_TYPE_TOP, SEARCH_TYPE_AUTO_VERIFY,CONFIDENCE_INTERVAL_FOR_AUTO_VERIFICATION, @@ -1074,8 +1074,8 @@ def lookup_by_email(email, max_hits): def lookup_by_phone(input_phone, max_hits): """ find voter records for a given phone number -***REMOVED*** - Brigade stores the leadingg "+1" + US phone numbers are 10 numeric digits + Strips any leading "+1" """ phone = input_phone.replace('+', '')[-10:] if input_phone else '' if not phone: diff --git a/voter_verifier/random_matching.py b/voter_verifier/random_matching.py index 8e10d22..eb47593 100644 --- a/voter_verifier/random_matching.py +++ b/voter_verifier/random_matching.py @@ -2,8 +2,8 @@ from logging import getLogger, INFO from pyelasticsearch import ElasticSearch -***REMOVED*** -***REMOVED*** +from voter_verifier.matching import es_client, statsd +from voter_verifier.config import (ES_HOSTS, TIMEOUT, RETRIES, INDEX, DOC_TYPE, VERIFIER_MAX_RESULTS) diff --git a/voter_verifier/tests/test_index.py b/voter_verifier/tests/test_index.py index 0693f5f..f0dd5db 100644 --- a/voter_verifier/tests/test_index.py +++ b/voter_verifier/tests/test_index.py @@ -5,8 +5,8 @@ from django.utils.datetime_safe import date as safe_date from pyelasticsearch import ElasticSearch -***REMOVED*** -***REMOVED*** +from 
voter_verifier.indexing import _document_from_mapping, aliased_index +from voter_verifier.config import ES_HOSTS, TIMEOUT, RETRIES, INDEX es_client = ElasticSearch(ES_HOSTS, TIMEOUT, RETRIES) @@ -91,7 +91,7 @@ class IndexAliasTests(TestCase): """ Tests ensuring that the alias switchover works as intended """ def test_switchover_when_index_does_not_exist(self): -***REMOVED*** + dest_index = 'test_voter_verifier_alias' try: es_client.delete_index(dest_index) @@ -108,7 +108,7 @@ def test_switchover_when_index_does_not_exist(self): self.assertEqual(results['hits']['total'], 1) def test_switchover_twice(self): -***REMOVED*** + dest_index = 'test_voter_verifier_alias' try: ensure_mapping_exists(dest_index, es_client, force_delete=True) diff --git a/voter_verifier/tests/test_verification.py b/voter_verifier/tests/test_verification.py index 11c16cd..c1e0d90 100644 --- a/voter_verifier/tests/test_verification.py +++ b/voter_verifier/tests/test_verification.py @@ -11,13 +11,13 @@ from nose.tools import eq_ from pyelasticsearch import ElasticSearch from verifier_date_utils import years_ago, NullableDate, day_of_year -***REMOVED*** +from voter_verifier.config import (ES_HOSTS, TIMEOUT, RETRIES, INDEX, SEARCH_TYPE_DISCOVER, SEARCH_TYPE_TOP, SEARCH_TYPE_AUTO_VERIFY) -***REMOVED*** -***REMOVED*** +from voter_verifier.indexing import index_voters, ensure_mapping_exists +from voter_verifier.matching import (raw_elastic_voters, normalize_dob, match_many, match_one) -***REMOVED*** +from voter_verifier.zip_to_lat_lng import ZipToLatLng ROOT_DIR = path.abspath(path.dirname(__file__)) @@ -216,7 +216,7 @@ def test_registration_date(self): index=self.index_name) eq_(voter['registration_date'], '2016-02-08') -***REMOVED*** + def test_ts_state_match_one(self): best_match = fake_voter(first_name='Lewis', last_name='Clark', st='CA', @@ -242,7 +242,7 @@ def test_registration_date(self): index=self.index_name) eq_(voter['id'], best_match['id']) -***REMOVED*** + def 
test_ts_state_match_many(self): best_match = fake_voter(first_name='Gary', last_name='Kramer', st='CA', diff --git a/voter_verifier/zip_to_lat_lng.py b/voter_verifier/zip_to_lat_lng.py index e6f350f..c10da3b 100644 --- a/voter_verifier/zip_to_lat_lng.py +++ b/voter_verifier/zip_to_lat_lng.py @@ -2,7 +2,7 @@ from collections import namedtuple -***REMOVED*** +from voter_verifier.config import ZIP_TO_LAT_LNG_FILE_NAME LatLng = namedtuple('LatLng', ['lat', 'lng']) diff --git a/web.py b/web.py index 13813c7..213f4ee 100644 --- a/web.py +++ b/web.py @@ -12,9 +12,9 @@ from verifier_date_utils import NullableDate -***REMOVED*** -***REMOVED*** -***REMOVED*** +from voter_verifier.matching import from_elasticsearch_mapping, match_many, lookup_by_email, lookup_by_phone, es_client, statsd +from voter_verifier.random_matching import match_random_addresses, es_client +from voter_verifier.config import INDEX, SENTRY_DSN, DOC_TYPE app = Flask(__name__) sentry = RavenClient(SENTRY_DSN) @@ -284,8 +284,8 @@ def health(kind=None): """ status = es_client.status(INDEX)['indices'] -***REMOVED*** -***REMOVED*** + # We use index aliases to map "voter_verifier" -> + # "voter_verifier_1234...". The real name is returned by the status # endpoint, so we'll just assume that if an index is returned here, it is # the correct one. status = status[status.keys()[0]]