diff --git a/Dockerfile b/Dockerfile index 8bf1069..1563c75 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,8 @@ RUN touch /var/lib/rpm/{,*} \ RUN useradd app \ && wget -O /usr/bin/gosu https://github.com/tianon/gosu/releases/download/1.4/gosu-amd64 \ && chmod +x /usr/bin/gosu +***REMOVED*** + && chmod +x /usr/bin/pv RUN easy_install virtualenv ADD entrypoint.sh /usr/local/bin/entrypoint ENTRYPOINT ["/usr/local/bin/entrypoint"] diff --git a/deploy.yml b/deploy.yml index ee717e4..1e59915 100644 --- a/deploy.yml +++ b/deploy.yml @@ -51,6 +51,42 @@ default: &default uris: - '<%= $deploy_variables[:nexus_url] %>' + chronos: + verifier_reindex_all: + # the "name" and "environmentVariables" keys are added at deploy-time + epsilon: PT60S + executor: '' + executorFlags: '' + retries: 2 +***REMOVED*** + ownerName: '' + async: false + cpus: 1.0 + disk: 256.0 + mem: 1024.0 + softError: false + dataProcessingJobType: false + uris: + - '<%= $deploy_variables[:nexus_url] %>' + highPriority: false + runAsUser: root + # TODO: This command should be baked into the Docker image (as its + # entrypoint), however a Chronos bug prevents us from using `shell: false` + # and simply passing `arguments`. https://github.com/mesos/chronos/issues/567 + command: '/usr/local/bin/entrypoint' + container: + type: docker + image: '<%= $deploy_variables[:docker_image] %>' + network: BRIDGE + scheduleTimeZone: UTC + # TODO: Pass these arguments into the ./bin/run script. This is blocked + # on a Chronos bug which adds the value of the arguments array to the + # task ID which fails because slashes are invalid in task IDs. + # https://github.com/mesos/chronos/issues/568 + arguments: ['bash', 'index_all.sh'] +***REMOVED*** + schedule: R/2015-06-20T00:00:00.000Z/P1M # every month + ############################################################################### # EDGE ############################################################################### diff --git a/index.py b/index.py index f2abc9c..494ce18 100644 --- a/index.py +++ b/index.py @@ -129,7 +129,7 @@ def index_records(index_name, voters): ***REMOVED*** - if len(voters) >= 1000000: + if len(voters) >= 100000: index_records(index, voters) voters = [] diff --git a/index_all.sh b/index_all.sh index eaf5788..aa071b2 100755 --- a/index_all.sh +++ b/index_all.sh @@ -1,14 +1,8 @@ #!/usr/bin/env bash -set -e +set -exuo pipefail -echo "Finding total number of records..." >&2 -# Cached value from running the below command: -total=228459351 -***REMOVED*** -# --user=brigade_media --password=$TARGETSMART_PASSWORD -O - | awk '{ sum+=$1} END {print sum}') -echo "... ${total} records" >&2 - -for FILE in $(python list_files.py); do +total=230000000 # <- approximately correct value: +for FILE in $(env/bin/python list_files.py); do echo "Processing file $(basename $FILE)..." >&2 wget \ --timeout 900 \ @@ -16,4 +10,4 @@ for FILE in $(python list_files.py); do --quiet \ $FILE \ | gunzip -done | pv -l -s $total | python index.py $total +done | pv -l -s $total | env/bin/python index.py $total