Skip to content
This repository has been archived by the owner on Dec 2, 2021. It is now read-only.

Commit

Permalink
Switches to using English labels for countries.
Browse files Browse the repository at this point in the history
  • Loading branch information
justinlittman committed Nov 26, 2018
1 parent 2a9f866 commit 2fba3d9
Show file tree
Hide file tree
Showing 6 changed files with 422 additions and 201 deletions.
200 changes: 200 additions & 0 deletions lib/translation_maps/countries.csv

Large diffs are not rendered by default.

194 changes: 0 additions & 194 deletions lib/translation_maps/countries.tsv

This file was deleted.

171 changes: 171 additions & 0 deletions lib/translation_maps/country_names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
3041565,Andorra,,True
1149361,Afghanistan,,True
783754,Albania,,True
174982,Armenia,True,True
3351879,Angola,,True
3865483,Argentina,,True
2782113,Austria,,True
2077456,Australia,True,True
3577279,Aruba,True,
587116,Azerbaijan,,True
3277605,Bosnia and Herzegovina,True,
2802361,Belgium,,True
732800,Bulgaria,,True
290291,Bahrain,,True
433561,Burundi,,True
2395170,Benin,,True
3578476,Saint Barthélemy,True,
1820814,Brunei,,True
3923057,Bolivia,,True
7626844,"Bonaire, Sint Eustatius, and Saba",,True
7626844,"Bonaire, Saint Eustatius, and Saba",True,
3469034,Brazil,True,
3572887,Bahamas,,True
630336,Belarus,,True
203312,Congo,,True
203312,DR Congo,,True
2260494,Republic of the Congo,True,
2260494,Congo,,True
2658434,Switzerland,True,True
2287781,Ivory Coast,True,
3895114,Chile,,True
2233387,Cameroon,,True
1814991,China,True,True
3686110,Colombia,,True
3624060,Costa Rica,,True
3562981,Cuba,,True
3374766,Cape Verde,,True
3374766,Republic of Cabo Verde,True,
3374766,Cabo Verde,True,True
7626836,Curaçao,True,True
3077311,Czechia,,True
3077311,Czech Republic,True,
2921044,Germany,True,True
223816,Djibouti,,True
2623032,Denmark,,True
3575830,Dominica,True,True
3508796,Dominican Republic,True,True
2589581,Algeria,,True
3658394,Ecuador,,True
453733,Estonia,,True
357994,Egypt,True,True
338010,Eritrea,,True
2510769,Spain,True,
337996,Ethiopia,,True
660013,Finland,True,True
2400553,Gabon,,True
2635167,Great Britain,,True
2635167,United Kingdom,True,True
614540,Georgia,True,True
2413451,Gambia,,True
2413451,The Gambia,True,
2420477,Guinea,,True
2309096,Equatorial Guinea,True,True
390903,Greece,True,True
3595528,Guatemala,,True
2372248,Guinea-Bissau,,True
3608932,Honduras,,True
3202326,Croatia,,True
3723988,Haiti,,True
719819,Hungary,True,True
1643084,Indonesia,,True
2963597,Ireland,,True
294640,Israel,,True
1269750,India,,True
99237,Iraq,,True
130758,Iran,True,True
2629691,Iceland,,True
3175395,Italy,True,
248816,Jordan,,True
248816,Hashemite Kingdom of Jordan,True,
1527747,Kyrgyzstan,True,True
1831722,Cambodia,True,True
921929,Comoros,,True
3575174,St Kitts and Nevis,True,
1873107,North Korea,True,True
1835841,South Korea,,True
1835841,Republic of Korea,True,
285570,Kuwait,,True
1522867,Kazakhstan,,True
1655842,Laos,True,True
272103,Lebanon,True,True
3576468,Saint Lucia,True,
3042058,Liechtenstein,,True
1227603,Sri Lanka,True,True
597427,Lithuania,,True
597427,Republic of Lithuania,True,
2960313,Luxembourg,,True
458258,Latvia,True,True
2215636,Libya,True,True
2542007,Morocco,True,True
617790,Moldova,,True
617790,Republic of Moldova,True,
3194884,Montenegro,True,True
3578421,Saint Martin,True,
1062947,Madagascar,,True
718075,Macedonia,True,True
1327865,Myanmar,True,
2029969,Mongolia,True,True
2378080,Mauritania,,True
2562770,Malta,True,True
934292,Mauritius,True,
1282028,Maldives,,True
3996063,Mexico,True,True
1036973,Mozambique,,True
2440476,Niger,,True
3617476,Nicaragua,,True
2750405,Netherlands,True,True
3144096,Norway,True,True
1282988,Nepal,,True
2110425,Nauru,,True
286963,Oman,,True
3703430,Panama,,True
3932488,Peru,,True
1694008,Philippines,True,True
1168579,Pakistan,,True
798544,Poland,True,True
2264397,Portugal,,True
3437598,Paraguay,,True
289688,Qatar,,True
2017370,Russia,True,True
49518,Rwanda,,True
102358,Saudi Arabia,,True
2661886,Sweden,,True
1880251,Singapore,,True
3190538,Slovenia,,True
3057568,Slovakia,True,True
2245662,Senegal,,True
51537,Somalia,True,True
3382998,Suriname,,True
7909807,Republic of South Sudan,True,
7909807,South Sudan,,True
2410758,São Tomé and Príncipe,True,True
3585968,El Salvador,,True
7609695,Sint Maarten,True,
163843,Syria,,True
934841,Eswatini,True,
2434508,Chad,,True
1546748,French Southern Territories,True,
2363686,Togo,,True
1605651,Thailand,,True
1220409,Tajikistan,,True
1966436,Timor-Leste,True,
2464461,Tunisia,,True
298795,Turkey,True,True
149590,Tanzania,,True
226074,Uganda,True,
6252001,United States,,True
6252001,United States of America,True,
3439705,Uruguay,,True
1512440,Uzbekistan,,True
3164670,Vatican City,,True
3577815,St Vincent and Grenadines,True,
3625428,Venezuela,True,True
1562822,Vietnam,,True
2134431,Vanuatu,,True
4034894,Samoa,,True
831053,Kosovo,,True
69543,Yemen,True,True
953987,South Africa,,True
895949,Zambia,True,True
878675,Zimbabwe,True,
7 changes: 4 additions & 3 deletions lib/translation_maps/country_names_to_geocode_ids.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# frozen_string_literal: true

# This requires countries.tsv. To create:
# To create:
# curl http://download.geonames.org/export/dump/allCountries.zip > allCountries.zip
# unzip -p allCountries.zip | grep "\tPCLI\t" > lib/translation_maps/countries.tsv
# unzip allCountries.zip
# cat allCountries.txt | csvgrep -d $'\t' -u 3 -H -c 8 -m "PCLI" | csvcut -c 1-4 | tail -n +2 > lib/translation_maps/countries.csv

require 'csv'

countries = {}
CSV.foreach(File.join(File.dirname(__FILE__), 'countries.tsv'), col_sep: "\t") do |row|
CSV.foreach(File.join(File.dirname(__FILE__), 'countries.csv')) do |row|
geocode_id = row[0]
# Name
countries[row[1].downcase] = geocode_id
Expand Down
33 changes: 29 additions & 4 deletions lib/translation_maps/geocodes_to_country_names.rb
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
# frozen_string_literal: true

# This requires countries.tsv. To create:
# This requires countries.csv.
# To create:
# curl http://download.geonames.org/export/dump/allCountries.zip > allCountries.zip
# unzip -p allCountries.zip | grep "\tPCLI\t" > lib/translation_maps/countries.tsv
# unzip allCountries.zip
# cat allCountries.txt | csvgrep -d $'\t' -u 3 -H -c 8 -m "PCLI" | csvcut -c 1-4 | \
# tail -n +2 > lib/translation_maps/countries.csv

# This requires englishAlternateNamesV2.csv.
# To create:
# curl http://download.geonames.org/export/dump/alternateNamesV2.zip > alternateNamesV2.zip
# unzip alternateNamesV2.zip
# cat alternateNamesV2.txt | csvgrep -d $'\t' -H -u 3 -c 3 -m "en" | csvgrep -c 8 -m "1" -i | csvgrep -c 5,6 -a -m "1" | \
# tail -n +2 > englishAlternateNamesV2.csv
# csvjoin -c 1,2 -H --snifflimit 0 lib/translation_maps/countries.csv englishAlternateNamesV2.csv | \
# csvcut -c 1,7,8,9 | tail -n +2 > lib/translation_maps/country_names.csv

require 'csv'

# Looks up the "<geocode_id>/" entry in Rialto::Etl::Vocabs::SWS_GEONAMES
# and returns the result as a String.
#
# @param geocode_id [#to_s] identifier interpolated into the lookup key
# @return [String] string form of the vocabulary entry for "<geocode_id>/"
def to_geoname(geocode_id)
  lookup_key = "#{geocode_id}/"
  vocabulary_term = Rialto::Etl::Vocabs::SWS_GEONAMES[lookup_key]
  vocabulary_term.to_s
end

countries = {}
CSV.foreach(File.join(File.dirname(__FILE__), 'countries.tsv'), col_sep: "\t") do |row|
CSV.foreach(File.join(File.dirname(__FILE__), 'country_names.csv')) do |row|
geocode_id = row[0]
country = row[1]
is_preferred = row[2]
is_short = row[3]
# Set the name when it is preferred, or when it is short and no name has been recorded yet for this geocode
countries[to_geoname(geocode_id)] = country if (!countries.key?(to_geoname(geocode_id)) && is_short) || is_preferred
end
CSV.foreach(File.join(File.dirname(__FILE__), 'countries.csv')) do |row|
geocode_id = row[0]
country = row[1]
countries[Rialto::Etl::Vocabs::SWS_GEONAMES["#{geocode_id}/"].to_s] = country
# Fall back to the countries.csv name only when no name has been recorded yet for this geocode
countries[to_geoname(geocode_id)] = country unless countries.key?(to_geoname(geocode_id))
end
countries
18 changes: 18 additions & 0 deletions spec/translation_maps/geocodes_to_country_names_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

require 'rialto/etl/namespaces'

# rubocop:disable RSpec/DescribeClass
# Exercises the 'geocodes_to_country_names' translation map, which is keyed by
# the string form of SWS_GEONAMES vocabulary entries.
RSpec.describe 'geocodes_to_country_names' do
  subject(:translation_map) { Traject::TranslationMap.new('geocodes_to_country_names') }

  describe 'lookup' do
    # Geocode 6252001 is expected to resolve to the longer, preferred label.
    it 'returns a preferred name before a short name' do
      usa_key = Rialto::Etl::Vocabs::SWS_GEONAMES['6252001/'].to_s
      looked_up_name = translation_map[usa_key]
      expect(looked_up_name).to eq('United States of America')
    end

    # Geocode 3932488 is expected to resolve to its short label.
    it 'returns a short name when no preferred name' do
      peru_key = Rialto::Etl::Vocabs::SWS_GEONAMES['3932488/'].to_s
      looked_up_name = translation_map[peru_key]
      expect(looked_up_name).to eq('Peru')
    end
  end
end
# rubocop:enable RSpec/DescribeClass

0 comments on commit 2fba3d9

Please sign in to comment.