From 6a79cebd913aa4fcb0b0acdf942efbadb7b8eedf Mon Sep 17 00:00:00 2001 From: malinero Date: Sat, 17 Sep 2022 21:09:44 +0200 Subject: [PATCH] update --- README.md | 33 +++++++++++++- gen_from_geolite.py | 47 +++++++++++++++++++ gen_from_static.py | 42 +++++++++++++++++ gen_from_urls.py | 46 +++++++++++++++++++ gen_peer_groups.py | 109 -------------------------------------------- main.py | 22 +++++++++ 6 files changed, 189 insertions(+), 110 deletions(-) create mode 100644 gen_from_geolite.py create mode 100644 gen_from_static.py create mode 100644 gen_from_urls.py delete mode 100644 gen_peer_groups.py create mode 100644 main.py diff --git a/README.md b/README.md index 751dccd..fa261c9 100644 --- a/README.md +++ b/README.md @@ -1 +1,32 @@ -# gen_peer_groups \ No newline at end of file +# Generate peer grouping file + +## Generate mapping file + + +```sh +python main.py > mapping.txt +``` + +### GeoIP + +Download GeoLite2-ASN-Blocks-IPv4.csv/GeoLite2-ASN-Blocks-IPv6.csv from maxmind (requires email) + +```sh +python main.py --include-geolite --geolite-path /path/to/GeoLite2-ASN-Blocks-IPv*.csv > mapping.txt +``` + + +## Compress mapping file + +```sh +git clone https://github.com/sipa/asmap -b nextgen +cd asmap +python3 asmap-tool.py encode ../mapping.txt mapping.bin +``` + + +## Monero usage + +```sh +monerod --asmap /path/to/mapping.bin .... 
+``` diff --git a/gen_from_geolite.py b/gen_from_geolite.py new file mode 100644 index 0000000..ee36180 --- /dev/null +++ b/gen_from_geolite.py @@ -0,0 +1,47 @@ +import csv + + +ASN_GROUPS = { + # grep -i google GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq + "google": [139070, 139190, 15169, 16550, 16591, 19527, 36384, 36385, 36492, 395973, 396982, 41264, 43515, 45566], + # grep -i microsoft GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq + "ms": [12076, 200517, 23468, 35106, 3598, 45139, 58862, 59067, 6584, 8068, 8069, 8070, 8071, 8075], + # grep -i -E "amazon|aws-" GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq + "amazon": [ 14618, 16509, 19047, 22449, 262486, 262772, 263639, 264167, 264344, 264509, 266122, 266194, 267242, 268063, 271017, 271047, 36263, 52994, 61577, 62785, 7224, 8987], + # grep -i cloudflare GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq + "cloudflare": [ 132892, 13335, 139242, 202623, 203898, 209242, 395747], + # grep -i ovh GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq + "ovh": [16276, 35540], + # grep -i -E "digital.*ocean" GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq + "digitalocean": [14061, 205301, 209396, 39690], + # https://ipinfo.io/countries/ma + "morocco": [6713, 36925, 36903, 36884, 30983, 36941, 327989, 328066, 328867, 328055, 328577, 328709, 328493, 328671, 328541, 327917, 328272, 328280, 328799, 37450, 328268, 37787, 328960, 36956] +} + + +def generate(paths, current): + data = {"asn": dict(), "org": dict()} + for path in paths: + reader = csv.DictReader(open(path, "r")) + for row in reader: + org = row['autonomous_system_organization'] + asn = row['autonomous_system_number'] + + for k,v in ASN_GROUPS.items(): + if int(asn) in v: + org = k + break + if not org in data["org"]: + data["org"][org] = set() + data["org"][org].add(asn) + + if not asn in data["asn"]: + data["asn"][asn] = set() + data["asn"][asn].add(row['network']) + + for org in 
sorted(data["org"].keys()): + current += 1 + for asn in data["org"][org]: + for address in data["asn"][asn]: + print(address, "AS%d"%(current)) + return current diff --git a/gen_from_static.py b/gen_from_static.py new file mode 100644 index 0000000..40eb807 --- /dev/null +++ b/gen_from_static.py @@ -0,0 +1,42 @@ +# https://en.wikipedia.org/wiki/United_States_Department_of_Defense + + +def us_dod(): + return [ + "6.0.0.0/8", + "7.0.0.0/8", + "11.0.0.0/8", + "21.0.0.0/8", + "22.0.0.0/8", + "26.0.0.0/8", + "28.0.0.0/8", + "29.0.0.0/8", + "30.0.0.0/8", + "55.0.0.0/8", + "205.0.0.0/8", + "214.0.0.0/8", + "215.0.0.0/8" + ] + + +# List of assigned /8 blocks to commercial organisations +def singles(): + return [ + "12.0.0.0/8", # AT&T + "17.0.0.0/8", # Apple + "19.0.0.0/8", # Ford Motor + "38.0.0.0/8", # Cogent communication + "48.0.0.0/8", # Prudential securities + "53.0.0.0/8", # Mercedes Benz + "56.0.0.0/8", # US postal services + "73.0.0.0/8", # Comcast + ] + + +def generate(current): + for entry in us_dod(): + print(entry, "AS%d"%(current)) + for entry in singles(): + current += 1 + print(entry, "AS%d"%(current)) + return current diff --git a/gen_from_urls.py b/gen_from_urls.py new file mode 100644 index 0000000..ca9a38b --- /dev/null +++ b/gen_from_urls.py @@ -0,0 +1,46 @@ +import re +import requests + + +# NB: doesn't take into account https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-byoip.html +def aws(): + ip_ranges = requests.get('https://ip-ranges.amazonaws.com/ip-ranges.json').json() + ipv4 = [item["ip_prefix"] for item in ip_ranges["prefixes"]] + ipv6 = [item["ipv6_prefix"] for item in ip_ranges["ipv6_prefixes"]] + return ipv4 + ipv6 + + +def azure(): + response = requests.get("https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519") + found = re.search("href=\"https://download\.microsoft\.com/download/.*/ServiceTags_Public_.*\.json\"", response.text) + if not found: + raise ValueError("Failed to extract Azure download url") + response = 
requests.get(found.group()[len('href="'):-1]) + res = [] + for entry in response.json()["values"]: + res.extend(entry["properties"]["addressPrefixes"]) + return res + + +def cloudflare(): + return requests.get("https://www.cloudflare.com/ips-v4").text.split("\n") + requests.get("https://www.cloudflare.com/ips-v6").text.split("\n") + + +def google(): + def _get_google(url): + prefixes = requests.get("https://www.gstatic.com/ipranges/goog.json").json()["prefixes"] + return [p["ipv4Prefix"] if "ipv4Prefix" in p else p["ipv6Prefix"] for p in prefixes] + return _get_google("https://www.gstatic.com/ipranges/goog.json") + _get_google("https://www.gstatic.com/ipranges/cloud.json") + + +# https://en.wikipedia.org/wiki/United_States_Department_of_Defense +def us_dod(): + return ["6.0.0.0/8", "7.0.0.0/8", "11.0.0.0/8", "21.0.0.0/8", "22.0.0.0/8", "26.0.0.0/8", "28.0.0.0/8", "29.0.0.0/8", "30.0.0.0/8", "55.0.0.0/8", "205.0.0.0/8", "214.0.0.0/8", "215.0.0.0/8"] + + +def generate(current): + for config in [aws, azure, cloudflare, google, us_dod]: + current += 1 + for entry in config(): + print(entry, "AS%d"%(current)) + return current diff --git a/gen_peer_groups.py b/gen_peer_groups.py deleted file mode 100644 index a0f0be2..0000000 --- a/gen_peer_groups.py +++ /dev/null @@ -1,109 +0,0 @@ -import argparse -import csv -import re -import requests - - -GROUPS = { - # grep -i google GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq - "google": [139070, 139190, 15169, 16550, 16591, 19527, 36384, 36385, 36492, 395973, 396982, 41264, 43515, 45566], - # grep -i microsoft GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq - "ms": [12076, 200517, 23468, 35106, 3598, 45139, 58862, 59067, 6584, 8068, 8069, 8070, 8071, 8075], - # grep -i -E "amazon|aws-" GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq - "amazon": [ 14618, 16509, 19047, 22449, 262486, 262772, 263639, 264167, 264344, 264509, 266122, 266194, 267242, 268063, 271017, 271047, 36263, 52994, 61577, 
62785, 7224, 8987], - # grep -i cloudflare GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq - "cloudflare": [ 132892, 13335, 139242, 202623, 203898, 209242, 395747], - # grep -i ovh GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq - "ovh": [16276, 35540], - # grep -i -E "digital.*ocean" GeoLite2-ASN-Blocks-IPv4.csv | cut -d , -f 2 | sort | uniq - "digitalocean": [14061, 205301, 209396, 39690], - # https://ipinfo.io/countries/ma - "morocco": [6713, 36925, 36903, 36884, 30983, 36941, 327989, 328066, 328867, 328055, 328577, 328709, 328493, 328671, 328541, 327917, 328272, 328280, 328799, 37450, 328268, 37787, 328960, 36956] -} - -# NB: doesn't take into account https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-byoip.html -def aws(): - ip_ranges = requests.get('https://ip-ranges.amazonaws.com/ip-ranges.json').json() - ipv4 = [item["ip_prefix"] for item in ip_ranges["prefixes"]] - ipv6 = [item["ipv6_prefix"] for item in ip_ranges["ipv6_prefixes"]] - return ipv4 + ipv6 - - -def azure(): - response = requests.get("https://www.microsoft.com/en-us/download/confirmation.aspx?id=56519") - found = re.search("href=\"https://download\.microsoft\.com/download/.*/ServiceTags_Public_.*\.json\"", response.text) - if not found: - raise ValueError("Failed to extract Azure download url") - response = requests.get(found.group()[len('href="'):-1]) - res = [] - for entry in response.json()["values"]: - res.extend(entry["properties"]["addressPrefixes"]) - return res - - -def cloudflare(): - return requests.get("https://www.cloudflare.com/ips-v4").text.split("\n") + requests.get("https://www.cloudflare.com/ips-v6").text.split("\n") - - -def google(): - def _get_google(url): - prefixes = requests.get("https://www.gstatic.com/ipranges/goog.json").json()["prefixes"] - return [p["ipv4Prefix"] if "ipv4Prefix" in p else p["ipv6Prefix"] for p in prefixes] - return _get_google("https://www.gstatic.com/ipranges/goog.json") + 
_get_google("https://www.gstatic.com/ipranges/cloud.json") - - -# https://en.wikipedia.org/wiki/United_States_Department_of_Defense -def us_dod(): - return ["6.0.0.0/8", "7.0.0.0/8", "11.0.0.0/8", "21.0.0.0/8", "22.0.0.0/8", "26.0.0.0/8", "28.0.0.0/8", "29.0.0.0/8", "30.0.0.0/8", "55.0.0.0/8", "205.0.0.0/8", "214.0.0.0/8", "215.0.0.0/8"] - - -def generate_from_geolite(paths): - data = {"asn": dict(), "org": dict()} - for path in paths: - reader = csv.DictReader(open(path, "r")) - for row in reader: - org = row['autonomous_system_organization'] - asn = row['autonomous_system_number'] - - for k,v in GROUPS.items(): - if int(asn) in v: - org = k - break - if not org in data["org"]: - data["org"][org] = set() - data["org"][org].add(asn) - - if not asn in data["asn"]: - data["asn"][asn] = set() - data["asn"][asn].add(row['network']) - - for index, org in enumerate(sorted(data["org"].keys())): - index += 1 - for asn in data["org"][org]: - for address in data["asn"][asn]: - print(address, "AS%d"%(index)) - - -def generate_from_url(): - current = 0 - for config in [aws, azure, cloudflare, google, us_dod]: - current += 1 - for entry in config(): - print(entry, "AS%d"%(current)) - - -def main(): - parser = argparse.ArgumentParser(description="Generate AS mapping file") - parser.add_argument("--method", choices=["geolite", "urls"], default="geolite", help="Method (%(default)s)") - parser.add_argument("--path", default=["GeoLite2-ASN-Blocks-IPv4.csv", "GeoLite2-ASN-Blocks-IPv6.csv"], nargs="+", help="Path to GeoLite2-ASN-Blocks-IPv4.csv") - args = parser.parse_args() - if args.method == "geolite": - generate_from_geolite(args.path) - elif args.method == "urls": - generate_from_url() - else: - raise ValueError("bug") - - -if __name__ == "__main__": - main() diff --git a/main.py b/main.py new file mode 100644 index 0000000..d484c7f --- /dev/null +++ b/main.py @@ -0,0 +1,22 @@ +import argparse + +import gen_from_geolite as gl +import gen_from_static as gs +import gen_from_urls as 
gu + + +def main(): + parser = argparse.ArgumentParser(description="Generate AS mapping file") + parser.add_argument("--method", choices=["geolite", "urls"], default="geolite", help="Method (%(default)s)") + parser.add_argument("--include-geolite", action="store_true", help="Include geolite data (require downloading geolite database before)") + parser.add_argument("--geolite-path", default=["GeoLite2-ASN-Blocks-IPv4.csv", "GeoLite2-ASN-Blocks-IPv6.csv"], nargs="+", help="Path to GeoLite2-ASN-Blocks-IPv4.csv") + args = parser.parse_args() + current = 0 + current = gu.generate(current) + current = gs.generate(current) + if args.include_geolite: + gl.generate(args.geolite_path, current) + + +if __name__ == "__main__": + main()