
Commit

initial commit
srozb committed Dec 18, 2020
1 parent a390fcd commit eb6ce59
Showing 11 changed files with 689 additions and 1 deletion.
143 changes: 143 additions & 0 deletions .gitignore
@@ -0,0 +1,143 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

.vscode/*
*.csv
data/*
report.csv
45 changes: 44 additions & 1 deletion README.md
@@ -1,2 +1,45 @@
# nxdetective
Mass resolve 100k domains in less than 5 minutes
Mass resolve 100k domains in less than 5 minutes, report non-existent ones.

The purpose of this script is to go through the list of every domain found in
outbound SMTP logs and find those that are non-existent and therefore ready to
be registered by an adversary in order to collect mistakenly addressed emails.

![nxdetective in action](docs/action.gif)

## Requirements

* Python >= `3.7` (`3.8` is recommended)
* libs from `requirements.txt`

## Installation

```bash
git clone <this-repo-url> && cd nxdetective
python3 -m pip install -r requirements.txt
```

## Usage

```bash
python3 main.py process --nameservers 1.1.1.1,8.8.8.8 --workers_num 5 list.csv
```

This will spawn 10 asynchronous tasks (5 for `1.1.1.1` and 5 for `8.8.8.8`),
resolve the domains read from the `list.csv` file, and write non-existent
domains to `report.csv`.

Example `list.csv`:

```csv
domain;popularity
example.org;1
e-xample.org;5
example.cn;3
e-x-ple.com;1
```

_Note: the `popularity` column is mandatory, but its values are irrelevant and
won't affect the script's behaviour. Its only purpose is to indicate the
subjective popularity of each domain, which is useful if you build the list of
domains from HTTP or SMTP logs and know exactly how popular each one is within
your environment._
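
For example, a minimal sketch of how such a `list.csv` could be produced (the
input file `domains.txt`, holding one recipient domain per line, is an
assumption for illustration and not part of this repository):

```python
# Hypothetical helper: turn a plain-text dump of recipient domains
# (one per line, e.g. extracted from outbound SMTP logs) into list.csv.
from collections import Counter

with open("domains.txt") as src:
    counts = Counter(line.strip().lower() for line in src if line.strip())

with open("list.csv", "w") as dst:
    dst.write("domain;popularity\n")
    for domain, seen in counts.most_common():
        dst.write(f"{domain};{seen}\n")
```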
32 changes: 32 additions & 0 deletions csvreader.py
@@ -0,0 +1,32 @@
from worker import (
    Worker,
    WorkerMeta
)
import csv
import trio
from loguru import logger as l


class CSVReader(Worker):
    def __init__(self, src_file: str, data_dest: trio.MemorySendChannel):
        self.__src_file = src_file
        self.__data_dest = data_dest
        self.__csv_src = open(self.__src_file, 'r')
        self.reader = csv.reader(self.__csv_src)
        self.__meta__ = WorkerMeta(
            w_id=0, name="CSVReader", entity=self.__src_file)

    def setup_worker(self):
        """Always omit the CSV header line"""
        next(self.reader)  # skip header

    async def run(self):
        """Read the whole file"""
        self.change_status("setting up")
        self.setup_worker()
        self.change_status("running")
        async with self.__data_dest:
            for item in self.reader:
                await self.__data_dest.send(item)
                self.__meta__.items_processed += 1
        self.change_status("done")
Binary file added docs/action.gif
31 changes: 31 additions & 0 deletions domain.py
@@ -0,0 +1,31 @@
from typing import List, Optional
from dns import rrset
from pydantic import (
    BaseModel,
    validator
)
import tldextract
import re


class Domain(BaseModel):
    name: str
    answer: List[Optional[rrset.RRset]]
    outcome: str
    popularity: int

    class Config:
        arbitrary_types_allowed = True

    @validator('name')
    def test_domain_name(cls, v) -> str:
        """Domain name validator that rejects values with an invalid TLD or not matching a generic domain regex"""
        v = v.strip().lower()
        cls.tld = str(tldextract.extract(v).suffix)
        regex = r"^((?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)+[A-Za-z]{2,6}$"
        p = re.compile(regex)
        if cls.tld == "":
            raise ValueError("Unable to determine TLD")
        if not p.search(v):
            raise ValueError("Invalid domain")
        return v
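
A quick sanity check of the validator's behaviour (an illustrative sketch,
assuming the pydantic v1-style API imported above and the packages from
`requirements.txt`; not part of this commit):

```python
# Illustration only: exercising the Domain validator defined in domain.py.
from pydantic import ValidationError
from domain import Domain

d = Domain(name="Example.ORG ", answer=[], outcome="PENDING", popularity=1)
print(d.name)  # "example.org" -- stripped and lower-cased by the validator

try:
    Domain(name="localhost", answer=[], outcome="PENDING", popularity=1)
except ValidationError as err:
    print(err)  # rejected: no recognisable TLD
```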
51 changes: 51 additions & 0 deletions domainresolver.py
@@ -0,0 +1,51 @@
from worker import Worker
import trio
from domain import Domain

from dns import asyncresolver
from loguru import logger as l
from typing import (
    List,
    Union,
    Optional
)


class DomainResolver(Worker):
    @property
    def nameserver(self) -> Optional[str]:
        return self.__nameserver

    @nameserver.setter
    def nameserver(self, value: str):
        self.__nameserver = value
        self.__meta__.entity = self.__nameserver

    def __init__(self, data_source: trio.MemoryReceiveChannel, data_dest: trio.MemorySendChannel):
        self.__nameserver = None
        super().__init__(data_source, data_dest)
        self.__meta__.name = "DomainResolver"
        self.__meta__.item_unit = "domains"

    def setup_worker(self):
        """Setup the Async Resolver instance"""
        self.__Resolver = asyncresolver.Resolver()
        self.__Resolver.nameservers = [self.__nameserver]

    async def process(self, item: List[Union[str, int]]) -> Union[Domain, None]:
        """If a domain name is valid, try to resolve it"""
        try:
            domain = Domain(name=item[0], answer=[],
                            outcome="PENDING", popularity=item[1])
            self.__meta__.current_item = item[0]
        except ValueError:
            l.debug(f"Discarding: {item[0]}")
            return
        l.debug(f"[{self.__nameserver}] Resolving: {domain.name}")
        try:
            resolved = await self.__Resolver.resolve(domain.name, "MX")
            domain.answer = resolved.response.answer
            domain.outcome = "OK"
        except Exception as e:
            domain.outcome = e.__class__.__name__
        return domain
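
For reference, a minimal sketch of where the `outcome` labels come from
(assuming dnspython's trio backend is auto-detected; the `check` helper is
hypothetical and not part of this commit):

```python
# Illustration only: outcome naming mirrors DomainResolver.process above.
import trio
from dns import asyncresolver
from dns.resolver import NXDOMAIN

async def check(name: str) -> str:
    resolver = asyncresolver.Resolver()
    try:
        await resolver.resolve(name, "MX")
        return "OK"                      # resolvable domain
    except NXDOMAIN as e:
        return e.__class__.__name__      # "NXDOMAIN" -- candidate for registration
    except Exception as e:
        return e.__class__.__name__      # e.g. "NoAnswer", "LifetimeTimeout"

print(trio.run(check, "example.org"))
```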
85 changes: 85 additions & 0 deletions main.py
@@ -0,0 +1,85 @@
#!/usr/bin/env python

from reporter import Reporter
import dns.asyncresolver
import fire
import sys

from typing import (
    List,
)

import trio
from loguru import logger as l

from domainresolver import DomainResolver
from csvreader import CSVReader
from painter import Painter

Domains = trio.open_memory_channel(65536)
Resolved = trio.open_memory_channel(65536)


def create_resolver_workers(nameservers: str = "", workers_num: int = 1) -> List[object]:
    """Create DomainResolver tasks depending on the nameservers configured and the desired number of workers"""
    Workers = []
    if nameservers:
        ns_to_use = nameservers.split(',')
    else:
        ns_to_use = dns.asyncresolver.get_default_resolver().nameservers
    for i in range(workers_num):
        for ns in ns_to_use:
            W = DomainResolver(Domains[1], Resolved[0])
            W.nameserver = ns
            W.__meta__.w_id = i
            Workers.append(W)
    return Workers


def create_csvreader_workers(csv_file: str) -> List[object]:
    """Create CSV Reader task"""
    CSV_Worker = CSVReader(csv_file, Domains[0])
    return [CSV_Worker]


def create_reporter_workers(report_file: str) -> List[object]:
    """Create Reporter task (statistics & csv report)"""
    File_Reporter = Reporter(Resolved[1], report_file)
    return [File_Reporter]


def create_painter(workers: List[object]) -> list:
    """Create Painter task (dashboards)"""
    p = Painter(workers=workers)
    return [p]


async def process(domain_file: str, nameservers: str = "",
                  workers_num: int = 1, debug: bool = False):
    """Process the given CSV file, resolve domains and create report.csv"""
    Workers = []  # TODO: Workers should be global so the painter task always has accurate data
    Workers += create_resolver_workers(nameservers, workers_num)
    Workers += create_csvreader_workers(domain_file)
    Workers += create_reporter_workers("report.csv")
    if not debug:
        l.remove()
        l.add(sys.stderr, level="INFO")
    Workers += create_painter(Workers)

    async with trio.open_nursery() as nursery:
        for Worker in Workers:
            l.debug(f"Starting worker: {Worker}")
            nursery.start_soon(Worker.run)


def main(domain_file: str, nameservers: str = "",
         workers_num: int = 1, debug: bool = False):
    """Run asynchronous tasks"""
    trio.run(process, domain_file, nameservers, workers_num, debug)


if __name__ == "__main__":
    fire.Fire({
        'process': main,
    })
