tatuylonen · kristian-clausal · May 12, 2023 · Apr 23, 2023 · Apr 23, 2023 · Apr 27, 2023
diff --git a/Makefile b/Makefile
@@ -4,7 +4,7 @@
 # --with-coverage --coverage-report=html --coverage-config=tests/testcov.conf
 test:
 	rm -rf .coverage*
-	nose2 --output-buffer --pretty-assert
+	python -m nose2 --output-buffer --pretty-assert
 	# rm -rf htmlcov
 	# mv -f htmlcov.new htmlcov
 

diff --git a/README.md b/README.md
@@ -304,39 +304,35 @@ Preparation: on Linux (example from Ubuntu 20.04), you may need to
 first install the ``build-essential`` and ``python3-dev`` packages
 with ``apt update && apt install build-essential python3-dev python3-pip``.
 
-To install ``wiktextract``, use ``pip`` (or ``pip3``, as appropriate):
-```
-pip3 install wiktextract
-```
-
-Alternatively, you can get the latest development version from github:
+Install `wiktextract` from source:
 
 ```
-git clone https://github.com/tatuylonen/wikitextprocessor.git
-cd wikitextprocessor && pip3 install -r requirements.txt && pip3 install -e .
-cd ..
 git clone https://github.com/tatuylonen/wiktextract.git
-cd wiktextract && pip3 install -r requirements.txt && pip3 install -e .
+cd wiktextract
+python3 -m venv .venv
+source .venv/bin/activate
+python -m pip install -U pip
+python -m pip install --use-pep517 .
 ```
 
-This will install the ``wikitextprocessor``, the ``wiktextract`` package
-and the ``wiktwords`` script inside wiktextract.
+Alternatively, you can install the released version of the package from PyPI:
 
-Only installing ``wiktextract`` will probably not work, depending on how
-old the pip packages are; wikitextprocessor and wiktextract are being worked
-on in tandem, so keeping them in synch is best.
+```
+python -m pip install wiktextract
+```
 
 This software requires Python 3.
 
 ### Running tests
 
-This package includes tests written using the ``unittest`` framework.
-They can be run using, for example, ``nose2``, which can be installed
-using ``pip3 install nose2``.
+This package includes tests written using the `unittest` framework.
+They can be run using, for example, `nose2`, which is part of the `dev`
+extra and can be installed using `python -m pip install -e ".[dev]"`.
+
+To run the tests, use the following command in the top-level directory:
 
-To run the tests, just use the following command in the top-level directory:
 ```
-nose2
+make test
 ```
 
 (Unfortunately the test suite for ``wiktextract`` is not yet very

diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,51 @@
+[build-system]  # build requirements and backend used by PEP 517 front ends such as pip
+requires = ["setuptools>=61.0.0"]  # the [project] table below (PEP 621) is only supported by setuptools 61.0.0 and later
+build-backend = "setuptools.build_meta"
+
+[project]  # static package metadata (PEP 621)
+name = "wiktextract"
+version = "1.99.7"
+description = "Wiktionary dump file parser and multilingual data extractor"
+readme = "README.md"
+license = {text = "MIT License"}
+authors = [
+    {name = "Tatu Ylonen", email = "[email protected]"},  # NOTE(review): value looks like an e-mail-obfuscation placeholder, not a real address -- confirm against the repository
+]
+classifiers = [  # NOTE(review): Python 3.8-3.10 are listed here, but no `requires-python` key is set in this table -- confirm whether a minimum version should be enforced
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Natural Language :: English",
+    "Operating System :: OS Independent",
+    "Operating System :: POSIX :: Linux",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Topic :: Text Processing",
+    "Topic :: Text Processing :: Linguistic",
+]
+dependencies = [  # runtime requirements installed together with the package
+    "nltk",
+    "python-Levenshtein",
+    "wikitextprocessor @ git+https://github.com/tatuylonen/wikitextprocessor.git",  # direct reference to the Git repository; no branch, tag or commit is pinned
+]
+
+[project.optional-dependencies]  # extras, selected with e.g. `pip install ".[dev]"`
+dev = ["nose2[coverage_plugin]"]  # nose2 is the test runner invoked by the Makefile's `test` target; installed here with its coverage_plugin extra
+
+[project.scripts]  # console commands created when the package is installed
+wiktwords = "wiktextract.wiktwords:main"  # the `wiktwords` command calls main() in the wiktextract.wiktwords module
+
+[project.urls]  # project links published with the package metadata
+homepage = "https://github.com/tatuylonen/wiktextract"
+
+[tool.setuptools.packages.find]  # automatic package discovery by setuptools
+exclude = ["usertools", "tests"]  # NOTE(review): these patterns name only the top-level packages; if sub-packages (e.g. tests.foo) exist, "usertools*" / "tests*" may be needed -- confirm
+
+[tool.setuptools.package-data]  # non-Python files to ship inside the listed packages
+wiktextract = [
+    "data/*/*",  # glob relative to the wiktextract package directory: entries one sub-directory below data/
+]
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.py b/setup.py
diff --git a/wiktwords → wiktextract/wiktwords.py b/wiktwords → wiktextract/wiktwords.py
@@ -111,7 +111,7 @@ def capture_page(model, orig_title, text, pages_dir):
     return analyze
 
 
-if __name__ == "__main__":
+def main():
     parser = argparse.ArgumentParser(
         description="Multilingual Wiktionary data extractor")
     parser.add_argument("path", type=str, nargs="?", default=None,
@@ -338,7 +338,7 @@ def capture_page(model, orig_title, text, pages_dir):
                 ctx.add_page("Scribunto", title, text, transient=True)
 
     def word_cb(data):
-        global word_count
+        nonlocal word_count
         word_count += 1
         if out_f is not None:
             if args.human_readable:
@@ -497,3 +497,7 @@ def dump_val(title, limit, counts):
             print(title)
             for (k, kk), v in lst[:limit]:
                 print("  {:5d} {}: {}".format(v, k, kk))
+
+
+if __name__ == "__main__":
+    main()