name changed

adbar · Apr 8, 2019 · 480787f · 480787f
1 parent ab5b682
commit 480787f
Show file tree

Hide file tree

Showing 9 changed files with 698 additions and 24 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -1,5 +1,5 @@
 [run]
-source = textract
+source = html-extractor
 
 omit =
     tests/*

diff --git a/LICENSE b/LICENSE
diff --git a/README.rst b/README.rst
@@ -1,20 +1,20 @@
-textract: ...
+html-extractor: ...
 ==============================================
 
-.. image:: https://img.shields.io/pypi/v/textract.svg
-    :target: https://pypi.python.org/pypi/textract
+.. image:: https://img.shields.io/pypi/v/html-extractor.svg
+    :target: https://pypi.python.org/pypi/html-extractor
 
-.. image:: https://img.shields.io/pypi/l/textract.svg
-    :target: https://pypi.python.org/pypi/textract
+.. image:: https://img.shields.io/pypi/l/html-extractor.svg
+    :target: https://pypi.python.org/pypi/html-extractor
 
-.. image:: https://img.shields.io/pypi/pyversions/textract.svg
-    :target: https://pypi.python.org/pypi/textract
+.. image:: https://img.shields.io/pypi/pyversions/html-extractor.svg
+    :target: https://pypi.python.org/pypi/html-extractor
 
-.. image:: https://img.shields.io/travis/adbar/textract.svg
-    :target: https://travis-ci.org/adbar/textract
+.. image:: https://img.shields.io/travis/adbar/html-extractor.svg
+    :target: https://travis-ci.org/adbar/html-extractor
 
-.. image:: https://img.shields.io/codecov/c/github/adbar/textract.svg
-    :target: https://codecov.io/gh/adbar/textract
+.. image:: https://img.shields.io/codecov/c/github/adbar/html-extractor.svg
+    :target: https://codecov.io/gh/adbar/html-extractor
 
 
 Description here.

diff --git a/textract/__init__.py b/html-extractor/__init__.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 """
-Extract the date of web pages, or web archeology in practice.
+Extract the text content of web pages.
 """
 
 ## meta
-__title__ = 'textract'
+__title__ = 'html-extractor'
 __author__ = 'Adrien Barbaresi'
 __license__ = 'GNU GPL v3'
 __copyright__ = 'Copyright 2019, Adrien Barbaresi'

diff --git a/textract/core.py b/html-extractor/core.py
@@ -3,7 +3,7 @@
 Module bundling all functions needed to extract the text in a webpage.
 """
 
-## This file is available from https://github.com/adbar/textract
+## This file is available from https://github.com/adbar/html-extractor
 ## under GNU GPL v3 license
 
 # compatibility

diff --git a/textract/settings.py b/html-extractor/settings.py
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 """
 Seamlessly extract the date of web pages based on header or body.
-http://github.com/adbar/textract
+http://github.com/adbar/html-extractor
 """
 
 from codecs import open # python2
@@ -16,15 +16,15 @@
 
 
 here = os.path.abspath(os.path.dirname(__file__))
-packages = ['textract']
+packages = ['html-extractor']
 
 
 def readme():
     with open(os.path.join(here, 'README.rst'), 'r', 'utf-8') as readmefile:
         return readmefile.read()
 
 setup(
-    name='textract',
+    name='html-extractor',
     version='0.0.1',
     description='',
     long_description=readme(),
@@ -52,7 +52,7 @@ def readme():
         'Topic :: Text Processing :: Markup :: HTML',
     ],
     keywords=['entity-extraction', 'html-extraction', 'html-parsing', 'metadata-extraction',  'webarchives', 'web-scraping'],
-    url='http://github.com/adbar/textract',
+    url='http://github.com/adbar/html-extractor',
     author='Adrien Barbaresi',
     author_email='[email protected]',
     license='GPLv3+',
@@ -73,4 +73,4 @@ def readme():
     # platforms='any',
     tests_require=['pytest', 'tox'],
     zip_safe=False,
-)
+)
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -6,7 +6,7 @@
 import os
 
 
-import textract
+import html-extractor
 import pytest # unittest?
 
 

diff --git a/tests/unit_tests.py b/tests/unit_tests.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-Unit tests for the textract library.
+Unit tests for the html-extractor library.
 """
 
 import logging
 import os
 import sys
 # https://docs.pytest.org/en/latest/
 
-import textract
+import html-extractor
 
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 
@@ -29,7 +29,7 @@ def load_mock_page(url):
 
 def test_main():
     '''test extraction from HTML'''
-    assert textract.process_record(load_mock_page('https://die-partei.net/sh/'), 'https://die-partei.net/sh/', '0000') is not None
+    assert html-extractor.process_record(load_mock_page('https://die-partei.net/sh/'), 'https://die-partei.net/sh/', '0000') is not None
 
 
 if __name__ == '__main__':