Skip to content

Commit

Permalink
name changed
Browse files Browse the repository at this point in the history
  • Loading branch information
adbar committed Apr 8, 2019
1 parent ab5b682 commit 480787f
Show file tree
Hide file tree
Showing 9 changed files with 698 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[run]
source = textract
source = html-extractor

omit =
tests/*
Expand Down
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
textract: ...
html-extractor: ...
==============================================

.. image:: https://img.shields.io/pypi/v/textract.svg
:target: https://pypi.python.org/pypi/textract
.. image:: https://img.shields.io/pypi/v/html-extractor.svg
:target: https://pypi.python.org/pypi/html-extractor

.. image:: https://img.shields.io/pypi/l/textract.svg
:target: https://pypi.python.org/pypi/textract
.. image:: https://img.shields.io/pypi/l/html-extractor.svg
:target: https://pypi.python.org/pypi/html-extractor

.. image:: https://img.shields.io/pypi/pyversions/textract.svg
:target: https://pypi.python.org/pypi/textract
.. image:: https://img.shields.io/pypi/pyversions/html-extractor.svg
:target: https://pypi.python.org/pypi/html-extractor

.. image:: https://img.shields.io/travis/adbar/textract.svg
:target: https://travis-ci.org/adbar/textract
.. image:: https://img.shields.io/travis/adbar/html-extractor.svg
:target: https://travis-ci.org/adbar/html-extractor

.. image:: https://img.shields.io/codecov/c/github/adbar/textract.svg
:target: https://codecov.io/gh/adbar/textract
.. image:: https://img.shields.io/codecov/c/github/adbar/html-extractor.svg
:target: https://codecov.io/gh/adbar/html-extractor


Description here.
Expand Down
4 changes: 2 additions & 2 deletions textract/__init__.py → html-extractor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""
Extract the date of web pages, or web archeology in practice.
Extract the text content of web pages.
"""

## meta
__title__ = 'textract'
__title__ = 'html-extractor'
__author__ = 'Adrien Barbaresi'
__license__ = 'GNU GPL v3'
__copyright__ = 'Copyright 2019, Adrien Barbaresi'
Expand Down
2 changes: 1 addition & 1 deletion textract/core.py → html-extractor/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Module bundling all functions needed to extract the text in a webpage.
"""

## This file is available from https://github.com/adbar/textract
## This file is available from https://github.com/adbar/html-extractor
## under GNU GPL v3 license

# compatibility
Expand Down
File renamed without changes.
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
"""
Seamlessly extract the date of web pages based on header or body.
http://github.com/adbar/textract
http://github.com/adbar/html-extractor
"""

from codecs import open # python2
Expand All @@ -16,15 +16,15 @@


here = os.path.abspath(os.path.dirname(__file__))
packages = ['textract']
packages = ['html-extractor']


def readme():
    """Return the text of the README.rst file stored in the ``here`` directory.

    The file is opened in read mode with 'utf-8' passed as the third
    positional argument, which is the encoding parameter of the
    ``codecs.open`` imported at the top of this script.
    """
    readme_path = os.path.join(here, 'README.rst')
    with open(readme_path, 'r', 'utf-8') as handle:
        content = handle.read()
    return content

setup(
name='textract',
name='html-extractor',
version='0.0.1',
description='',
long_description=readme(),
Expand Down Expand Up @@ -52,7 +52,7 @@ def readme():
'Topic :: Text Processing :: Markup :: HTML',
],
keywords=['entity-extraction', 'html-extraction', 'html-parsing', 'metadata-extraction', 'webarchives', 'web-scraping'],
url='http://github.com/adbar/textract',
url='http://github.com/adbar/html-extractor',
author='Adrien Barbaresi',
author_email='[email protected]',
license='GPLv3+',
Expand All @@ -73,4 +73,4 @@ def readme():
# platforms='any',
tests_require=['pytest', 'tox'],
zip_safe=False,
)
)
2 changes: 1 addition & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import os


import textract
import html-extractor
import pytest # unittest?


Expand Down
6 changes: 3 additions & 3 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
"""
Unit tests for the textract library.
Unit tests for the html-extractor library.
"""

import logging
import os
import sys
# https://docs.pytest.org/en/latest/

import textract
import html-extractor

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

Expand All @@ -29,7 +29,7 @@ def load_mock_page(url):

def test_main():
    '''test extraction from HTML'''
    # The package directory is named 'html-extractor'. A hyphen is not allowed
    # in a Python identifier, so `html-extractor.process_record(...)` would be
    # parsed as the subtraction `html - extractor.process_record(...)`.
    # Load the package by its string name instead.
    import importlib
    extractor = importlib.import_module('html-extractor')

    url = 'https://die-partei.net/sh/'
    # process_record receives the mock page content, its URL and the
    # literal '0000'; the test only checks that a result is returned.
    assert extractor.process_record(load_mock_page(url), url, '0000') is not None


if __name__ == '__main__':
Expand Down

0 comments on commit 480787f

Please sign in to comment.