fix tests: httpbun.org → .com #455

Merged: 4 commits, Dec 13, 2023
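This pull request swaps every httpbun.org test endpoint for its httpbun.com counterpart across the CLI, download, feed, sitemap and spider test modules. As a quick post-merge sanity check (a sketch only, not part of the PR; it assumes the tests/ layout visible in the diffs below and the repository root as working directory), one could verify that no stale .org references remain:

    # Hypothetical check, not part of this PR: scan the test modules touched
    # here and fail if any of them still references the old httpbun.org domain.
    from pathlib import Path

    stale = [
        str(path)
        for path in Path("tests").glob("*_tests.py")  # assumes repo root as CWD
        if "httpbun.org" in path.read_text(encoding="utf-8")
    ]
    assert not stale, f"old httpbun.org URLs remain in: {stale}"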
tests/cli_tests.py (16 changes: 8 additions & 8 deletions)
@@ -218,7 +218,7 @@ def test_download():
#teststring = fetch_url(url)
#assert teststring is not None
#assert cli.examine(teststring, args, url) is None
-url = 'https://httpbun.org/html'
+url = 'https://httpbun.com/html'
teststring = fetch_url(url)
assert teststring is not None
assert cli.examine(teststring, args, url) is not None
@@ -408,27 +408,27 @@ def test_crawling():
args = cli.parse_args(testargs)
cli_utils.cli_crawler(args)

-testargs = ['', '--crawl', 'https://httpbun.org/html']
+testargs = ['', '--crawl', 'https://httpbun.com/html']
with patch.object(sys, 'argv', testargs):
args = cli.parse_args(testargs)
f = io.StringIO()
with redirect_stdout(f):
cli_utils.cli_crawler(args)
-assert f.getvalue() == 'https://httpbun.org/html\n'
+assert f.getvalue() == 'https://httpbun.com/html\n'

spider.URL_STORE = UrlStore(compressed=False, strict=False)
# links permitted
-testargs = ['', '--crawl', 'https://httpbun.org/links/1/1', '--list', '--parallel', '1']
+testargs = ['', '--crawl', 'https://httpbun.com/links/1/1', '--list', '--parallel', '1']
with patch.object(sys, 'argv', testargs):
args = cli.parse_args(testargs)
f = io.StringIO()
with redirect_stdout(f):
cli_utils.cli_crawler(args)
# possibly a bug on Github actions, should be 2 URLs
-assert f.getvalue() in ('https://httpbun.org/links/1/1\nhttps://httpbun.org/links/1/0\n', 'https://httpbun.org/links/1/1\n')
+assert f.getvalue() in ('https://httpbun.com/links/1/1\nhttps://httpbun.com/links/1/0\n', 'https://httpbun.com/links/1/1\n')
spider.URL_STORE = UrlStore(compressed=False, strict=False)
# 0 links permitted
-args.crawl = 'https://httpbun.org/links/4/4'
+args.crawl = 'https://httpbun.com/links/4/4'
f = io.StringIO()
with redirect_stdout(f):
cli_utils.cli_crawler(args, n=0)
@@ -437,13 +437,13 @@ def test_crawling():
spider.URL_STORE = UrlStore(compressed=False, strict=False)

# Exploration (Sitemap + Crawl)
-testargs = ['', '--explore', 'https://httpbun.org/html', '--list']
+testargs = ['', '--explore', 'https://httpbun.com/html', '--list']
with patch.object(sys, 'argv', testargs):
args = cli.parse_args(testargs)
f = io.StringIO()
with redirect_stdout(f):
cli.process_args(args)
-assert f.getvalue().strip() == 'https://httpbun.org/html'
+assert f.getvalue().strip() == 'https://httpbun.com/html'


def test_probing():
tests/downloads_tests.py (18 changes: 9 additions & 9 deletions)
@@ -53,27 +53,27 @@ def test_fetch():
assert _send_request('', True, DEFAULT_CONFIG) is None

# is_live general tests
-assert _urllib3_is_live_page('https://httpbun.org/status/301') is True
-assert _urllib3_is_live_page('https://httpbun.org/status/404') is False
-assert is_live_page('https://httpbun.org/status/403') is False
+assert _urllib3_is_live_page('https://httpbun.com/status/301') is True
+assert _urllib3_is_live_page('https://httpbun.com/status/404') is False
+assert is_live_page('https://httpbun.com/status/403') is False
# is_live pycurl tests
if pycurl is not None:
-assert _pycurl_is_live_page('https://httpbun.org/status/301') is True
+assert _pycurl_is_live_page('https://httpbun.com/status/301') is True

# fetch_url
assert fetch_url('#@1234') is None
-assert fetch_url('https://httpbun.org/status/404') is None
+assert fetch_url('https://httpbun.com/status/404') is None
# test if the functions default to no_ssl
# doesn't work?
# assert _send_request('https://expired.badssl.com/', False, DEFAULT_CONFIG) is not None
if pycurl is not None:
assert _send_pycurl_request('https://expired.badssl.com/', False, DEFAULT_CONFIG) is not None
# no SSL, no decoding
-url = 'https://httpbun.org/status/200'
-response = _send_request('https://httpbun.org/status/200', True, DEFAULT_CONFIG)
+url = 'https://httpbun.com/status/200'
+response = _send_request('https://httpbun.com/status/200', True, DEFAULT_CONFIG)
assert response.data == b''
if pycurl is not None:
-response1 = _send_pycurl_request('https://httpbun.org/status/200', True, DEFAULT_CONFIG)
+response1 = _send_pycurl_request('https://httpbun.com/status/200', True, DEFAULT_CONFIG)
assert _handle_response(url, response1, False, DEFAULT_CONFIG) == _handle_response(url, response, False, DEFAULT_CONFIG)
assert _handle_response(url, response1, True, DEFAULT_CONFIG) == _handle_response(url, response, True, DEFAULT_CONFIG)
# response object
@@ -155,7 +155,7 @@ def test_queue():
testargs = ['', '-v']
with patch.object(sys, 'argv', testargs):
args = parse_args(testargs)
-inputurls = ['https://httpbun.org/status/301', 'https://httpbun.org/status/304', 'https://httpbun.org/status/200', 'https://httpbun.org/status/300', 'https://httpbun.org/status/400', 'https://httpbun.org/status/505']
+inputurls = ['https://httpbun.com/status/301', 'https://httpbun.com/status/304', 'https://httpbun.com/status/200', 'https://httpbun.com/status/300', 'https://httpbun.com/status/400', 'https://httpbun.com/status/505']
url_store = add_to_compressed_dict(inputurls)
args.archived = True
args.config_file = os.path.join(RESOURCES_DIR, 'newsettings.cfg')
tests/feeds_tests.py (4 changes: 2 additions & 2 deletions)
@@ -256,7 +256,7 @@ def test_feeds_helpers():
) == ["https://example.org/rss"]
# feed discovery
assert not find_feed_urls("http://")
assert not find_feed_urls("https://httpbun.org/status/404")
assert not find_feed_urls("https://httpbun.com/status/404")
# Feedburner/Google links
assert handle_link_list(["https://feedproxy.google.com/ABCD"], params) == [
"https://feedproxy.google.com/ABCD"
@@ -271,7 +271,7 @@

def test_cli_behavior():
"""Test command-line interface with respect to feeds"""
testargs = ["", "--list", "--feed", "https://httpbun.org/xml"]
testargs = ["", "--list", "--feed", "https://httpbun.com/xml"]
with patch.object(sys, "argv", testargs):
assert main() is None

tests/sitemaps_tests.py (2 changes: 1 addition & 1 deletion)
@@ -155,7 +155,7 @@ def test_extraction():
def test_robotstxt():
'''Check if sitemaps can be found over robots.txt'''
assert not sitemaps.find_robots_sitemaps('https://http.org')
-baseurl = 'https://httpbun.org'
+baseurl = 'https://httpbun.com'
assert not sitemaps.find_robots_sitemaps(baseurl)
assert not sitemaps.extract_robots_sitemaps('# test', baseurl)
assert not sitemaps.extract_robots_sitemaps('# test'*10000, baseurl)
tests/spider_tests.py (46 changes: 23 additions & 23 deletions)
@@ -27,40 +27,40 @@ def test_redirections():
"Test redirection detection."
_, _, baseurl = spider.probe_alternative_homepage('xyz')
assert baseurl is None
-_, _, baseurl = spider.probe_alternative_homepage('https://httpbun.org/redirect-to?url=https://example.org')
+_, _, baseurl = spider.probe_alternative_homepage('https://httpbun.com/redirect-to?url=https://example.org')
assert baseurl == 'https://example.org'
#_, _, baseurl = spider.probe_alternative_homepage('https://httpbin.org/redirect-to?url=https%3A%2F%2Fhttpbin.org%2Fhtml&status_code=302')


def test_meta_redirections():
"Test redirection detection using meta tag."
# empty
htmlstring, homepage = '"refresh"', 'https://httpbun.org/'
htmlstring, homepage = '"refresh"', 'https://httpbun.com/'
htmlstring2, homepage2 = spider.refresh_detection(htmlstring, homepage)
assert htmlstring2 == htmlstring and homepage2 == homepage
-htmlstring, homepage = '<html></html>', 'https://httpbun.org/'
+htmlstring, homepage = '<html></html>', 'https://httpbun.com/'
htmlstring2, homepage2 = spider.refresh_detection(htmlstring, homepage)
assert htmlstring2 == htmlstring and homepage2 == homepage

# unusable
-htmlstring, homepage = '<html>REDIRECT!</html>', 'https://httpbun.org/'
+htmlstring, homepage = '<html>REDIRECT!</html>', 'https://httpbun.com/'
htmlstring2, homepage2 = spider.refresh_detection(htmlstring, homepage)
assert htmlstring2 == htmlstring and homepage2 == homepage

# malformed
htmlstring, homepage = '<html><meta http-equiv="refresh" content="3600\n&lt;meta http-equiv=" content-type=""></html>', 'https://httpbun.org/'
htmlstring, homepage = '<html><meta http-equiv="refresh" content="3600\n&lt;meta http-equiv=" content-type=""></html>', 'https://httpbun.com/'
htmlstring2, homepage2 = spider.refresh_detection(htmlstring, homepage)
assert htmlstring2 == htmlstring and homepage2 == homepage

# wrong URL
htmlstring, homepage = '<html><meta http-equiv="refresh" content="0; url=1234"/></html>', 'https://httpbun.org/'
htmlstring, homepage = '<html><meta http-equiv="refresh" content="0; url=1234"/></html>', 'https://httpbun.com/'
htmlstring2, homepage2 = spider.refresh_detection(htmlstring, homepage)
assert htmlstring2 is None and homepage2 is None

# normal
htmlstring, homepage = '<html><meta http-equiv="refresh" content="0; url=https://httpbun.org/html"/></html>', 'http://test.org/'
htmlstring, homepage = '<html><meta http-equiv="refresh" content="0; url=https://httpbun.com/html"/></html>', 'http://test.org/'
htmlstring2, homepage2 = spider.refresh_detection(htmlstring, homepage)
-assert htmlstring2 is not None and homepage2 == 'https://httpbun.org/html'
+assert htmlstring2 is not None and homepage2 == 'https://httpbun.com/html'


def test_process_links():
@@ -103,7 +103,7 @@ def test_process_links():

def test_crawl_logic():
"Test functions related to crawling sequence and consistency."
-url = 'https://httpbun.org/html'
+url = 'https://httpbun.com/html'
spider.URL_STORE = UrlStore(compressed=False, strict=False)
# erroneous webpage
with pytest.raises(ValueError):
@@ -118,31 +118,31 @@ def test_crawl_logic():
base_url, i, known_num, rules, is_on = spider.init_crawl(url, None, None)
todo = spider.URL_STORE.find_unvisited_urls(base_url)
known_links = spider.URL_STORE.find_known_urls(base_url)
-assert todo == [] and known_links == [url,] and base_url == 'https://httpbun.org' and i == 1
+assert todo == [] and known_links == [url,] and base_url == 'https://httpbun.com' and i == 1
# delay between requests
-assert spider.URL_STORE.get_crawl_delay('https://httpbun.org') == 5
-assert spider.URL_STORE.get_crawl_delay('https://httpbun.org', default=2.0) == 2.0
+assert spider.URL_STORE.get_crawl_delay('https://httpbun.com') == 5
+assert spider.URL_STORE.get_crawl_delay('https://httpbun.com', default=2.0) == 2.0
# existing todo
spider.URL_STORE = UrlStore(compressed=False, strict=False)
base_url, i, known_num, rules, is_on = spider.init_crawl(url, [url,], None)
-assert base_url == 'https://httpbun.org' and i == 0
+assert base_url == 'https://httpbun.com' and i == 0


def test_crawl_page():
"Test page-by-page processing."
-base_url = 'https://httpbun.org'
+base_url = 'https://httpbun.com'
spider.URL_STORE = UrlStore(compressed=False, strict=False)
-spider.URL_STORE.add_urls(['https://httpbun.org/links/2/2'])
-is_on, known_num, visited_num = spider.crawl_page(0, 'https://httpbun.org')
+spider.URL_STORE.add_urls(['https://httpbun.com/links/2/2'])
+is_on, known_num, visited_num = spider.crawl_page(0, 'https://httpbun.com')
todo = spider.URL_STORE.find_unvisited_urls(base_url)
known_links = spider.URL_STORE.find_known_urls(base_url)
-assert sorted(todo) == ['https://httpbun.org/links/2/0', 'https://httpbun.org/links/2/1']
+assert sorted(todo) == ['https://httpbun.com/links/2/0', 'https://httpbun.com/links/2/1']
assert len(known_links) == 3 and visited_num == 1
# initial page
spider.URL_STORE = UrlStore(compressed=False, strict=False)
-spider.URL_STORE.add_urls(['https://httpbun.org/html'])
+spider.URL_STORE.add_urls(['https://httpbun.com/html'])
# if LANGID_FLAG is True:
-is_on, known_num, visited_num = spider.crawl_page(0, 'https://httpbun.org', initial=True, lang='de')
+is_on, known_num, visited_num = spider.crawl_page(0, 'https://httpbun.com', initial=True, lang='de')
todo = spider.URL_STORE.find_unvisited_urls(base_url)
known_links = spider.URL_STORE.find_known_urls(base_url)
assert len(todo) == 0 and len(known_links) == 1 and visited_num == 1
@@ -152,10 +152,10 @@ def test_crawl_page():
def test_focused_crawler():
"Test the whole focused crawler mechanism."
spider.URL_STORE = UrlStore()
todo, known_links = spider.focused_crawler("https://httpbun.org/links/1/1", max_seen_urls=1)
## TODO: check this on Github actions:
# assert sorted(known_links) == ['https://httpbun.org/links/1/0', 'https://httpbun.org/links/1/1']
# assert sorted(todo) == ['https://httpbun.org/links/1/0']
todo, known_links = spider.focused_crawler("https://httpbun.com/links/1/1", max_seen_urls=1)
## fails on Github Actions
## assert sorted(known_links) == ['https://httpbun.com/links/1/0', 'https://httpbun.com/links/1/1']
## assert sorted(todo) == ['https://httpbun.com/links/1/0']


if __name__ == '__main__':