From f567e4edc37bdb04961affafb7a81f6b08bfa71f Mon Sep 17 00:00:00 2001 From: naibo Date: Fri, 28 Jul 2023 17:24:01 +0800 Subject: [PATCH] Update cloudflare --- ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index e4213de8..f080af6c 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--id", "[85]", "--headless", "0", "--user_data", "1", "--keyboard", "0"] + "args": ["--id", "[25]", "--headless", "0", "--user_data", "0", "--keyboard", "0"] } ] } \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index f0032bc1..96314b2d 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -41,7 +41,7 @@ from lxml import etree import onnxruntime onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志 -# import undetected_chromedriver as uc +import undetected_chromedriver as uc # import pandas as pd # import numpy # import pytesseract @@ -116,7 +116,7 @@ def __init__(self, browser_t, id, service, version, event, saveName, config): self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', { 'source': js}) # TMALL 反扒 WebDriverWait(self.browser, 10) - self.browser.get('about:blank') + # self.browser.get('about:blank') self.procedure = service["graph"] # 程序执行流程 try: self.maxViewLength = service["maxViewLength"] # 最大显示长度 @@ -729,7 +729,8 @@ def executeNode(self, nodeId, loopValue="", loopPath="", index=0): for i in node["sequence"]: # 从根节点开始向下读取 self.executeNode(i, loopValue, loopPath, index) elif node["option"] == 1: # 打开网页操作 - self.openPage(node["parameters"], loopValue) + if not (nodeId == 1 and self.service["cloudflare"] == 1): + self.openPage(node["parameters"], loopValue) elif node["option"] == 2: # 点击元素 self.clickElement(node["parameters"], loopValue, loopPath, index) elif node["option"] == 3: # 提取数据 @@ -1938,9 +1939,14 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0): elif cloudflare == 1: if sys.platform == "win32": options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器 + # options.add_argument("--auto-open-devtools-for-tabs") # options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器 - browser_t = MyUCChrome( - options=options, driver_executable_path=driver_path) + browser_t = MyUCChrome(options=options, driver_executable_path=driver_path) + links = list(filter(isnotnull, service["links"].split("\n"))) + browser_t.execute_script('window.open("'+ links[0] +'","_blank");') # open page in new tab + time.sleep(5) # wait until page has loaded + browser_t.switch_to.window(browser_t.window_handles[1]) # switch to new tab + # browser_t = uc.Chrome() else: print("Cloudflare模式只支持Windows x64平台。") print(