From 1b6661afb8deb6bfd8600d4419d28f6e9c468cee Mon Sep 17 00:00:00 2001 From: naibo Date: Mon, 22 Apr 2024 06:33:23 +0800 Subject: [PATCH] V0.6.2 --- .../Code/easyspider_executestage.py | 169 ++-- .../EasySpider_windows_x64/Code/utils.py | 37 +- .temp_to_pub/EasySpider_windows_x64/myCode.py | 19 +- .../EasySpider_windows_x64/tasks/112.json | 2 +- .../EasySpider_windows_x64/tasks/212.json | 872 +++++++++++++++++- .../EasySpider_windows_x64/tasks/228.json | 2 +- .../EasySpider_windows_x64/tasks/229.json | 2 +- .../EasySpider_windows_x64/tasks/295.json | 2 +- .../EasySpider_windows_x64/tasks/70.json | 2 +- .../EasySpider_windows_x64/tasks/95.json | 2 +- .temp_to_pub/compress.py | 2 +- ElectronJS/EasySpider_en.crx | Bin 211968 -> 211968 bytes ElectronJS/EasySpider_zh.crx | Bin 211969 -> 211969 bytes ElectronJS/change_version.py | 2 +- ElectronJS/main.js | 18 +- ElectronJS/package.json | 6 +- ElectronJS/src/index.html | 2 +- ElectronJS/src/taskGrid/FlowChart.html | 10 +- ElectronJS/src/taskGrid/FlowChart_CN.html | 10 +- ElectronJS/src/taskGrid/logic.js | 6 +- ElectronJS/src/taskGrid/newTask.html | 2 +- ElectronJS/tasks/318.json | 1 + ElectronJS/tasks/319.json | 1 + ElectronJS/tasks/320.json | 1 + ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 23 +- Extension/manifest_v3/package.json | 2 +- Extension/manifest_v3/src/manifest.json | 2 +- 28 files changed, 1105 insertions(+), 94 deletions(-) create mode 100644 ElectronJS/tasks/318.json create mode 100644 ElectronJS/tasks/319.json create mode 100644 ElectronJS/tasks/320.json diff --git a/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py b/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py index 4ce4a488..7e8464f7 100644 --- a/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py +++ b/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py @@ -5,9 +5,10 @@ import platform import shutil import string +import threading # import undetected_chromedriver as uc from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \ - on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json + on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json from myChrome import MyChrome from threading import Thread, Event from PIL import Image @@ -112,9 +113,13 @@ def __init__(self, browser_t, id, service, version, event, saveName, config, opt self.print_and_log("Save Name for task ID", id, "is:", self.saveName) if not os.path.exists("Data/Task_" + str(id)): os.mkdir("Data/Task_" + str(id)) - if not os.path.exists("Data/Task_" + str(id) + "/" + self.saveName): - os.mkdir("Data/Task_" + str(id) + "/" + - self.saveName) # 创建保存文件夹用来保存截图 + self.downloadFolder = "Data/Task_" + str(id) + "/" + self.saveName + if not os.path.exists(self.downloadFolder): + os.mkdir(self.downloadFolder) # 创建保存文件夹用来保存截图和文件 + if not os.path.exists(self.downloadFolder + "/files"): + os.mkdir(self.downloadFolder + "/files") + if not os.path.exists(self.downloadFolder + "/images"): + os.mkdir(self.downloadFolder + "/images") self.getDataStep = 0 self.startSteps = 0 try: @@ -142,12 +147,21 @@ def __init__(self, browser_t, id, service, version, event, saveName, config, opt self.print_and_log("Loading stealth.min.js") self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', { 'source': js}) # TMALL 反扒 + self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { + "source": """ + Object.defineProperty(navigator, 'webdriver', { + get: () => undefined + }) + """ + }) WebDriverWait(self.browser, 10) self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command') - path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id)) + path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName, "files") self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}} - - self.browser.execute("send_command", self.paramss) # 下载地址改变 + self.browser.execute("send_command", self.paramss) # 下载目录改变 + self.monitor_event = threading.Event() + self.monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, self.monitor_event)) #path后面的逗号不能省略,是元组固定写法 + self.monitor_thread.start() # self.browser.get('about:blank') self.procedure = service["graph"] # 程序执行流程 try: @@ -187,12 +201,19 @@ def __init__(self, browser_t, id, service, version, event, saveName, config, opt self.links = list(filter(isnotnull, service["url"])) # 要执行的link self.OUTPUT = [] # 采集的数据 try: - self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖 + self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖,3为重命名文件 except: self.dataWriteMode = 1 if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx" or self.outputFormat == "json": - if self.dataWriteMode == 2 and os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat): - os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat) + if os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat): + if self.dataWriteMode == 2: + os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat) + elif self.dataWriteMode == 3: + i = 2 + while os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '_' + str(i) + '.' + self.outputFormat): + i = i + 1 + self.saveName = self.saveName + '_' + str(i) + self.print_and_log("文件已存在,已重命名为", self.saveName) self.writeMode = 1 # 写入模式,0为新建,1为追加 if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx": if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat): @@ -521,7 +542,7 @@ def run(self): "/", len(self.links)) self.executeNode(0) self.urlId = self.urlId + 1 - files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName) + # files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName) # 如果目录为空,则删除该目录 # if not files: # os.rmdir("Data/Task_" + str(self.id) + "/" + self.saveName) @@ -544,6 +565,7 @@ def run(self): shutil.rmtree(self.option["tmp_user_data_folder"]) except: pass + self.monitor_event.set() self.print_and_log("清理完成!|Clean up completed!") self.print_and_log("您现在可以安全的关闭此窗口了。|You can safely close this window now.") @@ -768,6 +790,8 @@ def execute_code(self, codeMode, code, max_wait_time, element=None, iframe=False elif int(codeMode) == 5: try: code = readCode(code) + # global_namespace = globals().copy() + # global_namespace["self"] = self output = exec(code) self.recordLog("执行下面的代码:" + code) self.recordLog("Execute the following code:" + code) @@ -847,6 +871,23 @@ def customOperation(self, node, loopValue, loopPath, index): self.print_and_log("根据设置的自定义操作,任务已刷新页面|Task refreshed page according to custom operation") elif codeMode == 9: # 发送邮件 send_email(node["parameters"]["emailConfig"]) + elif codeMode == 10: # 清空所有字段值 + self.clearOutputParameters() + elif codeMode == 11: # 生成新的数据行 + line = new_line(self.outputParameters, + self.maxViewLength, self.outputParametersRecord) + self.OUTPUT.append(line) + elif codeMode == 12: # 退出程序 + self.print_and_log("根据设置的自定义操作,任务已退出|Task exited according to custom operation") + self.saveData(exit=True) + self.browser.quit() + self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...") + try: + shutil.rmtree(self.option["tmp_user_data_folder"]) + except: + pass + self.print_and_log("清理完成!|Clean up completed!") + os._exit(0) else: # 0 1 5 6 output = self.execute_code( codeMode, code, max_wait_time, iframe=params["iframe"]) @@ -1106,7 +1147,25 @@ def judgeExecute(self, node, loopElement, clickPath="", index=0): self.recordLog( "判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met") - def handleHistory(self, node, xpath, thisHistoryURL, thisHistoryLength, index, element=None, elements=None): + def handleHistory(self, node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, element=None, elements=None): + try: + changed_handle = self.browser.current_window_handle != thisHandle + except: # 如果网页被意外关闭了的情况下 + self.browser.switch_to.window( + self.browser.window_handles[-1]) + changed_handle = self.browser.window_handles[-1] != thisHandle + if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化 + try: + while True: # 一直关闭窗口直到当前标签页 + self.browser.close() # 关闭使用完的标签页 + self.browser.switch_to.window( + self.browser.window_handles[-1]) + if self.browser.current_window_handle == thisHandle: + break + except Exception as e: + self.print_and_log("关闭标签页发生错误:", e) + self.print_and_log( + "Error occurred while closing tab: ", e) if self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断 difference = thisHistoryLength - self.history["index"] # 计算历史记录变化差值 self.browser.execute_script('history.go(' + str(difference) + ')') # 回退历史记录 @@ -1132,12 +1191,13 @@ def handleHistory(self, node, xpath, thisHistoryURL, thisHistoryLength, index, e if self.browser.current_url == thisHistoryURL or ti > thisHistoryLength: # 如果执行完一次循环之后网址发生了变化 break time.sleep(2) - if element == None: # 不固定元素列表 - element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) - else: # 固定元素列表 - element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) - # if index > 0: - # index -= 1 # 如果是data:开头的网址,就要重试一次 + if xpath != "": + if element == None: # 不固定元素列表 + element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) + else: # 固定元素列表 + element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) + # if index > 0: + # index -= 1 # 如果是data:开头的网址,就要重试一次 else: if element == None: element = elements @@ -1321,25 +1381,7 @@ def loopExecute(self, node, loopValue, clickPath="", index=0): if self.BREAK: self.BREAK = False break - try: - changed_handle = self.browser.current_window_handle != thisHandle - except: # 如果网页被意外关闭了的情况下 - self.browser.switch_to.window( - self.browser.window_handles[-1]) - changed_handle = self.browser.window_handles[-1] != thisHandle - if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化 - try: - while True: # 一直关闭窗口直到当前标签页 - self.browser.close() # 关闭使用完的标签页 - self.browser.switch_to.window( - self.browser.window_handles[-1]) - if self.browser.current_window_handle == thisHandle: - break - except Exception as e: - self.print_and_log("关闭标签页发生错误:", e) - self.print_and_log( - "Error occurred while closing tab: ", e) - index, elements = self.handleHistory(node, xpath, thisHistoryURL, thisHistoryLength, index, elements=elements) + index, elements = self.handleHistory(node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, elements=elements) if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 output = self.execute_code(int( node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], @@ -1381,25 +1423,7 @@ def loopExecute(self, node, loopValue, clickPath="", index=0): if self.BREAK: self.BREAK = False break - try: - changed_handle = self.browser.current_window_handle != thisHandle - except: # 如果网页被意外关闭了的情况下 - self.browser.switch_to.window( - self.browser.window_handles[-1]) - changed_handle = self.browser.window_handles[-1] != thisHandle - if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化 - try: - while True: # 一直关闭窗口直到当前标签页 - self.browser.close() # 关闭使用完的标签页 - self.browser.switch_to.window( - self.browser.window_handles[-1]) - if self.browser.current_window_handle == thisHandle: - break - except Exception as e: - self.print_and_log("关闭标签页发生错误:", e) - self.print_and_log( - "Error occurred while closing tab: ", e) - index, element = self.handleHistory(node, path, thisHistoryURL, thisHistoryLength, index, element=element) + index, element = self.handleHistory(node, path, thisHandle, thisHistoryURL, thisHistoryLength, index, element=element) except NoSuchElementException: self.print_and_log("Loop element not found: ", path) self.print_and_log("找不到循环元素:", path) @@ -1447,6 +1471,7 @@ def loopExecute(self, node, loopValue, clickPath="", index=0): code = get_output_code(output) if code <= 0: break + index, _ = self.handleHistory(node, "", thisHandle, thisHistoryURL, thisHistoryLength, index) elif int(node["parameters"]["loopType"]) == 4: # 固定网址列表 # tempList = node["parameters"]["textList"].split("\r\n") urlList = list( @@ -1715,6 +1740,21 @@ def clickElement(self, param, loopElement=None, clickPath="", index=0): script = 'var result = document.evaluate(`' + path + \ '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i=J2jKtlx=Kd`PpTS3<=YeED>>K9Z@PNrZI$?vV_Z(t(mMLTV$P~ zxoR*nV;M`tRI+6sxt8l%!sVWtb2{hu-|KmQKIb{lInVQOTj1fgK&ks+9c_vtf-QswrO3Ct4P7lr(<4kl6grbv5y< zX8gp&%%ejuq%-?zhTvR6L+4ycRLs+hbdyEcXw5AFSWWt4qI$oUs25a6of5&3=8wE% zbl~FHU0-1}rux7*W44g|(6ZjVsAIo(W+~U7r2S853GH2f*ntYY@%>z7{$*-teCza@ z@td8iw*&TQZKoRNAPHh+Hj#nJ5hD!ddp7#{qlGBj=z$mi^3x>mx6(TY^ zjuXmv*FR0*PIeHEXPcxNMPWBAl8L=%#`w~EyTV6Z@2kmJXgCBVRFMiQ#%FA3bI;yt zTIJ_VIE$RwHm>hstqm&O#)l$Or3P7c%yNr9IVID#5|&kA7Z&2gCz=2$sB3di87* zs5ZSm)bLe-ac8Ky($^DjpNd|Rg!zQyNKZCW9@xUjE`O~#IiWMJi|^mn<^;^$6|7gu z4cWn~=Hrw>!~X)-2VJ30uO3Y!3anuE6?{EsBnQU-k`&if^Dlhjo$sl5_U<0f`JNRJ8@X=Pk%%P$w7H+cHb57uVy?Xtc@;5*1DveI>k zEWC|v7xKkL3+ld(lf)iroox?xZP&FlPPKqn1*<04!JHL}1tiwxAMI5P*{(5Cc=}@I zGO+r)h%g5w+%X^_OCazFyQhF}VPMkG!_iYJU}t+O+`skuN9a~V^con(CnDpYNU07Y z(35ejZ2Oz9TpVoy@G%e+3I$!&A-a)$z|l6~AIc~Xk|CL!0EfvChb*CA5xLcCM)+q% zE)PnV;L1FXafmBOQkxq0Z!fv?Ct|WF@mUm}*I5)^AHSc;>pcm8$;TyA5wPX9BZ=t) z@`SqEUvx#rHpIXrUMsC(BaHI+8$9Q|f*SaDq(?H^X)-Te(sG}MU?e2Yw4&qdSU(JmCu_@!hh>B}Vy90hdDTf(3Krlh%08Vh zmhbfc_UiEAez1xofD#yTpQpFPRcP;O5_B>6`mdbmsK->bVNeOAmo5WK#e zoXU;rg%{lkR1d_mxnkq%yX~9!(z?qQ>>Safh#F)0GG+9b8C9!|TIPqvGw2PR>K-Gx z%l!9{NYiI3c}bIUPh%ImnvTYrT9;4pOw4dbA*v%rD9CvycNC@^UNgG;r*JHRbB%)d zY;`mCitW$}OJ8SQebkNNmNHCm-h>6ft>=Yo>KjefHKHQv_nuvxs;xYE>m}}vY!$_k z>EX3HhS9Jb$NS9|&>VE_M<4^C!}KGvOKH-W(P*uktE%Oi3jae?G{toDjL^?hQwQtC zy69+Q}3t5|!6svQh7ii;X@$M|DV-ss3Fh+{<(6} zhm*i5u8x@iW?WtRrOaO%G69Uaufj+G#g!`(u;!`=30QJ9j|3dJLYe|r&>y$^V-QlT NDPX|O-wdP@%5Jc(kpW4 z5Go~}fYjOQH4QY+^I-RAi6Qge1erBd8#YBgB|00c>S~~ls@`DXj(A)faxaLG1udx% z_;m8>{Nq5~vE4aXA;f$$-AKB!^Wi%M=IHJQEvp3H^9_`SH#q5IDio!wG~IRpvOJ^d zRR=o=4`gV~k7GqT9ps6iJm~b?EjSdL2!kw7^32elO+ri?>jp6tV?DBUCz+U5xSd#a z%yxejae-bYbpQLR>?+ht)k`jUukhIsPN7qOs;l}`$@ZHQpu@q-EO+YhQ`s3uh@~R9WJC@B1#t#7}lZaud;2PYf-+n~~*W@&0Ml*Ug4i zwCDDvhU!+S{HsehgOR-oh{Np%UAkH>X;aDFmT$!Mr^7htu)}*w9-QmBC1LzdVPOWK zp7V-j^{C#()4bq7brXzs!35?tAdWB4oG$NkU_QETsB%YL%Pnxo%JG3s!laVl0%f>d zPI#i6?8qp95hs+u2zEL9EyTSh6LI9OO3}B$MukyJLT^oD=afm0#8}IR{`;87)Gcz_ za-zv_?S8puSP^$hL-V=qZ)=8IW0S5`9(;**RH^s-tjOo+z70sho%r^+D?2RhWA&kn zYZkGUU4HH!XS#VKLx1X8GMulxy{3kBlhzrd^=|23C-Ph$`w9VI5Lhk&;^s=%890vO zgtt_VE*64E$FZXp5>gr_7k57s5Z)N|G(&qK?Rgw$4<#{(Klp6CrEymWyWqL8&6*2{ zm9koVvT-c6DWWBMWH&kbvKt}XUXI>o)3euvR{yl4ke1L?JzYkK$$D}s#kI#NW<2^D zJz$L3CG&oAd9&yD3D)FAMI619bb$7tX2yYecVd`#>kpW2U%$!D?Q!?IoCS1iwxW&* zJc8?`>dcL&1|*=FNFQT0y3}a#BjC#d@)cXVD%9WvuPNkK&R^UoWwB`?ahZV{9rCr& zAfjF3H0eug8M*CY>F6;xj^moY!_K~5@HjEH`s|Us;;K}O?>UbyWdu$A+djih(KDc> z*|R9pbWForv1l|HF7g}`elDz0JJKO;g#7j@q&X^TIV$>rA@1C06XuhLv<)*MCZlJz zjv_0WJN>eKuRB7L8uQIbIr&uJ;3{Jy?&Ud~rwRFzw2l_Y+>C%+aJNHm?RA^6+N>HG;2vzo%J2G=pGg?#;gCUv`qfOc^K+tmQHpdI6YKJW+e zrQMvKZ1bm+TD~;3T&$+^z>ZJD@)a4V6%~QE^*36xO%Ug;bW-oY7D_0SOFgmu>-in= z{R(p)3N@mIi^9s9p?oU7BJLGb>UIZTH?inLb4V)>i$TUa;?f^hj)827V$$*~pPL`| z3_PtDbyP3$u!3K~FiIv~wxo7BQ;$d)$+G|Nd4H{-mDdCI?2mPt{PIR#$3z87KR@UEBCt(FmsIv#jP*(>)87dMwx zL+c=MNWp^*&@WaOi+#$C%B0}q`m#jxJ2K2h8)HF^%XQbb}5LRtUtAw zv?AgtwpOcjQuK3NQt+U0YDMXm-9MKzCqQ!t1MEpH=H6m(u>E@#C+PucJ1+A+y&T)u zy}NvQUtw|kYO_HSPBfxT-_>_MSg9}N>wo0;yZ?6l>pZf2U|;28R;h?292JK@nSFB> zG_;l$wcNk<#1|i0h-9=o+jD5_{GO{HUGWKhW5Lt&Yqhz@VgSeJtnL#mQ zND>(lT69qf*%`a*P_FHsnsYkm_uuPze?I3q&pFSN{GBKHJ5RP2?i1c_tEa%oKGpfp z2BR`+{xA;f$zDha0yU>$tO~t~pkvaU%HyatTRjQ1MoR`n@>9 z>GzSIbr!hhhAcLovWKtnBB&j_feVB5Mc}<3UE!j3 zOef_kY)`x}+J_@E#x2BB3YiccPEr3lwIl1kw@8$3Xg3R1<1GLDRKbXKq=mF#i>3{txp>Z|IfAI}w_WjzfG(AjZ)T(*?sQgE0q(=$M$ ztBEYn-Q2z-rCftbD6GwVaiu=xdiW~;gqos_aw9{1Q0|`qm{m+&5A$)A@Y4)S`LTVR z;gKWmI|c|Ig1U{~&$scSB&tYvc**pwpl4sd9gdQHFz>ss3!S8vQXdKNB}ZX1%?O#g zi7hM9TDnIGko+u%-M}}YD>{7^Wj2FRSdz z8Kz{dKH5l7cS4!Vr+ZKPhhi$%lkhmnx0IQSLnDcpBay{(3)gJ}QA2oa$C-qN5S5Hs ze*KBhXs6;!hv#|s2GDVthZA(C8BtqDYgoE4@s7OqXwn5=!=f7Mv&+yl$ztnQ_3Yw< zEeMI)Bz0_7sW`^4B7ivtexlt@gI5>%`^5esyW#aua0NrQat&IT0?AfivXqnoxK?04VJeW8MSJ`ZJF z-Y~a28DmD=NVJ-S-thikEN69c208qMMZ<}rqw!WX*ElGTlVbMAHLb+ksf_tNbtUV{5DvSTYx$76`*cS2qil|-g^ZY+6YHdia z2up^tdv>u57uee$4lL0WSb|m48fE2Fj z#up8N$z3Rzbjnn9R0k_J^(>0{URn`+58;}Dc9<^B6*W124(q*}Hs6J6o_hZ6_U3j! zDMPc7Xb&+aOLkW@r*3}Ud!?K4zUy1YiQw(yzbPCN{JVrXqjdMCa}?h4NOH|5%POgU zpVfK}j(1>rI{?Kqa&4*m1(B?Fpn@CHjPl8gc`bT+9HJTNsI=cWAnLKH?+QER$$UWB zZCrjhaJS7BwqYnkZ;fE=MO8OlH6D9#<}B>7u2hX2dQ!hl^<`U)cX(89cMG#|NJsMO z=@|VLL zZ%tyAO{Su}7s{X8YFrwJ4Br~<<~qNYBaWTGs|Ic<)T&5dfRWo%2H3{CE!(m`G(!hs ziaaFN(knVW#)4$~qcjv$op-MMMW691EUW&m)?-t&H%&7=iIzh*ifL3WAJQ8v#?oZ1 z2US)Aa^}9~NoF-w@mXqYE}mS`oG(kMkNt);59l!!<|qqMdv617yR z?aHHsXls8#6{YsQsW;q!KNX)IbH4T;g=vRVQH9OSBpV_HB@;$>NCIIp>zh)U#qFf{BgodE^=I`(ggRg%f6t)G$A$1&(6a0FYP=No0H{2 z^ZNNQuj>>Ow$?TtA@@(AG|NPpWd2qF5)Ut)teW>TUNG2EX`hD)od8THnHX^zV7Pt6F-c-<)w! z4$zu$eobTqmQghH;jNlIBkdcVCVg_tUwS*A)eB)f^R50pG+M|5zufb!!`Rwp>o65) zaol#pr_hAhU=q@;XrszH1Yrdq%gf}YI|*6$-#@U8+=@C6m>;Ynd% zC4S>*MuWX4=s2q$&M|AD5PF7N-9sm~)SN%kDoVqJF4DxdXG|Dhf@cn4MKnCN11f`? zQ|YO9Od(yBgircBMM-kMvNmxYB z-croj!9X@fv2PjGFW<~MU`qW0WiUUnQl$kGV-?3c+jeTZklUKl0X{WL!Z&j0qqUTl z23+-Hyz-I04eEo%azy;;maX-?KyC4+P0sFR?iw!x9c^*PSzQD>(R<}H%+*`NM*WSO z6M8IuJW|O)@V)Qy6#H>ZnVFci_OH5oP|#FvueciJ+11@!F%+)2IZ&CQ^Kz^Q1hb*9a5+<=`zxNjQr=AoxfQZp6tnDL+JN}m*^VYVP|VKH)F zVRd2F_&7AG-!I#~h;w_#O?>t1JhUhiJGHB|eHrl^g=kNUi~>iu%TSIuZk&R~#g(r? zkYqP?VZBKNd?FSi2oRQ9KIu|nu!ye}Qj9I-S;5uFJ3-B31U5)YMA9fW(DPe`S*)J? z=6SMehX(=<1W5xR5C|XvP4;NK1|m3uzUxs{_^EtL6QFP-*qJQ;GivPg{v`3EYOD&+ zk>$%Gg(1WjIKAgI|L<^P{|^u;O(aT_KntZw@EZC@(sCpV>=og&PX!3)x67B&11jRW zF0JTDwykSvU&~UVAT-@acs{M$ei9J74E=B?v?<3aNrcVn&$zF=SwmXb8pOQYE$&Nv zC%1xuCxf$V9Y!Bo_LLc`mzH+H8q?3(sHcoU6OD}Ag1vpm5rxn@0v{Cm8 zHq^PRs)PSk&cAdLCXpyv)4ZLjgCk96BiGE)+Sgf(5oE=u7EN?Q+kO}xl3Wo%ot!Jn z)KvYLh^HUlpPnB&S@??FyCe9bT|rlX6M1yU(Peyy_*Ufs2Bsb?d+I9X@9wabY+69Z=p-Lx0Y`Muo! zbhGUdReM*jr2C=!gH=x~%!A*Qb{8BeLH?Iyc{Pyd% z@39>7FtE*l5C*>UUpOZsb_OA-h&s>CA@@L(20RTWk2mtv1vuc4F0Q*2cR-Qj@3^eEK z7ZacvU+zEk_@^dJfJXdRNf_`PUmh^v1-_oafYy9%z<|ztnVJG^Am4BI`ylx#ra*mu J{@N61_YbRbXa4{I diff --git a/ElectronJS/change_version.py b/ElectronJS/change_version.py index 1c1b5c5c..58eebec4 100644 --- a/ElectronJS/change_version.py +++ b/ElectronJS/change_version.py @@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio file.write(line) -version = "0.6.0" +version = "0.6.2" # py html js diff --git a/ElectronJS/main.js b/ElectronJS/main.js index 2b57fcc8..210e0e67 100644 --- a/ElectronJS/main.js +++ b/ElectronJS/main.js @@ -651,7 +651,11 @@ async function beginInvoke(msg, ws) { if (parameters.xpath.includes("point(")) { await click_element(element, point); } else { - await click_element(element); + if (parameters.clickWay == 2){ //双击 + await click_element(element, "double"); + } else { + await click_element(element); //单击 + } } let alertHandleType = parameters.alertHandleType; if (alertHandleType == 1) { @@ -1002,6 +1006,14 @@ async function beginInvoke(msg, ws) { "Attribute value obtained: " + result, "success" ); + } else if(param.contentType == 15) { + //元素的属性值 + let result = param.JS; + notify_browser( + "获取的常量值:" + result, + "Constant value obtained: " + result, + "success" + ); } else { //其他暂不支持 notify_browser( @@ -1130,6 +1142,8 @@ async function click_element(element, type = "click") { // await actions.click().perform(); let script = `document.elementFromPoint(${x}, ${y}).click();`; await driver.executeScript(script); + } else if (type == "double") { + await driver.actions().doubleClick(element).perform(); } else { await element.click(); } @@ -1341,6 +1355,8 @@ async function runBrowser(lang = "en", user_data_folder = "", mobile = false) { let options = new chrome.Options(); options.addArguments("--disable-blink-features=AutomationControlled"); options.addArguments("--disable-infobars"); + options.addArguments("--disable-web-security"); + options.addArguments("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process"); // 添加实验性选项以排除'enable-automation'开关 options.set("excludeSwitches", ["enable-automation"]); options.excludeSwitches("enable-automation"); diff --git a/ElectronJS/package.json b/ElectronJS/package.json index 3d99760a..3f1464a3 100644 --- a/ElectronJS/package.json +++ b/ElectronJS/package.json @@ -1,7 +1,7 @@ { "name": "easy-spider", "productName": "EasySpider", - "version": "0.6.0", + "version": "0.6.2", "icon": "./favicon", "description": "NoCode Visual Web Crawler", "main": "main.js", @@ -67,7 +67,7 @@ ], "packagerConfig": { "icon": "./favicon", - "appVersion": "0.6.0", + "appVersion": "0.6.2", "name": "EasySpider", "executableName": "EasySpider", "appCopyright": "Naibo Wang (naibowang@foxmail.com)", @@ -80,4 +80,4 @@ "publishers": [] } } -} +} \ No newline at end of file diff --git a/ElectronJS/src/index.html b/ElectronJS/src/index.html index 55472f22..86705e76 100644 --- a/ElectronJS/src/index.html +++ b/ElectronJS/src/index.html @@ -40,7 +40,7 @@
选择语言/Select Language

English

-

当前版本/Current Version: v0.6.0

+

当前版本/Current Version: v0.6.2

Github最新版本/Newest Version:{{newest_version}}

diff --git a/ElectronJS/src/taskGrid/FlowChart.html b/ElectronJS/src/taskGrid/FlowChart.html index 1b8c4b42..6e6dd64b 100644 --- a/ElectronJS/src/taskGrid/FlowChart.html +++ b/ElectronJS/src/taskGrid/FlowChart.html @@ -170,10 +170,11 @@ - + -
+
+ + +
+
diff --git a/ElectronJS/src/taskGrid/FlowChart_CN.html b/ElectronJS/src/taskGrid/FlowChart_CN.html index 146b0cf7..a546c669 100644 --- a/ElectronJS/src/taskGrid/FlowChart_CN.html +++ b/ElectronJS/src/taskGrid/FlowChart_CN.html @@ -170,10 +170,11 @@
- + -
+
+ + +
+
diff --git a/ElectronJS/src/taskGrid/logic.js b/ElectronJS/src/taskGrid/logic.js index 42ec662a..6b294245 100644 --- a/ElectronJS/src/taskGrid/logic.js +++ b/ElectronJS/src/taskGrid/logic.js @@ -446,7 +446,7 @@ function modifyParameters(t, param) { } } -function showSuccess(msg, time = 4000) { +function showSuccess(msg, time = 1000) { $("#tip").text(msg); $("#tip").slideDown(); //提示框 let fadeout = setTimeout(function () { @@ -491,7 +491,7 @@ if (mobile == "true") { } let serviceInfo = { - "version": "0.6.0" + "version": "0.6.2" }; function saveService(type) { @@ -625,7 +625,7 @@ function saveService(type) { "links": links, "create_time": $("#create_time").val(), "update_time": formatDateTime(new Date()), - "version": "0.6.0", + "version": "0.6.2", "saveThreshold": saveThreshold, // "cloudflare": cloudflare, "quitWaitTime": parseInt($("#quitWaitTime").val()), diff --git a/ElectronJS/src/taskGrid/newTask.html b/ElectronJS/src/taskGrid/newTask.html index aed9e389..79cdccc0 100644 --- a/ElectronJS/src/taskGrid/newTask.html +++ b/ElectronJS/src/taskGrid/newTask.html @@ -33,7 +33,7 @@

{{"New Task~新任务" | lang}}

- +
diff --git a/ElectronJS/tasks/318.json b/ElectronJS/tasks/318.json new file mode 100644 index 00000000..0e801321 --- /dev/null +++ b/ElectronJS/tasks/318.json @@ -0,0 +1 @@ +{"id":318,"name":"京东(JD.COM)-正品低价、品质保障、配送及时、轻松购物!","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"2024-04-22 05:08:03","update_time":"2024-04-22 05:19:48","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"电脑数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://prodev.jd.com/mall/active/31XPWPTonxJ9e5YoQ85HS7z8XNYQ/index.html?babelChannel=ttt40"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[4]/div[1]/div[4]/ul[1]/li/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0,"allXPaths":["/html/body/div[1]/div[4]/div[1]/div[4]/ul[1]/li[1]/a[1]","//a[contains(., '电脑数码')]","//A[@class='navitems-lk']","/html/body/div[last()-5]/div[last()-2]/div/div[last()-1]/ul/li[last()-8]/a"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":1,"contentType":15,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"电脑数码"}],"unique_index":"auwkv5g1krqlva0tsc4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"123","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://prodev.jd.com/mall/active/31XPWPTonxJ9e5YoQ85HS7z8XNYQ/index.html?babelChannel=ttt40"}],"unique_index":"auwkv5g1krqlva0tsc4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":0}]}}]} \ No newline at end of file diff --git a/ElectronJS/tasks/319.json b/ElectronJS/tasks/319.json new file mode 100644 index 00000000..0bfc4383 --- /dev/null +++ b/ElectronJS/tasks/319.json @@ -0,0 +1 @@ +{"id":-2,"name":"百度一下,你就知道","url":"https://www.baidu.com?id=1","links":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","create_time":"2024-04-22 05:45:12","update_time":"2024-04-22 05:45:20","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com?id=1","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com?id=1","links":"https://www.baidu.com?id=11\nhttps://www.baidu.com?id=12","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}}]} \ No newline at end of file diff --git a/ElectronJS/tasks/320.json b/ElectronJS/tasks/320.json new file mode 100644 index 00000000..eb8d7f8b --- /dev/null +++ b/ElectronJS/tasks/320.json @@ -0,0 +1 @@ +{"id":320,"name":"百度一下,你就知道","url":"https://www.baidu.com","links":"https://www.baidu.com","create_time":"2024-04-22 05:53:18","update_time":"2024-04-22 05:53:28","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com","links":"https://www.baidu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环点击每个元素","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li/a[1]/span[2]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":1,"maxWaitTime":10,"params":[],"alertHandleType":0,"downloadWaitTime":3600,"allXPaths":""}}]} \ No newline at end of file diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index cbbb457e..0759921c 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--ids", "[79]", "--headless", "0", "--user_data", "0", "--keyboard", "0", + "args": ["--ids", "[83]", "--headless", "0", "--user_data", "0", "--keyboard", "0", "--read_type", "remote"] // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" } diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 54bbbba8..7e8464f7 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -1740,6 +1740,21 @@ def clickElement(self, param, loopElement=None, clickPath="", index=0): script = 'var result = document.evaluate(`' + path + \ '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i https的重定向 + options.add_argument("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process") + options.add_argument("--disable-web-security") # 禁用同源策略 options.add_argument('-ignore-certificate-errors') options.add_argument('-ignore -ssl-errors') diff --git a/Extension/manifest_v3/package.json b/Extension/manifest_v3/package.json index 1dc776e3..cb319858 100644 --- a/Extension/manifest_v3/package.json +++ b/Extension/manifest_v3/package.json @@ -1,6 +1,6 @@ { "name": "EasySpider", - "version": "0.6.0", + "version": "0.6.2", "type": "module", "scripts": { "build": "rollup -c", diff --git a/Extension/manifest_v3/src/manifest.json b/Extension/manifest_v3/src/manifest.json index 569ef775..7d9ba01c 100644 --- a/Extension/manifest_v3/src/manifest.json +++ b/Extension/manifest_v3/src/manifest.json @@ -1,6 +1,6 @@ { "name": "EasySpider", - "version": "0.6.0", + "version": "0.6.2", "description": "EasySpider's chrome extension", "author": "Naibo Wang", "manifest_version": 3,