
Commit

Update project code
JoeanAmier committed Dec 6, 2023
1 parent 395d81c commit 413168f
Showing 7 changed files with 92 additions and 84 deletions.
41 changes: 25 additions & 16 deletions README.md
@@ -11,10 +11,11 @@
<h1>📑 Feature List</h1>
<ul>
<li>✅ Collect Xiaohongshu image/video post information</li>
<li>✅ Get download links for Xiaohongshu image/video post files</li>
<li>✅ Extract download links for Xiaohongshu image/video post files</li>
<li>✅ Download watermark-free Xiaohongshu image/video post files</li>
<li>✅ Automatically skip post files that have already been downloaded</li>
<li>✅ Integrity handling mechanism for post files</li>
<li>☑️ Save collected post information to a file</li>
</ul>
<h1>📸 Program Screenshots</h1>
<br>
@@ -25,7 +26,7 @@
<li><code>https://www.xiaohongshu.com/discovery/item/post ID</code></li>
<li><code>https://xhslink.com/share code</code></li>
<br/>
<p><b>You can enter multiple post links at once, separated by spaces.</b></p>
<p><b>Entering multiple post links at once is supported; separate the links with spaces.</b></p>
</ul>
<h1>🪟 About the Terminal</h1>
<p>⭐ It is recommended to run the program in <a href="https://learn.microsoft.com/zh-cn/windows/terminal/install">Windows Terminal</a> (the default terminal bundled with Windows 11) for the best display experience!</p>
@@ -41,32 +42,34 @@
<li>Run <code>main.py</code> to use the program</li>
</ol>
<h2>💻 Secondary Development</h2>
<p>If you want to get Xiaohongshu image/video post information, you can call the code by following the comment hints in <code>main.py</code>.</p>
<p>If you need to get Xiaohongshu image/video post information, you can call the code by following the comment hints in <code>main.py</code>.</p>
<pre>
# Test links
error_demo = "https://github.com/JoeanAmier/XHS_Downloader"
image_demo = "https://www.xiaohongshu.com/explore/63b275a30000000019020185"
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# Create the instance
path = "D:\\"  # Root path for saving downloaded posts; default: current directory
path = ""  # Root path for saving downloaded posts; default: current directory
folder = "Download"  # Name of the download folder (created automatically); default: Download
proxies = None  # Network proxy
user_agent = ""  # Request header User-Agent
proxy = None  # Network proxy
timeout = 5  # Network request timeout limit; default: 10
chunk = 1024 * 1024  # Size in bytes of each data block fetched from the server when downloading files
# with XHS() as xhs:
# async with XHS() as xhs:
# pass  # Use default parameters
with XHS(path=path,
folder=folder,
proxies=proxies,
timeout=timeout,
chunk=chunk) as xhs:  # Use custom parameters
async with XHS(path=path,
folder=folder,
user_agent=user_agent,
proxy=proxy,
timeout=timeout,
chunk=chunk) as xhs:  # Use custom parameters
download = True  # Whether to download post files; default: False
# Returns detailed post information, including download links
print(xhs.extract(error_demo))  # Returns an empty dict when data retrieval fails
print(xhs.extract(image_demo, download=download))
print(xhs.extract(video_demo, download=download))
print(xhs.extract(multiple_demo, download=download))
print(await xhs.extract(error_demo, download=download))  # Returns an empty dict when data retrieval fails
print(await xhs.extract(image_demo, download=download))
print(await xhs.extract(video_demo, download=download))
print(await xhs.extract(multiple_demo, download=download))  # Passing multiple post links is supported
</pre>
<h1>⚙️ Configuration File</h1>
<p>The <code>settings.json</code> file in the project root directory is generated automatically on the first run and allows some runtime parameters to be customized.</p>
@@ -83,7 +86,7 @@ with XHS(path=path,
<tr>
<td align="center">path</td>
<td align="center">str</td>
<td align="center">作品文件储存根路径</td>
<td align="center">作品数据 / 文件保存根路径</td>
<td align="center">项目根路径</td>
</tr>
<tr>
@@ -93,6 +96,12 @@
<td align="center">Download</td>
</tr>
<tr>
<td align="center">user_agent</td>
<td align="center">str</td>
<td align="center">请求头 User-Agent</td>
<td align="center">内置 UA</td>
</tr>
<tr>
<td align="center">proxy</td>
<td align="center">str</td>
<td align="center">设置代理</td>
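For reference, based on the defaults defined in source/Settings.py further down in this commit, a freshly generated settings.json would presumably look roughly like the following (an illustrative sketch only; the actual file may order or format the keys differently):

{
    "path": "",
    "folder": "Download",
    "user_agent": "",
    "proxy": "",
    "timeout": 10,
    "chunk": 1048576
}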
18 changes: 10 additions & 8 deletions main.py
@@ -11,24 +11,26 @@ async def example():
video_demo = "https://www.xiaohongshu.com/explore/64edb460000000001f03cadc"
multiple_demo = f"{image_demo} {video_demo}"
# Create the instance
path = "D:\\"  # Root path for saving downloaded posts; default: current directory
path = ""  # Root path for saving downloaded posts; default: current directory
folder = "Download"  # Name of the download folder (created automatically); default: Download
proxies = None  # Network proxy
user_agent = ""  # Request header User-Agent
proxy = None  # Network proxy
timeout = 5  # Network request timeout limit; default: 10
chunk = 1024 * 1024  # Size in bytes of each data block fetched from the server when downloading files
async with XHS() as xhs:
pass  # Use default parameters
# async with XHS() as xhs:
# pass  # Use default parameters
async with XHS(path=path,
folder=folder,
proxy=proxies,
user_agent=user_agent,
proxy=proxy,
timeout=timeout,
chunk=chunk) as xhs:  # Use custom parameters
download = False  # Whether to download post files; default: False
download = True  # Whether to download post files; default: False
# Returns detailed post information, including download links
print(await xhs.extract(error_demo))  # Returns an empty dict when data retrieval fails
print(await xhs.extract(error_demo, download=download))  # Returns an empty dict when data retrieval fails
print(await xhs.extract(image_demo, download=download))
print(await xhs.extract(video_demo, download=download))
print(await xhs.extract(multiple_demo, download=download))
print(await xhs.extract(multiple_demo, download=download))  # Passing multiple post links is supported


if __name__ == '__main__':
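The remainder of main.py is collapsed in this view. For context, a coroutine such as example() above would typically be driven from the entry point roughly as follows (a hedged sketch; the statement actually hidden by the fold may differ):

from asyncio import run

# Hypothetical driver for the example coroutine shown above
if __name__ == "__main__":
    run(example())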
58 changes: 26 additions & 32 deletions source/Downloader.py
@@ -1,11 +1,8 @@
from pathlib import Path

from aiohttp import ClientConnectionError
from aiohttp import ClientProxyConnectionError
from aiohttp import ClientSSLError
from aiohttp import ClientSession

# from aiohttp import ClientTimeout
from aiohttp import ClientTimeout
from aiohttp import ServerTimeoutError

__all__ = ['Download']

@@ -26,8 +23,9 @@ def __init__(
self.root = self.__init_root(root, path, folder)
self.proxy = proxy
self.chunk = chunk
# self.timeout = ClientTimeout(total=timeout)
self.session = ClientSession(headers=manager.headers)
self.session = ClientSession(
headers=manager.headers,
timeout=ClientTimeout(connect=timeout))

def __init_root(self, root: Path, path: str, folder: str) -> Path:
if path and (r := Path(path)).is_dir():
@@ -42,8 +40,10 @@ async def run(self, urls: list, name: str, type_: int, log, bar):
if type_ == 0:
await self.__download(urls[0], f"{name}.mp4", log, bar)
elif type_ == 1:
for index, url in enumerate(urls):
await self.__download(url, f"{name}_{index + 1}.png", log, bar)
for index, url in enumerate(urls, start=1):
await self.__download(url, f"{name}_{index}.png", log, bar)
else:
raise ValueError

async def __download(self, url: str, name: str, log, bar):
temp = self.temp.joinpath(name)
@@ -52,32 +52,26 @@ async def __download(self, url: str, name: str, log, bar):
return
try:
async with self.session.get(url, proxy=self.proxy) as response:
# self.__create_progress(bar, int(response.headers.get('content-length', 0)))
self.__create_progress(
bar, int(
response.headers.get(
'content-length', 0)) or None)
with temp.open("wb") as f:
async for chunk in response.content.iter_chunked(self.chunk):
f.write(chunk)
# self.__update_progress(bar, len(chunk))
# self.__remove_progress(bar)
self.__update_progress(bar, len(chunk))
self.manager.move(temp, file)
except (
ClientProxyConnectionError,
ClientSSLError,
ClientConnectionError,
TimeoutError,
):
self.__create_progress(bar, None)
except ServerTimeoutError:
self.manager.delete(temp)
# self.__remove_progress(bar)
self.__create_progress(bar, None)

@staticmethod
def __create_progress(bar, total: int | None):
if bar:
bar.update(total=total)

# @staticmethod
# def __create_progress(bar, total: int | None):
# if bar:
# bar.update(total=total)
#
# @staticmethod
# def __update_progress(bar, advance: int):
# if bar:
# bar.advance(advance)
#
# @staticmethod
# def __remove_progress(bar):
# pass
@staticmethod
def __update_progress(bar, advance: int):
if bar:
bar.advance(advance)
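The pattern this file now relies on (a session-level connect timeout plus streaming the response body to a temporary file in fixed-size chunks, with partial data discarded on timeout) can be summarized in a small standalone sketch; the function name, URL handling, and return value here are illustrative, not project code:

from pathlib import Path

from aiohttp import ClientSession, ClientTimeout, ServerTimeoutError


async def fetch_to_file(url: str, target: Path, chunk: int = 1024 * 1024) -> bool:
    # The connect timeout bounds connection establishment, not the whole transfer
    async with ClientSession(timeout=ClientTimeout(connect=10)) as session:
        try:
            async with session.get(url) as response:
                with target.open("wb") as f:
                    # Stream the body in fixed-size chunks instead of reading it all at once
                    async for part in response.content.iter_chunked(chunk):
                        f.write(part)
            return True
        except ServerTimeoutError:
            # Drop partial data on timeout, mirroring the delete-on-failure logic above
            target.unlink(missing_ok=True)
            return False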
17 changes: 6 additions & 11 deletions source/Html.py
@@ -1,9 +1,6 @@
from aiohttp import ClientConnectionError
from aiohttp import ClientProxyConnectionError
from aiohttp import ClientSSLError
from aiohttp import ClientSession

# from aiohttp import ClientTimeout
from aiohttp import ClientTimeout
from aiohttp import ServerTimeoutError

__all__ = ['Html']

@@ -18,7 +15,9 @@ def __init__(
self.proxy = proxy
self.session = ClientSession(
headers=headers | {
"Referer": "https://www.xiaohongshu.com/", })
"Referer": "https://www.xiaohongshu.com/", },
timeout=ClientTimeout(connect=timeout),
)

async def request_url(
self,
@@ -30,11 +29,7 @@ async def request_url(
proxy=self.proxy,
) as response:
return await response.text() if text else response.url
except (
ClientProxyConnectionError,
ClientSSLError,
ClientConnectionError,
):
except ServerTimeoutError:
return ""

@staticmethod
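request_url serves two purposes in this project: fetching page text, and, with text=False, resolving xhslink.com short links by returning the final URL after redirects. A stripped-down sketch of the second use (illustrative names, not project code):

from aiohttp import ClientSession, ClientTimeout, ServerTimeoutError


async def resolve_short_link(url: str) -> str:
    # GET requests follow redirects by default, so response.url is the final address
    async with ClientSession(timeout=ClientTimeout(connect=10)) as session:
        try:
            async with session.get(url) as response:
                return str(response.url)
        except ServerTimeoutError:
            return ""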
9 changes: 4 additions & 5 deletions source/Manager.py
@@ -6,12 +6,11 @@


class Manager:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36", }

def __init__(self, root: Path):
def __init__(self, root: Path, ua: str):
self.temp = root.joinpath("./temp")
self.headers = {
"User-Agent": ua or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0", }

@staticmethod
def is_exists(path: Path) -> bool:
3 changes: 2 additions & 1 deletion source/Settings.py
@@ -9,7 +9,8 @@ class Settings:
default = {
"path": "",
"folder": "Download",
"proxies": None,
"user_agent": "",
"proxy": "",
"timeout": 10,
"chunk": 1024 * 1024,
}
30 changes: 19 additions & 11 deletions source/__init__.py
@@ -30,20 +30,27 @@

class XHS:
ROOT = Path(__file__).resolve().parent.parent
link = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+")
share = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
short = compile(r"https://xhslink\.com/[A-Za-z0-9]+")
LINK = compile(r"https://www\.xiaohongshu\.com/explore/[a-z0-9]+")
SHARE = compile(r"https://www\.xiaohongshu\.com/discovery/item/[a-z0-9]+")
SHORT = compile(r"https://xhslink\.com/[A-Za-z0-9]+")
__INSTANCE = None

def __new__(cls, *args, **kwargs):
if not cls.__INSTANCE:
cls.__INSTANCE = super().__new__(cls)
return cls.__INSTANCE

def __init__(
self,
path="",
folder="Download",
proxy=None,
user_agent: str = None,
proxy: str = None,
timeout=10,
chunk=1024 * 1024,
**kwargs,
):
self.manager = Manager(self.ROOT)
self.manager = Manager(self.ROOT, user_agent)
self.html = Html(self.manager.headers, proxy, timeout)
self.image = Image()
self.video = Video()
@@ -81,12 +88,12 @@ async def extract(self, url: str, download=False, log=None, bar=None) -> list[di
async def __deal_links(self, url: str) -> list:
urls = []
for i in url.split():
if u := self.short.search(i):
if u := self.SHORT.search(i):
i = await self.html.request_url(
u.group(), False)
if u := self.share.search(i):
if u := self.SHARE.search(i):
urls.append(u.group())
elif u := self.link.search(i):
elif u := self.LINK.search(i):
urls.append(u.group())
return urls

@@ -118,18 +125,19 @@ async def __aexit__(self, exc_type, exc_value, traceback):
await self.html.session.close()
await self.download.session.close()

def rich_log(self, log, text, style="b bright_green"):
@staticmethod
def rich_log(log, text, style="b bright_green"):
if log:
log.write(Text(text, style=style))
else:
self.console.print(text, style=style)
print(text)


class XHSDownloader(App):
VERSION = 1.6
BETA = True
ROOT = Path(__file__).resolve().parent.parent
APP = XHS(**Settings(ROOT).run())
# APP = XHS(**Settings(ROOT).run())
CSS_PATH = ROOT.joinpath(
"static/XHS-Downloader.tcss")
BINDINGS = [
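This commit also turns XHS into a singleton via __new__, so constructing it more than once returns the same instance. The pattern in isolation looks roughly like this (a generic sketch, not project code):

class Singleton:
    __INSTANCE = None

    def __new__(cls, *args, **kwargs):
        # Reuse the existing instance instead of allocating a new one
        if not cls.__INSTANCE:
            cls.__INSTANCE = super().__new__(cls)
        return cls.__INSTANCE


first = Singleton()
second = Singleton()
assert first is second  # both names refer to the same object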
