-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider_techAI_send_ForWindows_V1.2Re.py
238 lines (201 loc) · 8.22 KB
/
spider_techAI_send_ForWindows_V1.2Re.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# -*- coding: UTF-8 -*-
# Note: concatenate strings with line breaks.
#@author: JACK YANG 201902-->10-->202008->10 [email protected]
#!/usr/bin/python3
import smtplib
#from smtplib import SMTP
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage #20180603add JACK
from email.header import Header
import ssl
import sys,os #os.listdir 201902
import time
import glob #查找通配文件 201902
from email.utils import formataddr
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import re
import json
import codecs # use for write a file 0708
# SMTP account settings, kept as module-level names so they are easy to maintain.
my_sender = "[email protected]"  # sender mailbox account
receiver = "[email protected]"  # recipient mailbox account
# To mail the report to yourself, set receiver = my_sender instead.
_pwd = "xxx"  # authorization code obtained after enabling SMTP in QQ Mail
def make_img_msg(fn):
    """Build a MIME image part from the image file at *fn*.

    Args:
        fn: Path of the image file to attach.

    Returns:
        A ``MIMEImage`` holding the file's bytes, named after the file's
        base name and carrying the fixed Content-ID ``EangelCam2020``.

    Raises:
        OSError: If the file cannot be opened or read.
        TypeError: If ``MIMEImage`` cannot guess the image subtype.
    """
    # Binary mode: image bytes must not go through text decoding. The
    # ``with`` block closes the handle even if reading fails.
    with open(fn, 'rb') as f:
        data = f.read()
    # Use the last path component as the attachment name; the previous
    # fn.split("/")[2] only worked for paths shaped exactly like /tmp/xxx.jpg.
    image = MIMEImage(data, name=os.path.basename(fn))
    image.add_header('Content-ID', 'EangelCam2020')
    return image
def get_file_list(file_path):
    """Return the ``*.jpg`` files in *file_path*, oldest modification first.

    Args:
        file_path: Directory to search.

    Returns:
        A list of matching paths sorted by ascending modification time, or
        ``None`` when the directory contains no ``.jpg`` file.
    """
    # Search the directory the caller asked for; the pattern used to be
    # hard-coded to /tmp/*.jpg, so file_path had no effect on the search.
    dir_list = glob.glob(os.path.join(file_path, "*.jpg"))
    print(dir_list)
    if not dir_list:
        return None
    # glob already yields paths that include file_path, so they can be passed
    # to getmtime as-is (joining file_path again would double the directory
    # for relative paths).
    return sorted(dir_list, key=os.path.getmtime)
class GrabNews():
    """Collect headline links from the TechCrunch front page."""

    def __init__(self):
        # One {headline_text: link} dict per headline string found.
        self.NewsList = []

    def getNews(self, timeout=30):
        """Download the page and append one ``{text: href}`` dict per headline.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: If the download fails or times out.
        """
        url = 'https://techcrunch.com/'
        # Without a timeout an unresponsive site would block the run forever.
        r = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(r.text, "html.parser")
        for news in soup.select('.post-block__title a'):
            newsUrl = news.attrs.get('href')
            if not newsUrl:
                # An anchor without a target cannot be linked to; skip it
                # instead of failing the whole run with a KeyError.
                continue
            for string in news.stripped_strings:
                print(news.text)
                self.NewsList.append({string: newsUrl})
class GrabNews2():
    """Collect headline links from the Sina Tech front page."""

    def __init__(self):
        # One {headline_text: link} dict per headline string found.
        self.NewsList = []

    def getNews(self, timeout=30):
        """Download the page and append one ``{text: href}`` dict per headline.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: If the download fails or times out.
        """
        url = 'https://tech.sina.com.cn/'
        # Without a timeout an unresponsive site would block the run forever.
        r2 = requests.get(url, timeout=timeout)
        # Decode the body as UTF-8 rather than relying on the guessed charset.
        r2.encoding = 'utf-8'
        soup = BeautifulSoup(r2.text, "html.parser")
        for news in soup.select('.tech-news li a'):
            print(news.text)
            newsUrl = news.attrs.get('href')
            if not newsUrl:
                # An anchor without a target cannot be linked to; skip it
                # instead of failing the whole run with a KeyError.
                continue
            for string in news.stripped_strings:
                print(newsUrl)
                self.NewsList.append({string: newsUrl})
class GrabNewsTechnet():
    """Collect headline links from the stdaily.com front page."""

    # Page that is scraped; also the base for resolving relative links.
    BASE_URL = 'http://stdaily.com/'

    def __init__(self):
        # One {headline_text: link} dict per headline string found.
        self.NewsList = []

    @staticmethod
    def _absolute_url(href):
        """Resolve *href* against the site root; absolute URLs pass through."""
        from urllib.parse import urljoin  # local import: only needed here

        # urljoin avoids the doubled slash that plain concatenation produced
        # for root-relative links and handles protocol-relative ones too.
        return urljoin(GrabNewsTechnet.BASE_URL, href)

    def getNews(self, timeout=30):
        """Download the page and append one ``{text: url}`` dict per headline.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: If the download fails or times out.
        """
        # Without a timeout an unresponsive site would block the run forever.
        r2 = requests.get(self.BASE_URL, timeout=timeout)
        # Decode the body as UTF-8 rather than relying on the guessed charset.
        r2.encoding = 'utf-8'
        soup = BeautifulSoup(r2.text, "html.parser")
        # Selector was switched from 'div.ti_news a' to the fp_subtitle block.
        for news in soup.select('div.fp_subtitle a'):
            print(news.text)
            href = news.attrs.get('href')
            if not href:
                # An anchor without a target cannot be linked to; skip it
                # instead of failing the whole run with a KeyError.
                continue
            newsUrl = self._absolute_url(href)
            for string in news.stripped_strings:
                print(newsUrl)
                self.NewsList.append({string: newsUrl})
class GrabNewsAI():
    """Collect result links from the aitopics.org search page."""

    # Page that is scraped; also the base for resolving relative links.
    PAGE_URL = 'https://aitopics.org/search'

    def __init__(self):
        # One {title_text: link} dict per title string found.
        self.NewsList = []

    @staticmethod
    def _absolute_url(href):
        """Resolve *href* against the search page; absolute URLs pass through."""
        from urllib.parse import urljoin  # local import: only needed here

        return urljoin(GrabNewsAI.PAGE_URL, href)

    def getNews(self, timeout=30):
        """Download the page and append one ``{text: url}`` dict per title.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: If the download fails or times out.
        """
        # Without a timeout an unresponsive site would block the run forever.
        r2 = requests.get(self.PAGE_URL, timeout=timeout)
        # Decode the body as UTF-8 rather than relying on the guessed charset.
        r2.encoding = 'utf-8'
        soup = BeautifulSoup(r2.text, "html.parser")
        for news in soup.select('.searchtitle a'):
            print(news.text)
            href = news.attrs.get('href')
            if not href:
                # An anchor without a target cannot be linked to; skip it
                # instead of failing the whole run with a KeyError.
                continue
            # The links end up in an e-mail, where a site-relative href would
            # not resolve; absolute hrefs are returned unchanged.
            newsUrl = self._absolute_url(href)
            for string in news.stripped_strings:
                print(newsUrl)
                self.NewsList.append({string: newsUrl})
# Timestamp (date, hour, minute) captured once at start-up; it is embedded in
# the report file name 'news<date>.html'.
now_time = datetime.now()
# Use '-' rather than ':' between hour and minute: ':' is a reserved character
# in Windows file names, and this script targets Windows.
date = now_time.strftime('%Y-%m-%d_%H-%M')
print(date)
#adopt from other article
def writeNews():
    """Append the headlines gathered by ``GrabNews`` to the report file.

    Writes one ``<a>`` element followed by ``<hr />`` per headline to
    ``news<date>.html`` (UTF-8, append mode).
    """
    import html  # local import: only needed for the escaping below

    grabNews = GrabNews()
    grabNews.getNews()
    # 'a' (append): other writers add their sections to the same file.
    with codecs.open('news%s.html' % date, 'a', 'utf-8') as fp:
        for news in grabNews.NewsList:
            # Each entry maps headline text -> link.
            for title, link in news.items():
                # Both values are scraped from a third-party page: escape them
                # and quote the attribute so they cannot break the markup.
                fp.write('<a href="%s">%s</a>' % (
                    html.escape(link, quote=True),
                    html.escape('*' + title),
                ))
                fp.write('<hr />')
def writeNews2():
    """Append the headlines gathered by ``GrabNews2`` to the report file.

    Writes one ``<a>`` element followed by ``<hr />`` per headline to
    ``news<date>.html`` (UTF-8, append mode).
    """
    import html  # local import: only needed for the escaping below

    grabNews = GrabNews2()
    grabNews.getNews()
    # Opened in 'a' rather than 'w' so earlier sections are kept.
    with codecs.open('news%s.html' % date, 'a', 'utf-8') as fp:
        for news in grabNews.NewsList:
            # Each entry maps headline text -> link.
            for title, link in news.items():
                # Both values are scraped from a third-party page: escape them
                # and quote the attribute so they cannot break the markup.
                fp.write('<a href="%s">%s</a>' % (
                    html.escape(link, quote=True),
                    html.escape('*' + title),
                ))
                fp.write('<hr />')
def writeNewsTechNet():
    """Append the headlines gathered by ``GrabNewsTechnet`` to the report file.

    Writes one ``<a>`` element followed by ``<hr />`` per headline to
    ``news<date>.html`` (UTF-8, append mode).
    """
    import html  # local import: only needed for the escaping below

    grabNews = GrabNewsTechnet()
    grabNews.getNews()
    # 'a' (append): other writers add their sections to the same file.
    with codecs.open('news%s.html' % date, 'a', 'utf-8') as fp:
        for news in grabNews.NewsList:
            # Each entry maps headline text -> link.
            for title, link in news.items():
                # Both values are scraped from a third-party page: escape them
                # and quote the attribute so they cannot break the markup.
                fp.write('<a href="%s">%s</a>' % (
                    html.escape(link, quote=True),
                    html.escape('*' + title),
                ))
                fp.write('<hr />')
#adopt AI from other article
def writeNewsAI():
    """Start the report file with the titles gathered by ``GrabNewsAI``.

    Writes one ``<a>`` element followed by ``<hr />`` per title to
    ``news<date>.html`` (UTF-8). The file is opened in write mode, so any
    existing content under that name is replaced.
    """
    import html  # local import: only needed for the escaping below

    print("SEARCH AI news")
    grabNews = GrabNewsAI()
    grabNews.getNews()
    # 'w' (truncate): this writer creates the file afresh.
    with codecs.open('news%s.html' % date, 'w', 'utf-8') as fp:
        for news in grabNews.NewsList:
            # Each entry maps title text -> link.
            for title, link in news.items():
                # Both values are scraped from a third-party page: escape them
                # and quote the attribute so they cannot break the markup.
                fp.write('<a href="%s">%s</a>' % (
                    html.escape(link, quote=True),
                    html.escape('*' + title),
                ))
                fp.write('<hr />')
def mail():
    """Build the HTML news report and e-mail it via QQ's SMTP server.

    Returns:
        True when the message was sent; False when any step (scraping,
        file access, SMTP) raised, in which case the error text is printed.
    """
    ret = True
    try:
        msg = MIMEMultipart()
        # writeNewsAI opens the report in 'w' mode, so it has to run first;
        # the following writers append to the same file.
        writeNewsAI()
        writeNews()
        # The Sina section (writeNews2) is currently disabled.
        writeNewsTechNet()
        # Read the report as UTF-8 text so MIMEText receives a str; the
        # ``with`` block guarantees the file is closed (the former bare
        # ``fp.close`` never actually called close()).
        with open('news%s.html' % date, encoding='utf-8') as fp:
            techHtml = MIMEText(fp.read(), 'html', 'utf-8')
        msg.attach(techHtml)
        # Image attachments are not produced in this version.
        print("no picture captured!")
        # (display name, mailbox) pairs for the sender and the recipient.
        msg['From'] = formataddr(("smart Robot", my_sender))
        msg['To'] = formataddr(("亲爱的玩家", receiver))
        msg['Subject'] = "Robot agent 2020"
        # Implicit-TLS SMTP on port 465; the context manager ends the session
        # even when login or sending fails.
        with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
            server.login(my_sender, _pwd)
            server.sendmail(my_sender, [receiver], msg.as_string())
            print('SEND AI NEWS OK')
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the caller.
        print(str(e))
        ret = False
    return ret
# Script entry point: build and send the report only when run directly.
if __name__ == "__main__":
    mail()