forked from dustindikes/epocket
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.py
executable file
·36 lines (25 loc) · 1.1 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
import hashlib
import html
import os
import sys

import feedparser
from newspaper import Article
from newspaper import ArticleException
# Pocket "unread" RSS feed that lists the articles to mirror.
FEED_URL = 'http://getpocket.com/users/dustindikes/feed/unread'
# Directory that receives one HTML page per article plus index.html.
OUT_DIR = '/home/dustin/public_html/pocket/'


def page_name(entry_id):
    """Return the output file name for a feed entry.

    The name is the MD5 hex digest of the entry id followed by ``.html``;
    the hash is used only to get a stable, filesystem-safe name.
    """
    return hashlib.md5(entry_id.encode()).hexdigest() + '.html'


def render_article_page(title, link, text):
    """Return a standalone HTML page for one article.

    *title*, *link* and *text* all originate from remote content, so each
    is HTML-escaped before being placed in the markup. Newlines in *text*
    are turned into ``<br>`` after escaping.
    """
    safe_title = html.escape(title)
    body = html.escape(text).replace('\n', '<br>')
    return ''.join([
        '<!DOCTYPE html><html lang="en"><head><meta charset="utf-8">',
        '<title>', safe_title, '</title>',
        '<style type="text/css">body{font-size:20px;}</style></head><body>',
        '<h1>', safe_title, '</h1>',
        '<p><strong>Source: ', html.escape(link), '</strong></p><p>',
        body,
        '</p></body></html>',
    ])


def render_index_page(items):
    """Return the index page for *items*, an iterable of (file_name, title)."""
    links = [
        '<li><a href="' + html.escape(name) + '">'
        + html.escape(title) + '</a></li>'
        for name, title in items
    ]
    return (
        '<!DOCTYPE html><html lang="en"><head><meta charset="utf-8">'
        '<title>Pocket Articles</title></head><body><ul>'
        + ''.join(links)
        + '</ul></body></html>'
    )


def write_page(file_name, page):
    """Write *page* to *file_name* inside OUT_DIR as UTF-8."""
    # Explicit UTF-8 matches the <meta charset> in the generated pages and
    # avoids depending on the locale's default encoding.
    path = os.path.join(OUT_DIR, file_name)
    with open(path, 'w', encoding='utf-8') as handle:
        handle.write(page)


def main():
    """Mirror every entry of the unread feed and rebuild index.html."""
    feed = feedparser.parse(FEED_URL)
    index_items = []
    for entry in feed.entries:
        article = Article(entry.link)
        try:
            article.download()
            article.parse()
        except ArticleException as err:
            # One unreachable or unparsable article must not prevent the
            # remaining pages and the index from being written.
            print('skipping ' + entry.link + ': ' + str(err), file=sys.stderr)
            continue
        name = page_name(entry.id)
        write_page(
            name,
            render_article_page(entry.title, entry.link, article.text),
        )
        # Only link pages that were actually written in this run.
        index_items.append((name, entry.title))
    write_page('index.html', render_index_page(index_items))


if __name__ == '__main__':
    main()