-
Notifications
You must be signed in to change notification settings - Fork 163
/
Copy pathget_hot.py
91 lines (79 loc) · 2.44 KB
/
get_hot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""Mark hot articles which are extensively cited"""
import re
from tqdm import tqdm
import time
import os
import subprocess
import scholar
import numpy as np
def overlap(s1, s2):
    """Return the fraction of distinct words of ``s1`` that also occur in ``s2``.

    Both strings are normalised with ``replace`` (``.`` and ``:`` turned into
    spaces, lower-cased) and split on whitespace before comparison.

    Args:
        s1: Reference string; its distinct-word count is the denominator.
        s2: String whose words are matched against those of ``s1``.

    Returns:
        A float in ``[0, 1]``. ``0.0`` is returned when ``s1`` contains no
        words at all, instead of raising ``ZeroDivisionError``.
    """
    words1 = set(replace(s1).split())
    words2 = set(replace(s2).split())
    if not words1:
        # Nothing to match against: treat as "no overlap" rather than dividing by zero.
        return 0.0
    return len(words1 & words2) / len(words1)
def replace(s0):
    """Normalise a string for word comparison.

    Every ``.`` and ``:`` is turned into a single space and the result is
    lower-cased; all other characters are left as they are.
    """
    punctuation_to_space = str.maketrans('.:', '  ')
    return s0.translate(punctuation_to_space).lower()
def get_citations(paper, verbose=1, max_retries=5, retry_delay=2.0):
    """Look up the citation count of ``paper`` through the ``scholar`` module.

    The paper title is sent as the query words; only the first returned
    article is considered, and it is accepted only if at least 90% of the
    words of its title also appear in ``paper`` (see ``overlap``).

    Args:
        paper: Title of the paper to search for.
        verbose: When truthy, print the title together with the result.
        max_retries: How many additional attempts to make after a failed
            lookup before giving up.
        retry_delay: Seconds to sleep before each retry.

    Returns:
        The ``num_citations`` value of the first matching article, ``0`` if
        the first result's title does not match closely enough, or ``-1`` if
        every attempt failed (e.g. no articles were returned).
    """
    def _search(title):
        """Run one query; return the citation count, 0 on mismatch, -1 on failure."""
        query = scholar.SearchScholarQuery()
        query.set_words(title)
        querier.send_query(query)
        articles = querier.articles
        try:
            if overlap(articles[0].attrs['title'][0], title) < 0.9:
                return 0
        except Exception:
            # Best-effort: an empty result list or a malformed title is reported
            # as a failed attempt. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit through so the script can be stopped.
            return -1
        return articles[0].attrs['num_citations'][0]

    querier = scholar.ScholarQuerier()
    settings = scholar.ScholarSettings()
    settings.set_citation_format(2)
    querier.apply_settings(settings)

    cites = _search(paper)
    retries = 0
    # The result of each retry must be stored back into ``cites``; otherwise the
    # loop condition never changes and the function never returns.
    while cites == -1 and retries < max_retries:
        # NOTE(review): failures are presumably caused by throttling on the
        # remote side, hence the pause -- confirm against scholar's behaviour.
        time.sleep(retry_delay)
        cites = _search(paper)
        retries += 1

    if verbose:
        print(f'{paper}: ', cites)
    return cites
def get_papers(filename):
    """Collect the bold paper titles listed in a markdown file.

    The file is scanned line by line until a line containing
    ``Other related papers`` is reached; that line and everything after it is
    ignored. A line counts as a paper entry when it contains ``[paper]``; its
    title is the first ``**bold**`` span on that line. Entry lines without any
    bold span are skipped. Each title found is also printed.

    Args:
        filename: Path of the markdown file to read.

    Returns:
        A tuple ``(papers, paper2line)`` where ``papers`` is the list of
        titles in file order and ``paper2line`` maps each index in ``papers``
        to the zero-based line number the title was found on.
    """
    # Non-greedy so that a line with several bold spans yields only the first
    # one instead of everything between the first and the last ``**``.
    bold_span = re.compile(r'\*\*(.+?)\*\*')
    papers = []
    paper2line = {}
    with open(filename, 'r', encoding='utf-8') as f:
        for num, line in enumerate(f):
            if 'Other related papers' in line:  # do not count them
                break
            if '[paper]' not in line:
                continue
            match = bold_span.search(line)
            if match is None:
                # Entry without a bold title: nothing to look up.
                continue
            title = match.group(1)
            paper2line[len(papers)] = num
            papers.append(title)
            print(title)
    return papers, paper2line
# --- Script body: fetch citation counts and flag heavily cited papers in README.md ---

# A paper is marked as "hot" when its citation count is strictly above this value.
HOT_THRESHOLD = 80

papers, paper2line = get_papers('README.md')

citations = []
for p in tqdm(papers):
    time.sleep(2)  # pause between lookups
    citations.append(get_citations(p))

idx = np.arange(len(citations))
citations = np.array(citations)
hot_id = idx[citations > HOT_THRESHOLD]

with open('README.md', 'r', encoding='utf-8') as f:
    content = f.readlines()

# Keep an untouched copy of the README. The lines from readlines() already end
# with their newline, so they are written back as-is; joining them with a
# separator would insert a stray character at the start of every line.
with open('README_old.md', 'w', encoding='utf-8') as f:
    f.writelines(content)

# Matches the "<number>. " list prefix at the start of an entry line.
numbered_prefix = re.compile(r'\s*\d+\.\s')
for p in hot_id:
    line = paper2line[p]
    old_content = content[line]
    if ':fire:' in old_content:
        # Already flagged by an earlier run; do not stack markers.
        continue
    prefix = numbered_prefix.match(old_content)
    if prefix:
        insert_at = prefix.end()
    else:
        # Not a numbered entry: put the marker right before the bold title.
        insert_at = max(old_content.find('**'), 0)
    content[line] = old_content[:insert_at] + ":fire:" + old_content[insert_at:]

with open('README.md', 'w', encoding='utf-8') as f:
    f.writelines(content)