-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcrawlplot.py
59 lines (53 loc) · 2.12 KB
/
crawlplot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json
import logging
import os.path
# Plotting backend used by CrawlPlot.line_plot: 'ggplot' or 'rpy2.ggplot2'.
PLOTLIB = 'rpy2.ggplot2'
# Directory that line_plot joins with the image file name when saving a plot.
PLOTDIR = 'plots'
# Import only the backend selected by PLOTLIB.
if PLOTLIB == 'ggplot':
    # The wildcard import supplies the bare names (ggplot, aes, ggtitle, ...)
    # that line_plot calls in its 'ggplot' branch.
    from ggplot import *
elif PLOTLIB == 'rpy2.ggplot2':
    from rpy2.robjects.lib import ggplot2
    from rpy2.robjects import pandas2ri
    # NOTE(review): presumably registers the pandas <-> R conversion so that
    # data frames can be handed to ggplot2 directly -- confirm against the
    # rpy2 documentation for the installed version.
    pandas2ri.activate()
    # Theme added to the plot built in line_plot's 'rpy2.ggplot2' branch.
    GGPLOT2_THEME = ggplot2.theme_minimal()
    # GGPLOT2_THEME = ggplot2.theme_grey()
class CrawlPlot:
    """Read key/value crawl statistics and draw line plots from them.

    ``read_data`` passes every parsed record to ``self.add(key, val)``.
    ``add`` is not defined in this class as shown here -- presumably
    subclasses provide it (TODO confirm against the subclasses).
    """

    def read_data(self, stream):
        """Parse tab-separated JSON key/value lines and pass them to ``add``.

        Every line of *stream* must consist of exactly two tab-separated
        fields, each a JSON document. Well-formed lines are decoded and
        handed to ``self.add(key, val)``; any other line is reported via
        ``logging.error`` and skipped.

        Args:
            stream: iterable of text lines (e.g. an open file).

        Raises:
            ValueError: propagated from ``json.loads`` when a field of a
                two-field line is not valid JSON.
        """
        for line in stream:
            fields = line.split('\t')
            if len(fields) != 2:
                # Log the offending line itself so that malformed input can
                # be located (str.find() here would only log an index).
                logging.error("Not a key-value pair: %s", line)
                continue
            key = json.loads(fields[0])
            val = json.loads(fields[1])
            self.add(key, val)

    def line_plot(self, data, title, ylabel, img_file,
                  x='date', y='size', c='type', clabel='', ratio=1.0):
        """Draw a line-and-point plot of *data* and save it below PLOTDIR.

        Args:
            data: table to plot, accessed by column name (``data[y]``,
                ``data.columns``) -- presumably a pandas DataFrame.
                NOTE: with the 'rpy2.ggplot2' backend the *y* column (and
                the 'size' column, if present) is converted to float in
                place, i.e. on the caller's object.
            title: plot title.
            ylabel: label of the y axis.
            img_file: image file name, joined with PLOTDIR.
            x: name of the column mapped to the x axis.
            y: name of the column mapped to the y axis.
            c: name of the column mapped to the line/point colour.
            clabel: legend title of the colour scale (used by the
                'rpy2.ggplot2' backend only).
            ratio: aspect ratio of the plot panel (used by the
                'rpy2.ggplot2' backend only).

        Returns:
            The plot object created by the selected backend.

        Raises:
            ValueError: if PLOTLIB does not name a supported backend.
        """
        if PLOTLIB == 'ggplot':
            # date_label = "%Y\n%b"
            date_label = "%Y\n%W"  # year + week number
            p = (ggplot(data, aes(x=x, y=y, color=c))
                 + ggtitle(title)
                 + ylab(ylabel)
                 + xlab(' ')
                 + scale_x_date(breaks=date_breaks('3 months'),
                                labels=date_label)
                 + geom_line() + geom_point())
        elif PLOTLIB == 'rpy2.ggplot2':
            # convert y axis to float because R uses 32-bit signed integers,
            # values >= 2 bln. (2^31) will overflow
            data[y] = data[y].astype(float)
            if y != 'size' and 'size' in data.columns:
                data['size'] = data['size'].astype(float)
            p = (ggplot2.ggplot(data)
                 + ggplot2.aes_string(x=x, y=y, color=c)
                 + ggplot2.geom_line(size=.2) + ggplot2.geom_point()
                 + GGPLOT2_THEME
                 + ggplot2.theme(**{'legend.position': 'bottom',
                                    'aspect.ratio': ratio})
                 + ggplot2.labs(title=title, x='', y=ylabel, color=clabel))
        else:
            # Without this branch `p` would be unbound below and the failure
            # would surface as an opaque UnboundLocalError.
            raise ValueError(
                "Unsupported plotting library: {!r}".format(PLOTLIB))
        img_path = os.path.join(PLOTDIR, img_file)
        p.save(img_path)
        # data.to_csv(img_path + '.csv')
        return p