-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSimilarity_Matrix_v1.py
188 lines (151 loc) · 7.15 KB
/
Similarity_Matrix_v1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import numpy as np
import data_normalizer as nz
class Similarity_Matrix:
    """Hold pairwise tweet-similarity matrices together with the tweets.

    Three square matrices are kept, each addressed as ``[row][col]``:
    a term-level matrix, a semantic matrix, and a combined "similarity
    graph" that :meth:`create_similarity_graph` fills with their sum.
    Call :meth:`initialize` to allocate the matrices and :meth:`openFile`
    to load the tweets.
    """

    # Bounds handed to ``nz.minMax`` as its second and third arguments when
    # the term-level matrix is rescaled in ``set__term_level_sim``.
    __normilize_max = 10
    __normilize_min = 0

    def __init__(self):
        # Mutable state lives on the instance.  Declaring these lists on the
        # class would make every instance append to the same tweet list.
        self.__term_level_sim = []  # term level similarity graph
        self.__semantic_sim = []  # semantic similarity graph
        self.__similarity_graph = []  # total similarity = term + semantic
        self.__tweets = []  # a list that keeps all tweets

    def initialize(self, tweet_count):
        """Allocate zero-filled ``tweet_count x tweet_count`` matrices.

        :param tweet_count: number of rows and columns of each matrix
        """
        shape = (tweet_count, tweet_count)
        self.__term_level_sim = np.zeros(shape)
        self.__semantic_sim = np.zeros(shape)
        # total similarity between tweets = term_level + semantic
        self.__similarity_graph = np.zeros(shape)

    # ~~~~~~~~~~~~~ shared helpers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    @staticmethod
    def __lookup(matrix, args, getter_name):
        """Return the whole matrix, one row, or one cell of ``matrix``.

        :param matrix: matrix to read from
        :param args: ``()`` for the whole matrix, ``(row,)`` for one row,
            ``(row, col)`` for a single value
        :param getter_name: public getter name used in the error message
        :raises ValueError: if more than two indices are supplied
        """
        if len(args) > 2:
            raise ValueError(
                "{} is excpected at most 2 args but got {}.".format(
                    getter_name, len(args)))
        selected = matrix
        # Apply the indices one after another: matrix -> row -> cell.
        for index in args:
            selected = selected[index]
        return selected

    @staticmethod
    def __is_numeric(value):
        """Return True if ``value`` is a Python or NumPy numeric scalar."""
        return isinstance(value, (int, float, np.number))

    # ~~~~~~~~~~~~~ term level similarity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def get__term_level_sim(self, *args):
        """Return the term-level matrix, one of its rows, or one value.

        :param args: no argument, ``row``, or ``row, col``
        :raises ValueError: if more than two arguments are given
        """
        return self.__lookup(self.__term_level_sim, args, "get__term_level_sim")

    def set__term_level_sim(self, row, col, value):
        """Store ``value`` at ``[row][col]`` and renormalize the matrix.

        A value of ``-1`` means "skip this pair" and leaves the matrix
        untouched.  After a real value is stored, the whole matrix is
        replaced by the result of ``nz.minMax`` (presumably a min-max
        rescaling -- confirm against ``data_normalizer``).

        :raises TypeError: if ``value`` is not numeric
        """
        if not self.__is_numeric(value):
            raise TypeError('Value must be numeric!')
        if value == -1:
            return  # -1 marks a tweet pair that is being passed over
        self.__term_level_sim[row][col] = value
        self.__term_level_sim = nz.minMax(
            self.__term_level_sim, self.__normilize_max, self.__normilize_min)

    def increase_term_level_sim(self, row, col, inc_value):
        """Add ``inc_value`` to the term-level similarity at ``[row][col]``.

        No normalization is performed here.

        :param row: row value in similarity matrix or row-th tweet
        :param col: col value in similarity matrix or col-th tweet
        :param inc_value: increment value
        """
        self.__term_level_sim[row][col] += inc_value

    # ~~~~~~~~~~~~~~~~~~~~ semantic similarity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def get__semantic_sim(self, *args):
        """Return the semantic matrix, one of its rows, or one value.

        :param args: no argument, ``row``, or ``row, col``
        :raises ValueError: if more than two arguments are given
        """
        return self.__lookup(self.__semantic_sim, args, "get__semantic_sim")

    def set__semantic_sim(self, row, col, value):
        """Store ``value`` at ``[row][col]`` of the semantic matrix.

        A value of ``-1`` means "skip this pair" and leaves the matrix
        untouched.

        :raises TypeError: if ``value`` is not numeric
        """
        if not self.__is_numeric(value):
            raise TypeError('Value must be numeric!')
        if value == -1:
            return  # -1 marks a tweet pair that is being passed over
        self.__semantic_sim[row][col] = value

    def increase__semantic_sim(self, row, col, inc_value):
        """Add ``inc_value`` to the semantic similarity at ``[row][col]``."""
        self.__semantic_sim[row][col] += inc_value

    def __isPrivate(self):
        """Print a notice; name-mangled, so only reachable inside the class."""
        print("You called a private func")

    # ~~~~~~~~~~~~~final similarity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def get__similarity_graph(self, *args):
        """Return the combined matrix, one of its rows, or one value.

        :param args: no argument, ``row``, or ``row, col``
        :raises ValueError: if more than two arguments are given
        """
        return self.__lookup(
            self.__similarity_graph, args, "get__similarity_graph")

    def create_similarity_graph(self):
        """Set the combined matrix to term-level + semantic similarity."""
        self.__similarity_graph = self.__term_level_sim + self.__semantic_sim

    # ~~~~~~~~~~~~~~~~~~~~~~ Tweets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def get__tweets(self, *args):
        """Return all tweets, or the tweet at the given index.

        :param args: no argument for the full list, or a single index
        :raises ValueError: if more than one argument is given
        """
        if len(args) > 1:
            raise ValueError(
                "get_tweets expected at most 1 argument got {} .".format(
                    len(args)))
        if not args:
            return self.__tweets
        return self.__tweets[args[0]]

    def tweet_len(self):
        """Return the number of tweets loaded so far."""
        return len(self.__tweets)

    # ~~~~~~~~~~~~~~~~~~ reading file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def openFile(self, fin_name):
        """Append every line of ``fin_name`` to the tweet list.

        Each line is lower-cased and stripped of leading/trailing
        whitespace.  If the file cannot be opened, the error is printed
        and the process exits with status ``-1``.

        :param fin_name: path of the text file, one tweet per line
        """
        try:
            fin = open(fin_name, 'r')
        except IOError as e:
            print("Something bad happened with files:\n", e)
            # Same exit status as exit(-1), without relying on the site module.
            raise SystemExit(-1)
        # The with-block closes the handle even if reading fails part-way.
        with fin:
            for line in fin:
                # case fold, then strip blank space from both ends
                self.__tweets.append(line.lower().strip())

    def printSlice(self, matrix, coordinate=(0, 0, 99, 99), round_step=5):
        """Print a rectangular part of ``matrix`` to the screen.

        :param matrix: matrix to print, addressed as ``matrix[i][j]``
        :param coordinate: ``(top, left, bottom, right)``; rows
            ``top..bottom-1`` and columns ``left..right-1`` are printed,
            i.e. the bottom-right corner itself is excluded
        :param round_step: number of decimals each value is rounded to,
            default 5
        """
        top, left, bottom, right = (coordinate[0], coordinate[1],
                                    coordinate[2], coordinate[3])
        print("\n>>>>>> Printing values between {},{} and {},{} <<<<<<\n".format(
            top, left, bottom, right))
        print()
        for i in range(top, bottom):
            cells = "".join(
                "{:7}, ".format(round(matrix[i][j], round_step))
                for j in range(left, right))
            print("[" + cells + "],")
######################################################################################################
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ END of Summarize class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
######################################################################################################