# word2vec_keras_predict.py
import csv
import pickle
from itertools import zip_longest
import keras
import json
import pandas as pd
from keras_preprocessing.sequence import pad_sequences
# Look up tone labels by predicted class id.
def from_tone(prediction, tones):
    """Translate predicted class ids into tone labels.

    Args:
        prediction: Iterable of integer class ids, e.g. a 1-D NumPy array
            or a plain list.
        tones: Sequence of tone labels indexed by class id.

    Returns:
        A list holding ``tones[class_id]`` for every id in ``prediction``,
        in the same order.

    Raises:
        IndexError: If a class id is outside the range of ``tones``.
    """
    # Iterate over the ids directly instead of indexing through ``.shape``,
    # so any iterable of ids is accepted, not only NumPy arrays.
    return [tones[class_id] for class_id in prediction]
# Load the previously saved model, maxlen and tokenizer.
model = keras.models.load_model('models/model.h5')
# A context manager closes the file even if int() rejects its contents.
with open('models/maxlen.bin', 'r') as f:
    maxlen = int(f.read())
# NOTE(review): unpickling can execute arbitrary code; only load a tokenizer
# pickle that comes from a trusted source.
with open('models/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Load the untagged dictionary from the JSON file.
with open('unallocated_words/unallocated_dictionary.json', 'r', encoding='utf-8') as f:
    js = json.load(f)
df = pd.read_csv('rusentilex/rusentilex.csv')

# Preprocess the input (word -> vector): the first element of every entry is
# wrapped in a one-item list, tokenized and padded to maxlen.
unallocated_words = [[word[0]] for word in js]
unallocated_words_tokens = tokenizer.texts_to_sequences(unallocated_words)
unallocated_words__pad = pad_sequences(unallocated_words_tokens, maxlen=maxlen)

# Predict the class ids. Sequential.predict_classes() was removed from Keras
# (TensorFlow 2.6), so the same decision rule is applied on top of predict().
probabilities = model.predict(x=unallocated_words__pad)
if probabilities.shape[-1] > 1:
    # Multi-unit output: take the most probable class.
    predict = probabilities.argmax(axis=-1)
else:
    # Single-unit output: threshold the probability at 0.5.
    predict = (probabilities > 0.5).astype('int32')
# NOTE(review): assumes class id i corresponds to the i-th distinct value of
# the 'tone' column in order of first appearance - confirm this matches the
# label encoding used when the model was trained.
answers = from_tone(predict, df['tone'].unique().tolist())

# Build the tagged dictionary (term -> predicted tone).
keys = [word[0] for word in js]
dictionary = dict(zip(keys, answers))

# Save the tagged dictionary to a JSON file.
with open('tagged_dictionary/tagged_dictionary.json', 'w', encoding='utf-8') as f:
    json.dump(dictionary, f, ensure_ascii=False, indent=4)

# ...and to a CSV file; the with-block closes the file, so no explicit
# close() is needed afterwards.
export_data = zip_longest(keys, answers, fillvalue='')
with open('tagged_dictionary/tagged_dictionary.csv', 'w', encoding='utf-8', newline='') as f:
    wr = csv.writer(f)
    wr.writerow(("term", "tone"))
    wr.writerows(export_data)