-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathisolation_degree.py
49 lines (36 loc) · 1.44 KB
/
isolation_degree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from pymorphy3 import MorphAnalyzer
# Analyzer shared by all calls; it is created lazily on first real use so
# that calls which cannot change anything never pay for constructing it.
_MORPH_ANALYZER = None

# Parts of speech that are replaced by their normal form at the partial
# isolation degrees. Degree 3 is absent because it normalizes every token.
_POS_BY_DEGREE = {
    1: ("VERB",),
    2: ("VERB", "NOUN", "NPRO"),
}


def _get_morph_analyzer():
    """Return the shared MorphAnalyzer, creating it on the first call."""
    global _MORPH_ANALYZER
    if _MORPH_ANALYZER is None:
        _MORPH_ANALYZER = MorphAnalyzer()
    return _MORPH_ANALYZER


def change_text_isolation_degree(text, isolation_degree):
    """
    Replace word tokens with their normal forms according to the degree.

    Only the first parse returned by the analyzer is used for each token.

    Degrees:
        0 - nothing is changed;
        1 - tokens tagged VERB are replaced by their normal form;
        2 - tokens tagged VERB, NOUN or NPRO are replaced by their normal
            form;
        3 - every token is replaced by its normal form.
    Any other degree leaves the tokens unchanged.

    At degrees 1 and 2 the index of every matching token is recorded, even
    if its text already equals the normal form. At degree 3 an index is
    recorded only when the token text actually differs from the normal form.

    The input is never modified: the returned token list is always a new
    list, including at degree 0.

    :param text: list of word tokens
    :type text: list
    :param isolation_degree: isolation degree from 0 to 3
    :type isolation_degree: int
    :return: two-element list ``[tokens, indexes]`` where ``tokens`` is the
        new list of word tokens and ``indexes`` is the set of positions
        recorded as isolated
    :rtype: list
    """
    word_tokens = list(text)
    changed_tokens = set()

    # Nothing can change for degree 0, unsupported degrees or empty input,
    # so return before touching the analyzer at all.
    if not word_tokens or isolation_degree not in (1, 2, 3):
        return [word_tokens, changed_tokens]

    morph = _get_morph_analyzer()
    # None means "no part-of-speech filter", i.e. degree 3.
    lemmatized_pos = _POS_BY_DEGREE.get(isolation_degree)

    # Only the current slot is overwritten, so the list length is stable
    # and iterating over it while assigning is safe.
    for index, token in enumerate(word_tokens):
        best_parse = morph.parse(token)[0]
        normal_form = best_parse.normal_form

        if lemmatized_pos is None:
            if token != normal_form:
                changed_tokens.add(index)
            word_tokens[index] = normal_form
        elif best_parse.tag.POS in lemmatized_pos:
            word_tokens[index] = normal_form
            changed_tokens.add(index)

    return [word_tokens, changed_tokens]