forked from rndastech/Internship-Recommendor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathss2.py
84 lines (65 loc) · 3.38 KB
/
ss2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def merge_skills_and_companies(df):
    """Collapse the 'Skill' and 'Companies' cells of every row into plain strings.

    Each cell is turned into a single string: a string is kept as-is, a
    list/tuple of strings is concatenated, and a missing value (None/NaN)
    becomes ''.  In 'Companies' every space is additionally removed.

    Args:
        df: DataFrame with 'Skill' and 'Companies' columns.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        KeyError: if either column is absent.
        TypeError: if a cell is neither a string, an iterable of strings,
            nor a missing value.
    """

    def _flatten(cell):
        # Joining a string character by character is a no-op, so return it directly.
        if isinstance(cell, str):
            return cell
        if isinstance(cell, (list, tuple)):
            return ''.join(cell)
        # Empty CSV fields arrive as NaN/None; ''.join() would raise TypeError on them.
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            return ''
        return ''.join(cell)

    # Column-wise map instead of iterrows()/df.at: no per-row Series is built
    # and duplicate index labels cannot break the assignment.
    df['Skill'] = df['Skill'].map(_flatten)
    df['Companies'] = df['Companies'].map(
        lambda cell: _flatten(cell).replace(' ', '')
    )
    return df
# Load the skill dataset from the working directory. This file reads the
# 'Skill', 'Companies' and 'Popularity Index' columns; any other columns
# (the original note mentions industries) are not used here.
df = pd.read_csv('SkillSet2.csv')
# Normalise the 'Skill' and 'Companies' cells to plain strings; spaces are
# removed from 'Companies', so a multi-word company name becomes one token.
df = merge_skills_and_companies(df)
# Bag-of-words over the company strings: vocabulary capped at 5000 terms,
# with scikit-learn's built-in English stop words removed.
cv = CountVectorizer(max_features=5000, stop_words='english')
# Dense (rows x vocabulary) count matrix, one row per row of df.
vector = cv.fit_transform(df['Companies']).toarray()
# Pairwise cosine similarity between rows: sim[i][j] compares the company
# vectors of rows i and j. Read by recommend() and pickled at the end of the
# script.
sim = cosine_similarity(vector)
def recommend(skill, data=None, similarity=None, top_n=5):
    """Return the skills most similar to *skill*, most popular first.

    The first row whose 'Skill' equals the stripped *skill* is the query.
    The ``top_n`` other rows with the highest similarity to it are selected
    and then ordered by descending 'Popularity Index' (ties keep their
    similarity order).

    Args:
        skill: Skill name to look up; surrounding whitespace is ignored.
        data: DataFrame with 'Skill' and 'Popularity Index' columns.
            Defaults to the module-level ``df``.
        similarity: Square matrix where ``similarity[i][j]`` is the
            similarity between rows ``i`` and ``j`` of *data* (by position).
            Defaults to the module-level ``sim``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of at most ``top_n`` skill names.  Empty when *skill* is
        blank, not a string, or not present in *data*; a message is printed
        in those cases.
    """
    # Fall back to the dataset and matrix built at module import time.
    if data is None:
        data = df
    if similarity is None:
        similarity = sim

    query = skill.strip() if isinstance(skill, str) else ''
    if not query:
        print("Please provide a valid skill name.")
        return []

    # Work with the row *position*, not the index label: the similarity
    # matrix is positional, so labels only coincide for a default RangeIndex.
    query_pos = next(
        (
            pos
            for pos, value in enumerate(data['Skill'])
            if isinstance(value, str) and value == query
        ),
        None,
    )
    if query_pos is None:
        print(f"The skill '{skill}' is not in the dataset.")
        return []

    scores = similarity[query_pos]
    # Exclude the query row explicitly rather than assuming it sorts first;
    # rows with tied similarity could otherwise push the query itself into
    # the results and drop a genuine match.
    candidates = [pos for pos in range(len(scores)) if pos != query_pos]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)
    shortlist = candidates[:top_n]

    # Re-rank only the shortlist by popularity (stable sort keeps similarity
    # order among equally popular skills).
    popularity = data['Popularity Index']
    shortlist.sort(key=lambda pos: popularity.iloc[pos], reverse=True)

    skills = data['Skill']
    return [skills.iloc[pos] for pos in shortlist]
# Script driver: read a skill name from a text file, write up to five
# recommendations to one file each, then persist the similarity matrix.
import pickle

# Provide the path to your text file
file_path = 'input1.txt'
# One output file per recommendation slot: out21.txt ... out25.txt.
output_path_template = r'C:\Users\laptop2\ok\static\out2{}.txt'
output_slots = 5

# Only the input read is guarded, so a problem with the output directory is
# not misreported as the input file being missing.
# NOTE(review): the input is read with the platform default encoding while
# the outputs are written as UTF-8 -- confirm this is intended.
try:
    with open(file_path, 'r') as file:
        content = file.read().strip()
except FileNotFoundError:
    print(f"File '{file_path}' not found.")
else:
    recommendations = recommend(content)  # Call the function with a skill title
    print(recommendations)  # Print the recommended skills
    # Save the recommended skills, one per file. recommend() may return fewer
    # than five entries (or none for an unknown skill); unused slots get an
    # empty line so results from a previous run are not left behind.
    for slot in range(output_slots):
        entry = recommendations[slot] if slot < len(recommendations) else ''
        with open(output_path_template.format(slot + 1), 'w', encoding='utf-8') as file_out:
            file_out.write(f"{entry}\n")

# Save the similarity matrix to a pickle file; the context manager makes sure
# the handle is flushed and closed.
with open('similarity.pkl', 'wb') as pickle_file:
    pickle.dump(sim, pickle_file)