-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
81 lines (63 loc) · 2.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
from io import StringIO
from time import time
import cohere
import numpy as np
import pandas as pd
import streamlit as st
import json
from Recommender import Recommender
# Runtime configuration comes from the environment. The API key lookup yields
# None when COHERE_API_KEY is unset, whereas a missing CO_MODEL_NAME raises
# KeyError as soon as this module is executed.
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
COHERE_MODEL_NAME = os.environ["CO_MODEL_NAME"]

# Where the job postings are read from, and how many matches are requested
# and rendered per query.
DATASET_PATH = "./output/job_descriptions.json"
RESULTS_LIMIT = 3

# Shared Cohere client plus the project recommender that is handed the client
# and the model name.
co = cohere.Client(COHERE_API_KEY)
recommender = Recommender(co, COHERE_MODEL_NAME)
def get_embeddings(text_column=None):
    """Load the job postings and embed one text per posting.

    The postings are read from ``DATASET_PATH`` (JSON, UTF-8) into a
    DataFrame and sent to the Cohere embed endpoint using
    ``COHERE_MODEL_NAME``.

    Args:
        text_column: Optional name of the DataFrame column holding the text
            to embed. When ``None`` (the default), every posting is
            serialised as ``"<column>: <value>"`` lines covering all of its
            fields.
            NOTE(review): pass the description column here once its name is
            confirmed against the dataset schema.

    Returns:
        A ``(jd_df, embeds)`` tuple: the postings DataFrame and the
        ``embeddings`` attribute of the embed response, one entry per row of
        ``jd_df`` in row order.

    Raises:
        KeyError: If ``text_column`` is given but is not a column of the
            dataset.
    """
    with open(DATASET_PATH, "r", encoding="utf-8") as file:
        job_descriptions = json.load(file)
    jd_df = pd.DataFrame(job_descriptions)

    # Iterating a DataFrame (``list(jd_df)``) yields its column labels, not
    # its rows, so the texts must be built row by row to keep the embeddings
    # aligned with the positional lookups callers perform on ``jd_df``.
    if text_column is not None:
        texts = jd_df[text_column].astype(str).tolist()
    else:
        texts = [
            "\n".join(f"{column}: {value}" for column, value in record.items())
            for record in jd_df.to_dict(orient="records")
        ]

    embeds = co.embed(texts=texts, model=COHERE_MODEL_NAME).embeddings
    return jd_df, embeds
# Render the app with the "wide" page layout.
# NOTE(review): Streamlit's documentation asks for set_page_config to run
# before other Streamlit commands — keep it ahead of the heading below.
st.set_page_config(layout="wide")
# Page heading, written as a level-2 Markdown header.
st.markdown("## Let's find the best job for you.")
@st.cache_data()
def setup():
    """Return the postings DataFrame and its embeddings as a float32 array.

    Delegates loading and embedding to ``get_embeddings`` and converts the
    returned embeddings into a NumPy array with dtype ``float32``. The
    function is wrapped in ``st.cache_data()`` (presumably so Streamlit
    reruns do not re-embed the dataset).
    """
    postings, raw_vectors = get_embeddings()
    return postings, np.array(raw_vectors, dtype=np.float32)
# Postings DataFrame and the embedding matrix the recommender searches over.
jd_df, candidates = setup()

# Sorted job ids, printed to stdout for debugging.
job_postings = jd_df.jobId.tolist()
job_postings.sort()
print(job_postings)

# Empty module-level cache; nothing in the visible script reads or writes it.
images_cache = dict()
# Resume upload widget, restricted to .txt files.
uploaded_file = st.file_uploader("Upload your resume", type="txt")

if uploaded_file is None:
    # Nothing uploaded yet on this run.
    input_resume = None
else:
    # Decode the uploaded bytes as UTF-8 and echo the text back on the page.
    input_resume = StringIO(uploaded_file.getvalue().decode("utf-8")).read()
    st.write(input_resume)

# True only on the run triggered by clicking the button.
retrieve_button = st.button("fetch!")
# Run a similarity query whenever resume text is available. Clicking the
# button without a resume used to send None to the recommender; it now shows
# a warning instead.
if input_resume:
    print("Querying resume")

    # Time only the recommender call, not the debug printing that follows.
    start_time = time()
    result = recommender.get_similarity(
        input_resume, candidates=candidates, top_k=RESULTS_LIMIT
    )
    end_time = time()
    print(result)

    # NOTE(review): the row is looked up by the enumerate position, not by
    # anything carried in `hit`, so this always selects the first rows of
    # jd_df. The structure of `hit` is not visible in this file — confirm
    # what Recommender.get_similarity returns and index jd_df by the hit's
    # own row position.
    similar_results = {}
    for index, hit in enumerate(result):
        print(hit)
        similar_results[index] = jd_df.iloc[index]
    print("Similar Results:")
    print(similar_results)

    # Lay the matches out in rows of RESULTS_LIMIT columns.
    for index in range(0, len(similar_results), RESULTS_LIMIT):
        cols = st.columns(RESULTS_LIMIT)
        for i in range(RESULTS_LIMIT):
            try:
                posting = similar_results[index + i]
                cols[i].markdown(f"**jobId**: {posting['jobId']}")
                cols[i].markdown(f"**Role**: {posting['role']}")
                cols[i].markdown(f"**Department**: {posting['department']}")
            except KeyError:
                # Fewer matches than columns, or a posting without one of
                # the displayed fields: leave the rest of this column empty.
                # Only KeyError is tolerated so unrelated failures surface.
                continue
    st.markdown(f"search latency = {end_time - start_time:.4f}s")
elif retrieve_button:
    st.warning("Please upload a resume before fetching recommendations.")