-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsearch_for_profiles.py
96 lines (81 loc) · 3.52 KB
/
search_for_profiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
The primary module for the LinkedIn Search Tool section of ConneXion.
This module facilitates the overall execution of the search methods, which
includes loading the preferences from consts.py, searching Google for
LinkedIn profiles based on those preferences, and saving the search results
to an Excel file, while indexing the data to a .pkl file.
"""
import pandas as pd
from search_tool.google_api import GoogleSearchAPI
import consts as c
def load_preferences():
    """
    Loads the search preferences from the consts.py file.

    Returns: A dictionary object with four keys named "location",
    "position", "exp_op", and "exp_num" which each correspond to
    the user's preference for that search filter category.
    """
    # Named `preferences` (not `dict`) so the builtin type is not shadowed
    # inside this function.
    preferences = {
        "location": c.LOCATIONS,
        "position": c.POSITIONS,
        "exp_op": c.EXPERIENCE_OPERATOR,
        "exp_num": c.EXPERIENCE_YEARS,
    }
    return preferences
def run_search(preferences):
    """
    Uses Google's search API to search LinkedIn accounts based on preferences.

    Uses preferences to generate a specific search term based on position,
    location, and experience, then uses the Google API to search for the term.
    Rows whose "Link" value does not start with "https://www.linkedin.com/in"
    are removed before the results are returned.

    Returns: A list of length 2 where the first element is a pandas DataFrame
    containing search results, and the second element is an error message, if one
    occurred.

    Parameter preferences: A dictionary of user search preferences.
    Precondition: preferences is generated by search_for_profiles.load_preferences()
    and is based on a consts.py configuration which follows the rules
    outlined in that file.
    """
    # Generate search terms
    google = GoogleSearchAPI(c.API_KEY, c.SEARCH_ENGINE_ID)
    queries = google.generate_queries(preferences)

    # Search for list of terms using Google API
    results = google.search(queries)

    # Keep only rows that link to a LinkedIn profile. A single boolean mask
    # replaces the previous drop-one-row-and-reindex loop, which rebuilt the
    # frame on every removal.
    profile_prefix = "https://www.linkedin.com/in"
    frame = results[0]
    if len(frame) > 0:
        # astype(str) plus na=False means a missing or non-string link is
        # treated as "not a profile" and dropped instead of raising.
        links = frame["Link"].astype(str)
        frame = frame[links.str.startswith(profile_prefix, na=False)]

    # Stored back into the same list object, as before, with a clean
    # 0..n-1 index.
    results[0] = frame.reset_index(drop=True)
    return results
def save_results(results):
    """
    Saves search results to an Excel file at the location designated in consts.py.

    If the file already exists, its current rows are read and the new results
    are appended after them; if it does not exist yet, it is created from the
    new results alone.

    Parameter results: The search results to save.
    Precondition: results is a pandas DataFrame.

    Raises: Any error other than a missing file while reading the existing
    workbook, and any error while writing, is propagated to the caller rather
    than being answered by overwriting the file with only the new rows.
    """
    try:
        existing = pd.read_excel(c.EXCEL_FILE_LOCATION)
    except FileNotFoundError:
        # First run: there is nothing to append to yet.
        combined = results
    else:
        combined = pd.concat([existing, results])

    combined.to_excel(c.EXCEL_FILE_LOCATION, index=False)
def execute_search():
    """
    Orchestrates the overall execution of the LinkedIn Search Tool.

    Loads the preferences, runs the search, writes the resulting profiles to
    the Excel file via save_results(), and then reports the outcome.

    Modifies: An Excel file to contain information about potential client
    profiles the program finds via Google.
    NOTE(review): earlier documentation also mentions two pkl files holding
    previously indexed profiles and search queries; no such write is visible
    in this function -- presumably done inside the search API, confirm.

    Returns: A String message indicating the success of the search. This is
    the error message from the search when it is non-empty, otherwise a
    summary containing the number of profiles found.
    """
    # Preferences feed straight into the search; the outcome is a
    # [DataFrame, error message] pair.
    outcome = run_search(load_preferences())

    # Results are written out regardless of whether an error was reported.
    profiles = outcome[0]
    save_results(profiles)

    error_message = outcome[1]
    if error_message != "":
        return error_message

    return f"Search Completed: {len(profiles)} new profiles added."