-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpythonServerCopy.py
148 lines (117 loc) · 6.33 KB
/
pythonServerCopy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from http.server import BaseHTTPRequestHandler, HTTPServer
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
# Address and TCP port handed to HTTPServer when this file is run as a script.
hostName: str = "localhost"
serverPort: int = 8080
class MyServer(BaseHTTPRequestHandler):
    """HTTP request handler exposing one JSON endpoint, ``POST /run-algorithm``.

    The request body must be a JSON array of ``EXPECTED_FEATURES`` feature
    values. On success the response is ``{"probability_pcos": <float>}``;
    on failure it is ``{"error": <message>}`` with a 4xx/5xx status. Every
    response carries ``Access-Control-Allow-Origin: *`` so that browser
    clients which pre-flight through ``OPTIONS`` can also read the answer.
    """

    # Number of feature values a request must supply.
    # NOTE(review): the original code flagged this value as a placeholder
    # ("replace with the actual number of features used during training");
    # run_algorithm re-checks the length against the real training columns.
    EXPECTED_FEATURES = 37

    # Location of the training CSV read by run_algorithm on every call.
    DATASET_PATH = r"C:\Users\jessi\OneDrive - Princeton University\Princeton Hacks\Diagnostic-Celiac-Disease-Management\PCOS Dataset.csv"

    def _send_body(self, status, content_type, body):
        """Send a complete response: status line, headers, then *body* bytes."""
        self.send_response(status)
        self.send_header("Content-type", content_type)
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(body)

    def _send_json(self, status, payload):
        """Serialize *payload* as JSON and send it with the given status."""
        self._send_body(status, "application/json", bytes(json.dumps(payload), "utf-8"))

    def _send_html(self, status, title, message):
        """Send a minimal HTML page made of a title and one paragraph."""
        page = (
            f"<html><head><title>{title}</title></head>"
            f"<body><p>{message}</p></body></html>"
        )
        self._send_body(status, "text/html", bytes(page, "utf-8"))

    def do_OPTIONS(self):
        """Answer a CORS pre-flight request with the allowed methods/headers."""
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'POST, GET, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
        self.end_headers()

    def do_GET(self):
        """Reject GET with 405 and a short page pointing the client to POST.

        The class previously defined ``do_GET`` twice; the later, body-less
        definition silently replaced the earlier one. Only the complete
        variant is kept.
        """
        self._send_html(
            405,
            "Method Not Allowed",
            "GET method is not supported. Please use POST.",
        )

    def do_POST(self):
        """Handle ``POST /run-algorithm``; any other path gets a 404 page.

        Responses:
            200 -- ``{"probability_pcos": p}`` on success.
            400 -- body is not valid JSON, is not an array, has the wrong
                   number of values, or run_algorithm raised ``ValueError``.
            500 -- run_algorithm returned ``None`` (dataset file missing).
        """
        if self.path != "/run-algorithm":
            self._send_html(404, "404 Not Found", "Page not found.")
            return

        # Read and parse the body. A bad request must produce an HTTP error
        # response rather than an unhandled exception inside the handler.
        try:
            content_length = int(self.headers.get('Content-Length') or 0)
            if content_length < 0:
                raise ValueError("negative Content-Length")
            user_input = json.loads(self.rfile.read(content_length))
        except ValueError as exc:  # includes json.JSONDecodeError
            self._send_json(400, {"error": f"Invalid request body: {exc}"})
            return

        if not isinstance(user_input, list):
            self._send_json(
                400, {"error": "Request body must be a JSON array of feature values"}
            )
            return

        expected_length = self.EXPECTED_FEATURES
        if len(user_input) != expected_length:
            self._send_json(
                400,
                {"error": f"Expected input length {expected_length}, but got {len(user_input)}"},
            )
            return

        try:
            probability_pcos = self.run_algorithm(user_input)
        except ValueError as exc:
            # run_algorithm raises ValueError when the input does not match
            # the shape of the training features.
            self._send_json(400, {"error": str(exc)})
            return

        if probability_pcos is None:
            # run_algorithm signals a missing dataset by returning None.
            self._send_json(500, {"error": "Training dataset is not available on the server."})
            return

        self._send_json(200, {"probability_pcos": probability_pcos})

    def run_algorithm(self, user_input):
        """Train a random forest on the dataset and score *user_input*.

        The CSV at ``DATASET_PATH`` is loaded, cleaned and used to fit a
        ``RandomForestClassifier`` on a 70 % training split each time this
        method is called. No ``random_state`` is passed to the split or the
        classifier, so results may differ between calls.

        Args:
            user_input: Sequence of feature values, one per training column,
                in the column order of the dataset after the drops below.

        Returns:
            The second column of ``predict_proba`` for the input row as a
            ``float`` (the PCOS-positive probability, assuming the labels in
            ``"PCOS (Y/N)"`` are 0/1 -- TODO confirm), or ``None`` when the
            dataset file cannot be found.

        Raises:
            ValueError: If the number of supplied values differs from the
                number of training feature columns.
        """
        # reading the dataset
        file_path = self.DATASET_PATH
        try:
            data = pd.read_csv(file_path)
            print("Dataset loaded successfully.")
        except FileNotFoundError:
            print(f"File not found: {file_path}")
            return None

        # Cleaning data set (converting strings to numbers)
        data["AMH(ng/mL)"] = pd.to_numeric(data["AMH(ng/mL)"], errors='coerce')
        data["II beta-HCG(mIU/mL)"] = pd.to_numeric(data["II beta-HCG(mIU/mL)"], errors='coerce')

        # Fill gaps: median for the numeric columns, most frequent value for
        # the yes/no column.
        data['Marraige Status (Yrs)'] = data['Marraige Status (Yrs)'].fillna(data['Marraige Status (Yrs)'].median())
        data['II beta-HCG(mIU/mL)'] = data['II beta-HCG(mIU/mL)'].fillna(data['II beta-HCG(mIU/mL)'].median())
        data['AMH(ng/mL)'] = data['AMH(ng/mL)'].fillna(data['AMH(ng/mL)'].median())
        data['Fast food (Y/N)'] = data['Fast food (Y/N)'].fillna(data['Fast food (Y/N)'].mode()[0])

        # Clearing up the extra space in the column names
        data.columns = [col.strip() for col in data.columns]

        # Identifying non-numeric columns
        non_numeric_columns = data.select_dtypes(include=['object']).columns
        print("Non-numeric columns:", non_numeric_columns)

        # Converting non-numeric columns to numeric where possible
        for col in non_numeric_columns:
            data[col] = pd.to_numeric(data[col], errors='coerce')

        # Dropping rows that still contain missing / unconvertible values
        data.dropna(inplace=True)
        print("Data cleaning completed.")

        # Preparing data for model training
        X = data.drop(["PCOS (Y/N)", "Sl. No", "Patient File No.", "Marraige Status (Yrs)", "Blood Group","II beta-HCG(mIU/mL)","TSH (mIU/L)","Waist:Hip Ratio"], axis=1)
        y = data["PCOS (Y/N)"]

        # Splitting the data into test and training sets (the test split is
        # not used further in this method)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
        print("Data split into training and test sets.")

        # Fitting the RandomForestClassifier to the training set
        rfc = RandomForestClassifier()
        rfc.fit(X_train, y_train)
        print("Model training completed.")

        # Convert the user input to a NumPy array and reshape it to be 2D
        user_input_reshaped = np.array(user_input).reshape(1, -1)

        # Ensure the input data has the correct shape
        if user_input_reshaped.shape[1] != X.shape[1]:
            raise ValueError(f"Expected input shape ({1}, {X.shape[1]}), but got {user_input_reshaped.shape}")

        # Wrap the row in a DataFrame carrying the training feature names.
        # The model is fitted on unscaled features, so the input is used as-is.
        user_input_df = pd.DataFrame(user_input_reshaped, columns=X.columns)

        # Get the class probabilities for the single input row
        probabilities = rfc.predict_proba(user_input_df)

        # Second column of predict_proba; float() makes it a plain Python
        # float for JSON serialization.
        return float(probabilities[0][1])
if __name__ == "__main__":
    # HTTPServer is a context manager: leaving the ``with`` block calls
    # server_close(), so the listening socket is released on every exit path,
    # not only after a KeyboardInterrupt.
    with HTTPServer((hostName, serverPort), MyServer) as webServer:
        print(f"Server started http://{hostName}:{serverPort}")
        try:
            webServer.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the expected way to stop the server.
            pass
    print("Server stopped.")