
Commit

refactored PlayerAnomalyDetectionModel and PlayerAccountHandler classes, updated unit tests
merillium committed Feb 4, 2024
1 parent c6b6e0c commit 77058f8
Showing 15 changed files with 465 additions and 342 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,6 +1,6 @@
.*
**/__pycache__/
!/.gitignore
__pycache__
lichess_player_data/
lichess-games-database/
exploratory_plots/
3 changes: 3 additions & 0 deletions Makefile
@@ -0,0 +1,3 @@
.PHONY: test
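# note: PYTHONPATH=. puts the repo root on the import path so the tests can import the top-level modules (model, player_account_handler)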
test:
	PYTHONPATH=. pytest
10 changes: 7 additions & 3 deletions README.md
@@ -19,17 +19,21 @@ The model is built on the assumption that cheating is a rare occurrence in any d

### Sample code:
```python
import pandas as pd
from player_account_handler import PlayerAccountHandler
from model import PlayerAnomalyDetectionModel
BASE_FILE_NAME = 'lichess_db_standard_rated_2015-01'
train_data = pd.read_csv(f'lichess_player_data/{BASE_FILE_NAME}_player_features.csv')
model = PlayerAnomalyDetectionModel()
player_account_handler = PlayerAccountHandler()
model = PlayerAnomalyDetectionModel(player_account_handler)
model.fit(train_data)
model.save_model(f'{BASE_FILE_NAME}_model')
predictions = model.predict(train_data)
```
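
A possible follow-up to the sample above, sketching how the output of `predict` could be inspected. The return format and the `is_anomalous` column name are assumptions for illustration only; this commit does not show what `predict` actually returns.

```python
# Continuing from the sample above (hypothetical sketch).
# Assumption: predict() returns a pandas DataFrame aligned with train_data's rows,
# including a boolean column flagging suspected anomalous accounts; the real
# column name may differ.
flagged = predictions[predictions["is_anomalous"]]
print(f"Flagged {len(flagged)} of {len(train_data)} player/time-control rows")
```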

### Unit Tests
Currently working on the following unit test(s) which can be run with the following command:
```pytest test_model.py```
Currently working on unit tests, which can be run with the following command:
```make test```, or, to run a test file individually, ```PYTHONPATH=. pytest tests/test_model.py```
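
As a rough illustration, a minimal pytest sketch under the same project layout; the actual `tests/test_model.py` is not shown in this commit, and the feature columns below are assumptions borrowed from the exploratory plots.

```python
# Hypothetical sketch of tests/test_model.py -- not the committed test file.
import pandas as pd

from player_account_handler import PlayerAccountHandler
from model import PlayerAnomalyDetectionModel


def test_fit_and_predict_run_end_to_end():
    # Assumed feature columns; the real training CSVs may contain more or different ones.
    train_data = pd.DataFrame(
        {
            "time_control": ["blitz", "blitz"],
            "rating_bin": [1500, 1600],
            "mean_rating_gain": [2.0, 35.0],
            "mean_perf_diff": [0.05, 0.40],
        }
    )
    model = PlayerAnomalyDetectionModel(PlayerAccountHandler())
    model.fit(train_data)
    assert model.predict(train_data) is not None
```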

To-do:
- write a bash script to download and unzip data from the lichess.org open database
21 changes: 11 additions & 10 deletions app.py
@@ -1,6 +1,7 @@
"""
A sample Hello World server.
"""

import os

from flask import Flask, render_template
@@ -11,20 +12,20 @@
app = Flask(__name__)


@app.route('/')
@app.route("/")
def hello():
"""Return a friendly HTTP greeting."""
message = "It's running!"

"""Get Cloud Run environment variables."""
service = os.environ.get('K_SERVICE', 'Unknown service')
revision = os.environ.get('K_REVISION', 'Unknown revision')
service = os.environ.get("K_SERVICE", "Unknown service")
revision = os.environ.get("K_REVISION", "Unknown revision")

return render_template(
"index.html", message=message, Service=service, Revision=revision
)

return render_template('index.html',
message=message,
Service=service,
Revision=revision)

if __name__ == '__main__':
    server_port = os.environ.get('PORT', '8080')
    app.run(debug=False, port=server_port, host='0.0.0.0')
if __name__ == "__main__":
    server_port = os.environ.get("PORT", "8080")
    app.run(debug=False, port=server_port, host="0.0.0.0")
17 changes: 10 additions & 7 deletions enums.py
@@ -1,12 +1,15 @@
from enum import Enum


class TimeControl(Enum):
    BULLET = 'bullet'
    BLITZ = 'blitz'
    RAPID = 'rapid'
    CLASSICAL = 'classical'
    ALL = ['bullet', 'blitz', 'rapid', 'classical']
    BULLET = "bullet"
    BLITZ = "blitz"
    RAPID = "rapid"
    CLASSICAL = "classical"
    ALL = ["bullet", "blitz", "rapid", "classical"]


class Folders(Enum):
    MODEL_PLOTS = 'model_plots'
    SAVED_MODELS = 'saved_models'
    MODEL_PLOTS = "model_plots"
    SAVED_MODELS = "saved_models"
    EXPLORATORY_PLOTS = "exploratory_plots"
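
For context, a small sketch of how these `Folders` members are consumed (compare `exploratory_plots.py` below); the `os.makedirs(..., exist_ok=True)` variant and the example file name are illustrative, not the committed code.

```python
import os

from enums import Folders, TimeControl

# Create the plots directory if it does not already exist.
os.makedirs(Folders.EXPLORATORY_PLOTS.value, exist_ok=True)

# Enum values hold the raw strings used to build output paths.
out_path = os.path.join(
    Folders.EXPLORATORY_PLOTS.value,
    f"{TimeControl.BLITZ.value}_rating_gain.html",
)
```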
154 changes: 89 additions & 65 deletions exploratory_plots.py
@@ -4,91 +4,115 @@
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from enums import Folders

## constants could eventually go into enums
BASE_FILE_NAME = 'lichess_db_standard_rated_2015-01'
EXPLORATORY_PLOTS_FOLDER = 'exploratory_plots'
BASE_FILE_NAME = "lichess_db_standard_rated_2015-01"

if not os.path.exists(EXPLORATORY_PLOTS_FOLDER):
    os.mkdir(EXPLORATORY_PLOTS_FOLDER)

if not os.path.exists(Folders.EXPLORATORY_PLOTS.value):
    os.mkdir(Folders.EXPLORATORY_PLOTS.value)

## load the player features dataframe
all_player_features = pd.read_csv(f'lichess_player_data/{BASE_FILE_NAME}_player_features.csv')
all_player_features = all_player_features[all_player_features['time_control'].isin(['bullet','blitz','classical'])]
all_player_features = pd.read_csv(
    f"lichess_player_data/{BASE_FILE_NAME}_player_features.csv"
)
all_player_features = all_player_features[
    all_player_features["time_control"].isin(["bullet", "blitz", "classical"])
]

## plot the distribution of mean rating gain for each rating bin
for time_group, time_group_df in all_player_features.groupby('time_control'):
for time_group, time_group_df in all_player_features.groupby("time_control"):
    fig = go.Figure()
    for rating_bin, rating_group in time_group_df.groupby('rating_bin'):
    for rating_bin, rating_group in time_group_df.groupby("rating_bin"):
        rating_bin_str = f"{rating_bin}-{rating_bin+100}"
        fig.add_trace(go.Violin(x=rating_group['mean_rating_gain'].values,
                                name=rating_bin_str,
                                box_visible=False,
                                meanline_visible=False,
                                opacity=0.5),
        )

    ## the side='positive' argument to update_traces method
    ## is only valid for a figure containing only go.Violin plots,
        fig.add_trace(
            go.Violin(
                x=rating_group["mean_rating_gain"].values,
                name=rating_bin_str,
                box_visible=False,
                meanline_visible=False,
                opacity=0.5,
            ),
        )

    ## the side='positive' argument to update_traces method
    ## is only valid for a figure containing only go.Violin plots,
    ## so we have to update_layout before we add any of the go.Scatter traces

    fig.update_traces(orientation='h', side='positive', width=3, points=False)
    fig.update_layout(title=f'{time_group.capitalize()} Rating Changes by Rating Bin',
                      xaxis_title='Mean Rating Change',
                      yaxis_title='Rating Bin',
                      xaxis_showgrid=False, xaxis_zeroline=False)

    fig.update_traces(orientation="h", side="positive", width=3, points=False)
    fig.update_layout(
        title=f"{time_group.capitalize()} Rating Changes by Rating Bin",
        xaxis_title="Mean Rating Change",
        yaxis_title="Rating Bin",
        xaxis_showgrid=False,
        xaxis_zeroline=False,
    )

    ## add markers to indicate the mean rating gain for each rating bin
    for rating_bin, rating_group in time_group_df.groupby('rating_bin'):
    for rating_bin, rating_group in time_group_df.groupby("rating_bin"):
        rating_bin_str = f"{rating_bin}-{rating_bin+100}"
        fig.add_trace(go.Scatter(
            x=[rating_group['mean_rating_gain'].mean()],
            y=[rating_bin_str],
            mode='markers',
            showlegend=False,
            marker=dict(color='black', size=5),
            marker_symbol='diamond'
        ))

    fig.add_vline(x=0, line_dash="dash", line_color='blue', line_width=2, opacity=0.5)
    fig.write_html(f"exploratory_plots/{BASE_FILE_NAME}_{time_group}_rating_gain.html")
        fig.add_trace(
            go.Scatter(
                x=[rating_group["mean_rating_gain"].mean()],
                y=[rating_bin_str],
                mode="markers",
                showlegend=False,
                marker=dict(color="black", size=5),
                marker_symbol="diamond",
            )
        )

    fig.add_vline(x=0, line_dash="dash", line_color="blue", line_width=2, opacity=0.5)
    fig.write_html(
        f"{Folders.EXPLORATORY_PLOTS.value}/{BASE_FILE_NAME}_{time_group}_rating_gain.html"
    )


## plot distribution of mean_perf_diff
for time_group, time_group_df in all_player_features.groupby('time_control'):
for time_group, time_group_df in all_player_features.groupby("time_control"):
    fig = go.Figure()
    for rating_bin, rating_group in time_group_df.groupby('rating_bin'):
    for rating_bin, rating_group in time_group_df.groupby("rating_bin"):
        rating_bin_str = f"{rating_bin}-{rating_bin+100}"
        fig.add_trace(go.Violin(x=rating_group['mean_perf_diff'].values,
                                name=rating_bin_str,
                                box_visible=False,
                                meanline_visible=False,
                                opacity=0.5),
        )

    ## the side='positive' argument to update_traces method
    ## is only valid for a figure containing only go.Violin plots,
        fig.add_trace(
            go.Violin(
                x=rating_group["mean_perf_diff"].values,
                name=rating_bin_str,
                box_visible=False,
                meanline_visible=False,
                opacity=0.5,
            ),
        )

    ## the side='positive' argument to update_traces method
    ## is only valid for a figure containing only go.Violin plots,
    ## so we have to update_layout before we add any of the go.Scatter traces

    ## add markers to indicate the mean rating gain for each rating bin
    fig.update_traces(orientation='h', side='positive', width=3, points=False)
    fig.update_layout(title=f'{time_group.capitalize()} Performance Difference by Rating Bin',
                      xaxis_title='Mean Performance Difference',
                      yaxis_title='Rating Bin',
                      xaxis_range=[-1.00,1.00],
                      xaxis_showgrid=False, xaxis_zeroline=False)
    fig.update_traces(orientation="h", side="positive", width=3, points=False)
    fig.update_layout(
        title=f"{time_group.capitalize()} Performance Difference by Rating Bin",
        xaxis_title="Mean Performance Difference",
        yaxis_title="Rating Bin",
        xaxis_range=[-1.00, 1.00],
        xaxis_showgrid=False,
        xaxis_zeroline=False,
    )

    for rating_bin, rating_group in time_group_df.groupby('rating_bin'):
    for rating_bin, rating_group in time_group_df.groupby("rating_bin"):
        rating_bin_str = f"{rating_bin}-{rating_bin+100}"
        fig.add_trace(go.Scatter(
            x=[rating_group['mean_perf_diff'].mean()],
            y=[rating_bin_str],
            mode='markers',
            showlegend=False,
            marker=dict(color='black', size=5),
            marker_symbol='diamond'
        ))

    fig.add_vline(x=0.0, line_dash="dash", line_color='blue', line_width=2, opacity=0.5)
    fig.write_html(f"exploratory_plots/{BASE_FILE_NAME}_{time_group}_perf_diff.html")
        fig.add_trace(
            go.Scatter(
                x=[rating_group["mean_perf_diff"].mean()],
                y=[rating_bin_str],
                mode="markers",
                showlegend=False,
                marker=dict(color="black", size=5),
                marker_symbol="diamond",
            )
        )

    fig.add_vline(x=0.0, line_dash="dash", line_color="blue", line_width=2, opacity=0.5)
    fig.write_html(
        f"{Folders.EXPLORATORY_PLOTS.value}/{BASE_FILE_NAME}_{time_group}_perf_diff.html"
    )
42 changes: 0 additions & 42 deletions get_player_labels.py

This file was deleted.


