Skip to content

Commit

Permalink
several data transform changes; make it into a pytorch dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
guidopetri committed Oct 22, 2020
1 parent b7371f0 commit 1390dea
Showing 1 changed file with 162 additions and 32 deletions.
194 changes: 162 additions & 32 deletions csgo_wp/data_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import torch
from csgo.analytics.distance import point_distance, area_distance
from functools import partial
import os
from pathlib import Path
import pandas as pd


def euclidean_distance(x, game_map):
Expand Down Expand Up @@ -32,49 +35,176 @@ def transform_data(df, game_map):
df.drop_duplicates(inplace=True)

for c in ['X', 'Y', 'Z']:
df[f'pos{c}'] = df[c].astype(float)

df['areaId'] = df['AreaId'].astype(int)

df['hp'] = df['Hp'].astype(int)
df.rename(columns={'Side': 'side',
'PlayerId': 'name',
'Tick': 'tick',
},
inplace=True)

df = df[['side',
'hp',
'name',
'posX',
'posY',
'posZ',
'tick',
'areaId',
]].copy() # shut up the warnings
df['pos'] = df[['posX', 'posY', 'posZ']].values.tolist()

merged = df.merge(df, on='tick')
df[c] = df[c].astype(float)

df['AreaId'] = df['AreaId'].astype(int)
df['Hp'] = df['Hp'].astype(int)

# this better not be a fractional
players_count = df['SteamId'].nunique()

df = df[['Side',
'Hp',
'SteamId',
'X',
'Y',
'Z',
'Tick',
'AreaId',
]]
# TODO: fix SettingWithCopy warning
df['pos'] = df[['X', 'Y', 'Z']].values.tolist()

merged = df.merge(df, on='Tick')
merged['pos_diff'] = merged[['pos_x', 'pos_y']].values.tolist()
merged['area_diff'] = merged[['areaId_x', 'areaId_y']].values.tolist()
merged['area_diff'] = merged[['AreaId_x', 'AreaId_y']].values.tolist()

area_dist = partial(area_dist_all, game_map=game_map)

distance_matrix = merged.pivot_table(index=['tick', 'name_x'],
columns='name_y',
distance_matrix = merged.pivot_table(index=['Tick', 'SteamId_x'],
columns='SteamId_y',
values='area_diff',
aggfunc=area_dist,
)

t = torch.Tensor(distance_matrix.values).view(-1, 10, 10)
t = torch.Tensor(distance_matrix.values).view(-1,
players_count,
players_count)

additional_data = (df.groupby(['tick', 'name'])
.agg({'hp': is_alive, # how about returning hp?
'side': is_ct,
additional_data = (df.groupby(['Tick', 'SteamId'])
.agg({'Hp': is_alive, # how about returning hp?
'Side': is_ct,
})
.astype(int)
)

t_2 = torch.Tensor(additional_data.values).view(-1, 10, 2)
t_2 = torch.Tensor(additional_data.values).view(-1, players_count, 2)

return torch.cat((t, t_2), dim=2).unsqueeze(1)
# return torch.cat((t, t_2), dim=2).unsqueeze(1)
n_samples = t.shape[0]
result = torch.cat([t.reshape(n_samples, players_count ** 2),
t_2.reshape(n_samples, players_count * 2)],
dim=1).view(n_samples, players_count + 2, players_count)

return result


class CSGODataset(torch.utils.data.Dataset):

def __init__(self,
folder='G:/datasets/csgo/',
transform=None,
dataset_split='train'):
self.folder = folder + 'match-map-unique/'
self.split = dataset_split
self.split_folder = self.folder + dataset_split

if transform is None:
raise ValueError('Transform required')

if not os.path.exists(self.folder):
print('Train/val/test splits not found')
self.file_loc = folder + 'example_frames.csv'
# parse csv and separate into folder

os.makedirs(self.folder + 'train')
os.makedirs(self.folder + 'val')
os.makedirs(self.folder + 'test')

print('Loading entire dataframe into memory...')
df = pd.read_csv(self.file_loc)

print('Getting match/map/round combinations...')
# list of lists
match_map_combos = (df[['MatchId',
'MapName',
'RoundNum',
]].drop_duplicates()
.values.tolist())

print('Writing combinations to train/val/test folders...')
for combo in match_map_combos:
value = torch.rand(1).item()

if value > 0.8:
split = 'test'
elif value < 0.6:
split = 'train'
else:
split = 'val'

subset = df[(df['MatchId'] == combo[0])
& (df['MapName'] == combo[1])
& (df['RoundNum'] == combo[2])].copy()
tick_count = subset['Tick'].nunique()
subset.to_csv(f'{self.folder}{split}/match-{combo[0]}-'
f'{combo[1]}-{combo[2]}-{tick_count}.csv')
print('Data written to disk')

self.transform = transform

self.folder = Path(self.split_folder)
self.matchups = list(self.folder.glob('*.csv'))

self.matchup_idx_by_sample_idx = {}
self.n_samples = 0

print('Calculating sample index offsets...')

for idx, matchup in enumerate(self.matchups):
tick_count = int(matchup.stem.split('-')[-1]) # last component

for i in range(self.n_samples, self.n_samples + tick_count):
self.matchup_idx_by_sample_idx[i] = (idx, self.n_samples)

self.n_samples += tick_count
print('Done!')

self.targets = pd.read_csv(folder + 'example_rounds.csv',
usecols=['MatchId',
'MapName',
'RoundNum',
'RoundWinnerSide',
])

def __len__(self):
return self.n_samples

def __getitem__(self, sample_idx):
idx, n_samples_prior = self.matchup_idx_by_sample_idx[sample_idx]

idx_in_match = sample_idx - n_samples_prior

df = pd.read_csv(self.matchups[idx])
match_id = df['MatchId'].values[0]
map_name = df['MapName'].values[0]
round_num = df['RoundNum'].values[0]

transformed_df = self.transform(df, map_name)
transformed_df = transformed_df[idx_in_match]

data = torch.Tensor(transformed_df)

round_result = self.targets[(self.targets['MatchId'] == match_id)
& (self.targets['MapName'] == map_name)
& (self.targets['RoundNum'] == round_num)]

target = (round_result['RoundWinnerSide'] == 'CT').astype(int)
target = torch.Tensor(target.values)

return data, target.view(1)


if __name__ == '__main__':
dataset = CSGODataset(transform=transform_data)

loader = torch.utils.data.DataLoader(dataset,
batch_size=1,
shuffle=True,
num_workers=0,
)

# check if dataset acts as expected
for index, (data, target) in enumerate(loader):
print(index, data.shape, target.shape)
break

0 comments on commit 1390dea

Please sign in to comment.