# rl.py
import numpy as np
import random
import math
from keras.models import Sequential, load_model
from keras.layers import Activation
from keras.layers import Dense
import keras.backend as K
import load as l
from geopy import Point
from geopy.distance import VincentyDistance
import time
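
# NOTE: VincentyDistance was removed in geopy 2.x; this script assumes an older
# geopy release that still ships it.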
SPEED_MODEL = 'speed.h5'
DISTANCE_MODEL = 'direction.h5'
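
# Pre-trained Keras models loaded below (assumed, from how they are called later):
# `speed_model` predicts the next speed from the current speed reading, and
# `distance_model` predicts the next heading from the current heading reading.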
speed_model = load_model(SPEED_MODEL)
distance_model = load_model(DISTANCE_MODEL)

def speed(vector):
    # Predict the next speed from the current speed reading (vector[1]).
    x = speed_model.predict(np.reshape(vector[1], (1, 1, 1)))
    # reward = K.tanh(1/math.sqrt((newCoords[0]-x[0])**0.5 - (newCoords[1]-x[1])**0.5))*10
    return x

def direction(vector):
    # Predict the next heading from the current heading reading (vector[3]).
    x = distance_model.predict(np.reshape(vector[3], (1, 1, 1)))
    # reward = K.tanh(1/math.sqrt((newCoords[0]-x[0])**0.5 - (newCoords[1]-x[1])**0.5))*10
    return x

def NewCoords(coords, speed, direction, timeDelay=1/3600):
    # Dead-reckon a new position: move from `coords` (assumed (latitude, longitude))
    # along `direction` (degrees) at `speed` (assumed km/h) for `timeDelay` hours.
    # Speed and direction are explicit parameters so the signature matches the call
    # sites below, which pass the chosen action's speed and heading.
    distance_km = speed * timeDelay
    distance_miles = distance_km * 0.621371
    dest = VincentyDistance(miles=distance_miles).destination(Point(coords[0], coords[1]), direction)
    return dest.latitude, dest.longitude
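
# Discrete action set (assumption): the original code mixes continuous (speed, heading)
# pairs with argmax-style Q-value indexing, which cannot work with a 2-unit output layer.
# Here the action space is discretised into a small grid of (speed km/h, heading degrees)
# pairs so that a standard Q-learning update applies; the grid values are illustrative.
ACTIONS = [(s, h) for s in (5, 15, 25, 35, 45, 55) for h in range(0, 360, 45)]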

# Q-network: maps a (latitude, longitude) state to one Q-value per discrete action.
model = Sequential()
model.add(Dense(24, input_dim=2, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(len(ACTIONS)))
model.compile(optimizer='nadam', loss='mse')

gamma = 1.0    # discount factor
epsilon = 1.0  # exploration rate, decayed once per episode
m = []         # replay memory of (state, action, reward, next_state, done) transitions
x_actual, y_actual = 30, 50  # starting coordinates used for every episode

for i in range(50):
    state = np.array([[x_actual, y_actual]])
    for t in range(50):
        # Epsilon-greedy selection over the discrete action indices.
        if np.random.rand() <= epsilon:
            action = random.randrange(len(ACTIONS))
        else:
            action = int(np.argmax(model.predict(state)))
        act_speed, act_heading = ACTIONS[action]
        # There is no gym environment in this script; env.step() is replaced by the
        # dead-reckoning simulation in NewCoords, as the original commented-out line suggests.
        next_state = NewCoords(state[0], act_speed, act_heading, 1)
        next_state = np.array([next_state])
        # Placeholder reward (assumption): the original script never defines `reward`
        # (it is only sketched in the commented-out lines above); here it is the negative
        # Vincenty distance in miles back to the actual coordinates.
        reward = -VincentyDistance(Point(next_state[0][0], next_state[0][1]),
                                   Point(x_actual, y_actual)).miles
        # Placeholder terminal flag (assumption): the original never sets `done`;
        # an episode simply ends after 50 simulated steps.
        done = (t == 49)
        # One-step Q-learning target for the chosen action.
        tot = reward + gamma * np.max(model.predict(next_state))
        p = model.predict(state)[0]
        p[action] = tot
        model.fit(state, p.reshape(1, -1), epochs=1, verbose=0)
        m.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("Episode : {}, x_pred: {}, y_pred: {}".format(i, state[0][0], state[0][1]))
            break
    # Cap the replay memory and decay exploration once per episode.
    if len(m) >= 50000:
        del m[:5000]
    if epsilon > 0.01:
        epsilon *= 0.999
    # Experience replay: re-fit the Q-network on a random minibatch of stored transitions.
    if len(m) > 64:
        for state, action, reward, next_state, done in random.sample(m, 64):
            tot = reward
            if not done:
                tot = reward + gamma * np.max(model.predict(next_state))
            p = model.predict(state)[0]
            p[action] = tot
            model.fit(state, p.reshape(1, -1), epochs=1, verbose=0)

# Greedy roll-outs with the trained network. There is no gym `env` here, so
# env.reset() / env.render() / env.step() are replaced (assumption) by restarting from the
# actual coordinates and stepping the NewCoords simulation for 10 steps, printing the
# final position instead of rendering.
for i in range(20):
    state = np.array([[x_actual, y_actual]])
    for t in range(10):
        action = int(np.argmax(model.predict(state)))
        act_speed, act_heading = ACTIONS[action]
        next_state = NewCoords(state[0], act_speed, act_heading, 1)
        state = np.array([next_state])
    print("Rollout {}: final position ({}, {})".format(i, state[0][0], state[0][1]))