rLearn.m
classdef rLearn < handle
    % RLEARN  Tabular SARSA(lambda) agent with eligibility traces and an
    % epsilon-greedy policy over two actions (keep / buy).
    %
    % by Breno Rodrigues Brito
    % by Dyego Soares de Araujo
    % Last Edited 22/11/2013
    properties
        %% Macro Parameters
        epsilon;   % Randomness percentage (0-100)
        param;     % Action multiplication parameter
        gamma;     % Discount factor
        lambda;    % Eligibility-trace decay rate
        alpha;     % Learning rate
        nState;    % Discretization (number of bins per state dimension)
        %% Transient Information
        action;    % Action taken / previous action
        state;     % Current state / previous state
        rede;      % Current neural network (disabled; see commented code)
        %% TEST
        Q;         % Value of each action in each state
        E;         % Eligibility traces
    end
    methods
        %% Constructor
        function obj = rLearn(nState, epsilon, alpha, gamma, lambda)
            % Q, E, action, and state tables
            obj.state  = struct('cur', [], 'old', []);
            obj.action = struct('cur', [], 'old', []);
            % Parameters
            obj.epsilon = epsilon;
            obj.param   = 1;
            obj.lambda  = lambda;
            obj.gamma   = gamma;
            obj.alpha   = alpha;
            obj.nState  = nState(2); % only the second element is used
            % Generate the neural network (disabled; the tabular Q is used instead)
            % obj.rede = neuralObject(nState + 1, N, alpha, gamma, lambda);
            % obj.Q = 0.01*rand(obj.nState, obj.nState, 2, 2);
            obj.Q = zeros(obj.nState, obj.nState, 2, 2);
            resetE(obj);
        end
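
        % A minimal construction sketch (the values are hypothetical; the
        % expected shape of nState is inferred from nState(2) above):
        %
        %   agent = rLearn([10 10], 10, 0.1, 0.95, 0.9);
        %
        % This allocates a 10x10x2x2 Q-table of zeros and gives each call
        % to makeChoice a 10% chance of returning a random action.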
        %% Actions
        % Choose an action (epsilon-greedy)
        function action = makeChoice(obj, s)
            % Draw a random number in [0, 100)
            aleat = 100*rand;
            % Compute the Q-values for the two actions at state s
            qKeep = obj.Q(s(1),s(2),s(3),1);
            qBuy  = obj.Q(s(1),s(2),s(3),2);
            if (aleat > obj.epsilon)
                % Greedy action: buy if at least as valuable as keeping
                action = (qBuy >= qKeep);
            else
                % Random action (50/50)
                action = (randi(2)==1);
            end
            % %%% TEST
            % % Record buy states
            % obj.tableQ.states = [obj.tableQ.states s];
            % obj.tableQ.actions= [obj.tableQ.actions 1];
            % obj.tableQ.Qvalue = [obj.tableQ.Qvalue qBuy];
            % % Record keep states
            % obj.tableQ.states = [obj.tableQ.states s];
            % obj.tableQ.actions= [obj.tableQ.actions 0];
            % obj.tableQ.Qvalue = [obj.tableQ.Qvalue qKeep];
        end
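
        % A hypothetical call, assuming s is an index triple [i j k] into
        % the first three dimensions of Q:
        %
        %   a = agent.makeChoice([3 5 1]);   % true = buy, false = keep
        %
        % With epsilon = 10, roughly 90% of calls return the greedy action.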
        %% Registration Function
        function register(obj, state, action)
            % Record the state
            obj.state.old = obj.state.cur;
            obj.state.cur = state;
            % Record the action
            obj.action.old = obj.action.cur;
            obj.action.cur = action;
        end
        %% Initialization Function
        % Initialize the rLearn agent
        function initLearn(obj, state, action)
            % Receive the first state
            obj.state.cur = state;
            % Receive the first action
            obj.action.cur = action;
            % Reset E
            resetE(obj);
            % Reset the network (disabled)
            % reset(obj.rede);
            %%% TEST
            % Record the values in the Q table
            % obj.tableQ = struct('Qvalue', {0}, 'states', {state}, 'actions', {action});
        end
        %% Table Updates
        % Update the Q table (one SARSA(lambda) step)
        function updateQ(obj, reward)
            % % Build the inputs
            % inputPast = [obj.state.old; obj.param*obj.action.old];
            % inputPres = [obj.state.cur; obj.param*obj.action.cur];
            %
            % % Request a neural network update
            % adapt(obj.rede, inputPast, inputPres, reward);
            s1 = obj.state.cur;
            a1 = obj.action.cur + 1;  % logical action -> index 1 (keep) or 2 (buy)
            s  = obj.state.old;
            a  = obj.action.old + 1;
            qF = obj.Q(s1(1),s1(2),s1(3),a1);  % Q(s', a')
            qP = obj.Q(s(1),s(2),s(3),a);      % Q(s, a)
            % TD error
            delta = reward + obj.gamma*qF - qP;
            % Update every entry in proportion to its eligibility trace
            obj.Q = obj.Q + obj.alpha * delta * obj.E;
        end
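
        % The update above is the standard SARSA(lambda) rule:
        %
        %   delta   = r + gamma*Q(s',a') - Q(s,a)
        %   Q(x,b) <- Q(x,b) + alpha * delta * E(x,b)   for every pair (x,b)
        %
        % so all recently visited state-action pairs share the credit.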
        function resetE(obj)
            obj.E = zeros(obj.nState, obj.nState, 2, 2);
        end
        function updateE(obj)
            % Decay all traces
            obj.E = obj.gamma*obj.lambda*obj.E;
            % Previous state-action pair
            s = obj.state.old;
            a = obj.action.old + 1;
            % Accumulate the trace for the pair just visited
            obj.E(s(1),s(2),s(3),a) = obj.E(s(1),s(2),s(3),a) + 1;
        end
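
        % These are accumulating traces: a pair visited twice in a row
        % keeps a trace above 1. For example, with gamma*lambda = 0.5, two
        % consecutive calls to updateE for the same (s,a) leave
        % E(s,a) = 0.5 + 1 = 1.5.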
    end
end
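
% A minimal driver sketch, assuming a hypothetical environment whose
% getState() returns an index triple [i j k] and getReward() returns a
% scalar reward (neither is part of this class):
%
%   agent = rLearn([10 10], 10, 0.1, 0.95, 0.9);
%   s = getState();
%   a = agent.makeChoice(s);
%   agent.initLearn(s, a);
%   for t = 1:1000
%       s = getState();               % observe the next state
%       a = agent.makeChoice(s);
%       agent.register(s, a);         % shift cur -> old, store the new pair
%       agent.updateE();              % bump the trace for the old pair
%       agent.updateQ(getReward());   % SARSA(lambda) update on the Q table
%   end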