// File:   Q_learning.cpp (8.49 KiB)
// Author: Farid Alijani
#include <stdio.h>
#include <iostream>
#include <iomanip>
#include <ctime>
#include <cstdlib>
using namespace std;
// ---------------------------------------------------------------------------
// Grid dimensions: R and Q below are row x col tables, indexed [i][j].
// ---------------------------------------------------------------------------
const int row = 9;
const int col = 9;

// ---------------------------------------------------------------------------
// Learning parameters.
// ---------------------------------------------------------------------------
double gamma = 0.8;    // weight of the next cell's Q value inside `sample`
double alpha = 0.1;    // blend factor between the old Q value and `sample`
double R_step = 120.0; // step budget: reaching the goal in more steps is penalised

// ---------------------------------------------------------------------------
// Tables: R holds the per-cell rewards, Q the learned values (both start at 0).
// ---------------------------------------------------------------------------
double R[row][col] = {};
double Q[row][col] = {};

// ---------------------------------------------------------------------------
// Loop bookkeeping.
// ---------------------------------------------------------------------------
int iterations = 0;  // episodes finished so far
int it_ = 0;         // number of episodes requested on stdin
int user_action = 0; // 0 = start, 1 = North, 2 = East, 3 = South, 4 = West
double Q_next_state = 0.0;

// Current agent cell: i is the row index, j the column index.
int i = 0;
int j = 0;

// Q value shown for the current cell; Q[0][0] of the all-zero table at startup.
double Q_curr_state = 0.0;
double reward = 0.0;

// Goal cell (positive reward): first row, column 4.
int R_indx_i = 0;
int R_indx_j = 4;

// Punishment cell (negative reward): last row, first column.
int P_indx_i = 8;
int P_indx_j = 0;

int counter = 0;     // moves made since the last counter reset
int Time_Reward = 0; // negated step count, computed when the goal is reached
double sample = 0.0; // reward + gamma * Q_next_state

// Helpers defined further down in this file.
void print_R();
void print_Q();
void iANDj_Generator();
int main()
{
R[R_indx_i][R_indx_j] = 50; // reward
R[P_indx_i][P_indx_j] = -60; // punishment
print_R();
cout << "\n iterations ? \n" ;
cin >> it_;
/* initialize random seed: */
srand (time(NULL));
while ( iterations < it_ )
{
if (user_action == 1 && i != 0) // North
{
reward = R[i][j];
Q_next_state = Q[i - 1][j];
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
Q_curr_state = Q_next_state;
//printf(" Q_current_state = %f \n",Q_curr_state);
i--;
counter++;
} else if (user_action == 1 && i == 0) // North
{
cout << "You can't go further up!\n";
} else if (user_action == 3 && i < (row - 1)) // South, i < row
{
reward = R[i][j];
Q_next_state = Q[i + 1][j];
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
Q_curr_state = Q_next_state;
//printf(" Q_current_state = %f \n",Q_curr_state);
i++;
counter++;
} else if (user_action == 3 && i >= (row - 1)) // South
{
cout << "You can't go further down!\n";
} else if (user_action == 2 && j < (col - 1)) // East
{
reward = R[i][j];
Q_next_state = Q[i][j + 1];
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
Q_curr_state = Q_next_state;
//printf(" Q_current_state = %f \n",Q_curr_state);
j++;
counter++;
} else if (user_action == 2 && j >= (col - 1)) // East, j > col
{
cout << "You can't go further right!\n";
} else if (user_action == 4 && j != 0 ) // West
{
reward = R[i][j];
Q_next_state = Q[i][j - 1];
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
Q_curr_state = Q_next_state;
//printf(" Q_current_state = %f \n",Q_curr_state);
j--;
counter++;
} else if (user_action == 4 && j == 0) // West, j = 1
{
cout << "You can't go further left!\n";
} else if (user_action == 0) // start
{
cout << "\nGenerating random pose in grid for 1st. time!\n";
iANDj_Generator();
}
// + Reward
if (i == R_indx_i && j == R_indx_j)
{
Time_Reward = -counter;
cout << " Time Reward = "<< Time_Reward << "\n";
if(abs(Time_Reward) <= R_step)
{
cout << "\n Goal is achieved <= " << R_step << " time steps\n";
reward = R[i][j];
Q_next_state = 0;
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
} else
{
cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n";
reward = -1; // ???
Q_next_state = 0;
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
}
counter = 0;
print_Q();
iANDj_Generator();
iterations++;
} else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment
{
cout << "\n Failed to achieve a goal! \n";
reward = R[i][j];
Q_next_state = 0;
sample = reward + gamma * Q_next_state;
Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
print_Q();
iANDj_Generator();
iterations++;
}
cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
/*if(i <= R_indx_i && j > R_indx_j) // current pose : North-East (1)
{
user_action = ((double) rand() / (RAND_MAX)) * (5 - 3) + 3;
} else if (i <= R_indx_i && j <= R_indx_j) // current pose : North-West (2)
{
user_action = ((double) rand() / (RAND_MAX)) * (4 - 2) + 2;
} else if (i > R_indx_i && j <= R_indx_j) // current pose : South-West (3)
{
user_action = ((double) rand() / (RAND_MAX)) * (3 - 1) + 1;
} else if (i > R_indx_i && j > R_indx_j) // current pose : South-East (4)
{
user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
}*/
if( j > R_indx_j) // current pose : Right (1)
{
cout << "\n Right Side of the goal ... \n";
user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
} else if (j <= R_indx_j) // current pose : Left (2)
{
cout << "\n Left Side of the goal ... \n";
user_action = ((double) rand() / (RAND_MAX)) * (4 - 1) + 1;
}
//cin >> user_action;
printf(" user action = %i \n",user_action);
}
return 0;
}
// Writes the reward table R to stdout, one grid row per line.
// Every value is right-aligned in a field of width (col - 1) and neighbouring
// values are separated by " , ". A blank line follows the table.
void print_R()
{
    cout << " R = \n";
    for (int r = 0; r < row; ++r)
    {
        for (int c = 0; c < col; ++c)
        {
            // Separator goes between values only, never after the last one.
            if (c > 0)
            {
                cout << " , ";
            }
            cout << setw(col - 1) << R[r][c];
        }
        cout << "\n";
    }
    cout << "\n";
}
// Writes the value table Q to stdout, one grid row per line.
// Every value is right-aligned in a field of width (col - 1) and neighbouring
// values are separated by " , ". A blank line follows the table.
void print_Q()
{
    cout << " Q = \n";
    for (int r = 0; r < row; ++r)
    {
        for (int c = 0; c < col; ++c)
        {
            // Separator goes between values only, never after the last one.
            if (c > 0)
            {
                cout << " , ";
            }
            cout << setw(col - 1) << Q[r][c];
        }
        cout << "\n";
    }
    cout << "\n";
}
// Places the agent on a uniformly random grid cell: sets the globals i (row
// index) and j (column index), refreshes Q_curr_state from Q[i][j] and prints
// the chosen pose.
void iANDj_Generator()
{
    // rand() may return RAND_MAX itself. Dividing by RAND_MAX + 1.0 (instead
    // of RAND_MAX) keeps the scaled value strictly below row / col, so the
    // truncated index can never be one past the end of the table.
    i = static_cast<int>((rand() / (RAND_MAX + 1.0)) * row);
    j = static_cast<int>((rand() / (RAND_MAX + 1.0)) * col);
    Q_curr_state = Q[i][j];
    cout << "\n i = " << i << " , j = " << j << " => Q[i][j] = " << Q_curr_state << " \n";
}