// Q_learning.cpp

#include <stdio.h>
#include <iostream>
#include <iomanip>
#include <ctime>
#include <cstdlib>

using namespace std;

 const int row = 9;   // number of grid rows
 const int col = 9;   // number of grid columns

// ---- Learning parameters ---------------------------------------------------
// NOTE(review): some C libraries declare a function named `gamma` in the global
// namespace via <math.h>; if that header is ever pulled in, this variable may
// clash with it -- confirm on the target toolchain before adding math includes.
double gamma = 0.8;     // discount applied to the next cell's value
double alpha = 0.1;     // learning rate used when blending old value and sample
double R_step = 120.0;  // step budget: reaching the goal within it earns the goal reward

// ---- Tables (all cells start at zero) ---------------------------------------
double R[row][col] = {};  // immediate reward per cell; main() fills in the two special cells
double Q[row][col] = {};  // learned value per cell

// ---- Run control ------------------------------------------------------------
int iterations = 0;   // episodes finished so far
int it_ = 0;          // number of episodes requested by the user
int user_action = 0;  // 0 = start, 1 = North, 2 = East, 3 = South, 4 = West

// ---- Agent state ------------------------------------------------------------
int i = 0;  // current row of the agent
int j = 0;  // current column of the agent
double Q_curr_state = Q[i][j];  // value reported to the user as "Q_value"
double Q_next_state = 0.0;      // value of the cell used in the latest update
double reward = 0.0;            // reward used in the latest update
double sample = 0.0;            // latest update target: reward + gamma * Q_next_state

// ---- Special cells ----------------------------------------------------------
// Goal cell: top row, middle column.
int R_indx_i = 0;
int R_indx_j = 4;

// Punishment cell: bottom row, leftmost column.
int P_indx_i = 8;
int P_indx_j = 0;

// ---- Episode timing ---------------------------------------------------------
int counter = 0;      // successful moves made since the counter was last reset
int Time_Reward = 0;  // negated step count, computed when the goal is reached

// ---- Helpers defined after main() -------------------------------------------
void print_R();          // dump the reward table
void print_Q();          // dump the value table
void iANDj_Generator();  // pick a random cell for the agent

int main()
{
        R[R_indx_i][R_indx_j] = 50; // reward
        R[P_indx_i][P_indx_j] = -60; // punishment

        print_R();
        
        cout << "\n iterations ? \n" ;
        cin >> it_;

        /* initialize random seed: */
  	srand (time(NULL));

        while ( iterations < it_ ) 
        {       
                if (user_action == 1 && i != 0) // North
                {
                        reward = R[i][j];
                        Q_next_state = Q[i - 1][j];
                        
                        sample = reward + gamma * Q_next_state;
                        Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        Q_curr_state = Q_next_state;
                        //printf(" Q_current_state = %f \n",Q_curr_state);
                        i--;
                        counter++;
                } else if (user_action == 1 && i == 0) // North
                {
                        cout << "You can't go further up!\n";
                } else if (user_action == 3 && i < (row - 1)) // South, i < row
                {
                        reward = R[i][j];
                        Q_next_state = Q[i + 1][j];
                        
                        sample = reward + gamma * Q_next_state;
                        Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        Q_curr_state = Q_next_state;
                        //printf(" Q_current_state = %f \n",Q_curr_state);
                        i++;
                        counter++;
                } else if (user_action == 3 && i >= (row - 1)) // South
                {
                        cout << "You can't go further down!\n";
                } else if (user_action == 2 && j < (col - 1)) // East
                {
                        reward = R[i][j];
                        Q_next_state = Q[i][j + 1];
                        
                        sample = reward + gamma * Q_next_state;
                        Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        Q_curr_state = Q_next_state;
                        //printf(" Q_current_state = %f \n",Q_curr_state);
                        j++;
                        counter++;
                } else if (user_action == 2 && j >= (col - 1)) // East, j > col
                {
                        cout << "You can't go further right!\n";
                } else if (user_action == 4 && j != 0 ) // West
                {
                        reward = R[i][j];
                        Q_next_state = Q[i][j - 1];
                        
                        sample = reward + gamma * Q_next_state;
                        Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        Q_curr_state = Q_next_state;
                        //printf(" Q_current_state = %f \n",Q_curr_state);
                        j--;
                        counter++;
                } else if (user_action == 4 && j == 0) // West, j = 1
                {
                        cout << "You can't go further left!\n";
                } else if (user_action == 0) // start
                {
                        cout << "\nGenerating random pose in grid for 1st. time!\n";
                        iANDj_Generator();
                }
                
                // + Reward
                if (i == R_indx_i && j == R_indx_j)
                {
                        Time_Reward = -counter;
                        cout << " Time Reward = "<< Time_Reward << "\n";
                        
                        if(abs(Time_Reward) <= R_step)
                        {
                                
                                cout << "\n Goal is achieved <= " << R_step << " time steps\n";
                                reward = R[i][j];
                                Q_next_state = 0;
                        
                                sample = reward + gamma * Q_next_state;
                                Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        } else
                        {
                                cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n";
                                reward = -1; // ???
                                Q_next_state = 0;
                        
                                sample = reward + gamma * Q_next_state;
                                Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        }
                        
                        counter = 0;
                        print_Q();
                        iANDj_Generator();
                        iterations++;
                } else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment
                {
                           cout << "\n Failed to achieve a goal! \n";
                                
                                reward = R[i][j];
                                Q_next_state = 0;
                        
                                sample = reward + gamma * Q_next_state;
                                Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                                
                                print_Q();
                                iANDj_Generator();
                                iterations++;  
                }
                
                
                cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
                
                /*if(i <= R_indx_i && j > R_indx_j) // current pose : North-East (1)
                {
                        user_action = ((double) rand() / (RAND_MAX)) * (5 - 3) + 3;
                } else if (i <= R_indx_i && j <= R_indx_j) // current pose : North-West (2)
                {
                        user_action = ((double) rand() / (RAND_MAX)) * (4 - 2) + 2;
                } else if (i > R_indx_i && j <= R_indx_j) // current pose : South-West (3)
                {
                        user_action = ((double) rand() / (RAND_MAX)) * (3 - 1) + 1;
                } else if (i > R_indx_i && j > R_indx_j) // current pose : South-East (4)
                {
                        user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
                }*/
                
                if( j > R_indx_j) // current pose : Right (1)
                {
                        cout << "\n Right Side of the goal ... \n";
                        user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
                } else if (j <= R_indx_j) // current pose : Left (2)
                {
                        cout << "\n Left Side of the goal ... \n";
                        user_action = ((double) rand() / (RAND_MAX)) * (4 - 1) + 1;
                }
                
                
                //cin >> user_action;
                printf(" user action = %i \n",user_action);
                
          }
return 0;
}

// Writes the reward table R to standard output: a " R = " heading, then one
// line per grid row with the cells separated by " , ", and a trailing blank
// line. Each cell is padded to a field width of (col - 1) characters.
void print_R()
{
        cout << " R = \n";

        for (int r = 0; r < row; ++r)
        {
                for (int c = 0; c < col; ++c)
                {
                        // The separator goes between cells only, so it is
                        // emitted ahead of every cell except the first.
                        if (c != 0)
                        {
                                cout << " , ";
                        }
                        cout << setw(col - 1) << R[r][c];
                }
                cout << "\n";
        }

        cout << "\n";
}
// Writes the value table Q to standard output: a " Q = " heading, then one
// line per grid row with the cells separated by " , ", and a trailing blank
// line. Each cell is padded to a field width of (col - 1) characters.
void print_Q()
{
        cout << " Q = \n";

        for (int r = 0; r < row; ++r)
        {
                for (int c = 0; c < col; ++c)
                {
                        // The separator goes between cells only, so it is
                        // emitted ahead of every cell except the first.
                        if (c != 0)
                        {
                                cout << " , ";
                        }
                        cout << setw(col - 1) << Q[r][c];
                }
                cout << "\n";
        }

        cout << "\n";
}

// Places the agent on a random grid cell.
//
// Sets the global position (i, j) to a random row in [0, row) and column in
// [0, col), copies that cell's value into Q_curr_state, and prints the chosen
// position and value. Consumes two values from rand(): the first for the row,
// the second for the column.
void iANDj_Generator()
{
        // Generate Random Pose for current state (position)

        // Dividing by RAND_MAX + 1.0 maps rand() into [0, 1). Dividing by
        // RAND_MAX alone yields exactly 1.0 when rand() == RAND_MAX, which made
        // the scaled index equal to `row`/`col` and read past the end of Q.
        const double unit_range = RAND_MAX + 1.0;

        i = static_cast<int>((rand() / unit_range) * row);
        j = static_cast<int>((rand() / unit_range) * col);

        Q_curr_state = Q[i][j];

        cout << "\n i = " << i << " , j = " << j << " => Q[i][j] = " << Q_curr_state << " \n";
}