// Q_learning.cpp

  
 #include <stdio.h>
#include <iostream>
#include <iomanip>
#include <ctime>
#include <cstdlib>

using namespace std;


// Grid dimensions shared by the reward table R and the value table Q.
const int row = 13;
const int col = 13;

// Update-rule weights used by main(): `gamma` scales the next state's Q value
// when forming the sample, and `alpha` blends that sample into the stored Q.
// NOTE(review): glibc's <math.h> declares a legacy gamma() function; if that
// header is pulled in transitively this global may clash with it -- confirm.
double gamma = 0.8;
double alpha = 0.1;
double R_step = 200.0;  // not referenced in the visible part of main()

// Per-cell tables; every entry starts at zero.
double R[row][col] = {};
double Q[row][col] = {};

bool Goal = false;

// Loop bookkeeping: main() reads `it_` from stdin and keeps looping while
// `iterations <= it_`.
int iterations = 1;
int it_ = 0;
int user_action = 0;  // main() treats the value 1 as the "North" move
int update_final_state = 0;
double Q_next_state = 0.0;

// Current cell indices into R and Q.
int i = 0;
int j = 0;
// Evaluated once at start-up, while i == j == 0 and Q is still all zeros.
double Q_curr_state = Q[i][j];

double reward = 0.0;

// Cell that main() marks with the +50 reward: first row, middle column.
int R_indx_i = 0;
int R_indx_j = col / 2;

// Cell that main() marks with the -100 punishment: second-to-last row,
// last column.
int P_indx_i = row - 2;
int P_indx_j = col - 1;

int counter = 1;
int Time_Reward = 0;
double sample = 0.0;

// Forward declarations; the definitions are not in this part of the file.
void print_R();
void print_Q();
void generate_rand();

int main()
{
        R[R_indx_i][R_indx_j] = 50; // reward
        R[P_indx_i][P_indx_j] = -100; // punishment

        print_R();
        
        cout << "\n iterations ? \n" ;
        cin >> it_;

        /* initialize random seed: */
  	srand (time(NULL));

        while ( iterations <= it_ ) 
        {       
                if (user_action == 1 && i != 0) // North
                {
                        reward = R[i][j];
                        Q_next_state = Q[i - 1][j];
                        
                        sample = reward + gamma * Q_next_state;
                        Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                        Q_curr_state = Q_next_state;
                        //printf(" Q_current_state = %f \n",Q_curr_state);