Skip to content
Snippets Groups Projects
Select Git revision
1 result Searching

README.md

Blame
  • Forked from processes / LabProcesses.jl
    Source project has a limited visibility.
    Q_learning.cpp 7.31 KiB
    #include <stdio.h>
    #include <iostream>
    #include <iomanip>
    #include <ctime>
    #include <cstdlib>
    
    using namespace std;
    
    
    // ---- Grid dimensions ----------------------------------------------------
    const int row = 13;
    const int col = 13;


    // ---- Learning parameters ------------------------------------------------
    // In main()'s update rule, gamma scales the next state's Q value and alpha
    // blends the previous estimate with the freshly computed sample.
    // NOTE(review): glibc's <math.h> also declares a function named gamma; this
    // global may clash if that header ever gets pulled in -- confirm before
    // adding <cmath>.
    double gamma = 0.8;
    double alpha = 0.1;
    double R_step = 200.0;

    // ---- Tables (every cell starts at zero) ---------------------------------
    double R[row][col] = {};
    double Q[row][col] = {};

    bool Goal = false;

    // ---- Loop bookkeeping ---------------------------------------------------
    int iterations = 1;            // compared against it_ by the loop in main()
    int it_ = 0;                   // read from std::cin in main()
    int user_action = 0;           // main() treats the value 1 as "North"
    int update_final_state = 0;
    double Q_next_state = 0.0;

    // Indices main() uses to address cells of R and Q.
    int i = 0;
    int j = 0;
    // Copied once during start-up, not a live alias: i == j == 0 and Q is all
    // zeros at that point, so this begins as 0.0.
    double Q_curr_state = Q[i][j];

    double reward = 0.0;

    // Cell that main() marks with the reward value: first row, middle column.
    int R_indx_i = 0;
    int R_indx_j = col / 2;

    // Cell that main() marks with the punishment value.
    int P_indx_i = row - 2;
    int P_indx_j = col - 1;

    int counter = 1;
    int Time_Reward = 0;
    double sample = 0.0;

    // Helpers defined elsewhere in this file.
    void print_R();
    void print_Q();
    void generate_rand();
    
    int main()
    {
            R[R_indx_i][R_indx_j] = 50; // reward
            R[P_indx_i][P_indx_j] = -100; // punishment
    
            print_R();
            
            cout << "\n iterations ? \n" ;
            cin >> it_;
    
            /* initialize random seed: */
      	srand (time(NULL));
    
            while ( iterations <= it_ ) 
            {       
                    if (user_action == 1 && i != 0) // North
                    {
                            reward = R[i][j];
                            Q_next_state = Q[i - 1][j];
                            
                            sample = reward + gamma * Q_next_state;
                            Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            Q_curr_state = Q_next_state;
                            //printf(" Q_current_state = %f \n",Q_curr_state);