Skip to content
Snippets Groups Projects
Select Git revision
  • fc7811c55c4906494a8eadd6fbfd75e5828bd2ff
  • master default
2 results

XdfParser.java

Blame
  • Q_learning.cpp 7.31 KiB
    #include <stdio.h>
    #include <iostream>
    #include <iomanip>
    #include <ctime>
    #include <cstdlib>
    
    using namespace std;
    
    
    // Grid dimensions: number of rows and columns of the R and Q tables.
    const int row = 13;
    const int col = 13;


    // Parameters of the update performed in main():
    //   sample  = reward + gamma * Q_next_state
    //   Q[i][j] = (1 - alpha) * Q[i][j] + alpha * sample
    // NOTE(review): some C libraries declare a gamma() function in <math.h>;
    // this name may clash if that header ever gets included - confirm.
    double gamma = 0.8;
    double alpha = 0.1;
    // Step-count threshold compared against |Time_Reward| when the goal cell is reached.
    double R_step = 200.0;

    // Per-cell reward table and per-cell learned value table; every entry starts at zero.
    double R[row][col] = {};
    double Q[row][col] = {};

    bool Goal = false;

    int iterations = 1;       // loop counter, compared against it_ in main()
    int it_;                  // read from cin in main()
    int user_action;          // action code: N(1), E(2), S(3), W(4)
    int update_final_state;
    double Q_next_state;      // Q value used as the "next state" term of the update

    // Current agent position (row index i, column index j) and the Q value shown for it.
    int i = 0;
    int j = 0;
    double Q_curr_state = Q[i][j];

    double reward;            // reward term of the most recent update

    // Cell that main() marks with the positive reward: first row, middle column.
    int R_indx_i = 0;
    int R_indx_j = col / 2;

    // Cell that main() marks with the negative reward: second-to-last row, last column.
    int P_indx_i = row - 2;
    int P_indx_j = col - 1;

    int counter = 1;          // step counter, negated into Time_Reward at the goal
    int Time_Reward;
    double sample;            // reward + gamma * Q_next_state of the most recent update

    // Functions defined after main().
    void print_R();
    void print_Q();
    void generate_rand();
    
    int main()
    {
            R[R_indx_i][R_indx_j] = 50; // reward
            R[P_indx_i][P_indx_j] = -100; // punishment
    
            print_R();
            
            cout << "\n iterations ? \n" ;
            cin >> it_;
    
            /* initialize random seed: */
      	srand (time(NULL));
    
            while ( iterations <= it_ ) 
            {       
                    if (user_action == 1 && i != 0) // North
                    {
                            reward = R[i][j];
                            Q_next_state = Q[i - 1][j];
                            
                            sample = reward + gamma * Q_next_state;
                            Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            Q_curr_state = Q_next_state;
                            //printf(" Q_current_state = %f \n",Q_curr_state);
                            i--;
                            counter++;
                    } else if (user_action == 1 && i == 0) // North
                    {
                            cout << "You can't go further up!\n";
                    } else if (user_action == 3 && i < (row - 1)) // South, i < row
                    {
                            reward = R[i][j];
                            Q_next_state = Q[i + 1][j];
                            
                            sample = reward + gamma * Q_next_state;
                            Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            Q_curr_state = Q_next_state;
                            //printf(" Q_current_state = %f \n",Q_curr_state);
                            i++;
                            counter++;
                    } else if (user_action == 3 && i >= (row - 1)) // South
                    {
                            cout << "You can't go further down!\n";
                    } else if (user_action == 2 && j < (col - 1)) // East
                    {
                            reward = R[i][j];
                            Q_next_state = Q[i][j + 1];
                            
                            sample = reward + gamma * Q_next_state;
                            Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            Q_curr_state = Q_next_state;
                            //printf(" Q_current_state = %f \n",Q_curr_state);
                            j++;
                            counter++;
                    } else if (user_action == 2 && j >= (col - 1)) // East, j > col
                    {
                            cout << "You can't go further right!\n";
                    } else if (user_action == 4 && j != 0 ) // West
                    {
                            reward = R[i][j];
                            Q_next_state = Q[i][j - 1];
                            
                            sample = reward + gamma * Q_next_state;
                            Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            Q_curr_state = Q_next_state;
                            //printf(" Q_current_state = %f \n",Q_curr_state);
                            j--;
                            counter++;
                    } else if (user_action == 4 && j == 0) // West, j = 1
                    {
                            cout << "You can't go further left!\n";
                    } else
                    {
                            cout << "\nGenerating random pose in grid for 1st. time!\n";
                            generate_rand();
                    }
                    
                    // + Reward
                    if (i == R_indx_i && j == R_indx_j)
                    {
                            Time_Reward = -counter;
                            cout << " Time Reward = "<< Time_Reward << "\n";
                            
                            if(abs(Time_Reward) <= R_step)
                            {
                                    
                                    cout << "\n Goal is achieved <= " << R_step << " time steps\n";
                                    reward = R[i][j];
                                    Q_next_state = 0;
                            
                                    sample = reward + gamma * Q_next_state;
                                    Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            } else
                            {
                                    cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n";
                                    reward = -1; // ???
                                    Q_next_state = 0;
                            
                                    sample = reward + gamma * Q_next_state;
                                    Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                            }
                            
                            counter = 1;
                            print_Q();
                            generate_rand();
                            iterations++;
                                    
                                    //Goal = true;     
                    } else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment
                    {
                               cout << "\n Failed to achieve a goal! \n";
                                    
                                    reward = R[i][j];
                                    Q_next_state = 0;
                            
                                    sample = reward + gamma * Q_next_state;
                                    Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                                    
                                    print_Q();
                                    generate_rand();
                                    iterations++;
                                    //Goal = true;     
                    }
                    
                    cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
                    user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
                    printf(" ramdom user action = %i \n",user_action);
                    //cin >> user_action;
                    
              }
    return 0;
    }
    
    // Prints the reward table R to stdout: a " R = " heading, then one line per
    // grid row with the cells right-aligned in fields of width (col - 1) and
    // separated by " , ", followed by an empty line.
    void print_R()
    {
        cout << " R = \n";

        for (int r = 0; r < row; ++r)
        {
            for (int c = 0; c < col; ++c)
            {
                // Separator goes between cells only, never after the last one.
                if (c > 0)
                {
                    cout << " , ";
                }
                cout << setw(col - 1) << R[r][c];
            }
            cout << "\n";
        }

        cout << "\n";
    }
    // Prints the value table Q to stdout: a " Q = " heading, then one line per
    // grid row with the cells right-aligned in fields of width (col - 1) and
    // separated by " , ", followed by an empty line.
    void print_Q()
    {
        cout << " Q = \n";

        for (int r = 0; r < row; ++r)
        {
            for (int c = 0; c < col; ++c)
            {
                // Separator goes between cells only, never after the last one.
                if (c > 0)
                {
                    cout << " , ";
                }
                cout << setw(col - 1) << Q[r][c];
            }
            cout << "\n";
        }

        cout << "\n";
    }
    
    // Places the agent on a random grid cell.
    //
    // Writes the globals i (row index in [0, row)) and j (column index in
    // [0, col)), refreshes Q_curr_state from Q[i][j] and prints the chosen cell
    // together with its Q value.
    void generate_rand()
    {
        // rand() may return RAND_MAX itself, so dividing by RAND_MAX can give
        // exactly 1.0 and therefore an index equal to row or col, one past the
        // end of Q. Dividing by RAND_MAX + 1.0 keeps the result strictly below
        // the table size.
        const double range = RAND_MAX + 1.0;

        i = static_cast<int>((rand() / range) * row);
        j = static_cast<int>((rand() / range) * col);

        Q_curr_state = Q[i][j];

        cout << "\n i = " << i << " , j = " << j << " => Q-value[i][j] = " << Q_curr_state << " \n";
    }