diff --git a/MobileRobot/Machine_Learning/Practice/Matrix.txt b/MobileRobot/Machine_Learning/Practice/Matrix.txt new file mode 100644 index 0000000000000000000000000000000000000000..c96d5841335008edfc675716621edc887b5c1b55 --- /dev/null +++ b/MobileRobot/Machine_Learning/Practice/Matrix.txt @@ -0,0 +1,20 @@ +0.00382868 , 0.116191 , 0.745465 , 3.89944 , 8.15744 , 4.01626 , 1.47203 , 0.551333 , 0.174429 , 0.0870335 , 0.0250532 , 0.00912423 , 0.00249166 , 0.00102693 , 0.000419753 , 0.000131468 , 5.00254e-05 , 2.0099e-05 , 8.40168e-06 , + 0.0101646 , 0.136601 , 0.612175 , 1.85587 , 4.20481 , 1.54658 , 0.478056 , 0.24397 , 0.105084 , 0.0525671 , 0.0232909 , 0.00943698 , 0.00320491 , 0.00112817 , 0.00034699 , 0.000126045 , 6.40941e-05 , 1.59779e-05 , 7.12482e-06 , +0.000805043 , 0.0332884 , 0.274046 , 1.08019 , 1.17002 , 0.698387 , 0.30359 , 0.119663 , 0.0579422 , 0.0297607 , 0.0131256 , 0.00460919 , 0.00148765 , 0.000765605 , 0.000216942 , 8.66716e-05 , 4.05771e-05 , 1.33528e-05 , 4.1249e-06 , +2.48899e-05 , 0.0128954 , 0.0802105 , 0.287346 , 0.689204 , 0.346275 , 0.166353 , 0.0684359 , 0.0311509 , 0.0167736 , 0.00770705 , 0.00215702 , 0.0011529 , 0.000538216 , 0.000209333 , 8.77233e-05 , 2.92078e-05 , 7.66706e-06 , 2.55988e-06 , +0.000129703 , 0.00322588 , 0.0205338 , 0.128641 , 0.305247 , 0.173804 , 0.0898347 , 0.0453639 , 0.0163717 , 0.00600538 , 0.00381917 , 0.00155926 , 0.000689671 , 0.000228423 , 7.95174e-05 , 4.45367e-05 , 1.68923e-05 , 5.15815e-06 , 1.66697e-06 , +5.17332e-05 , 0.00126843 , 0.0184666 , 0.0514178 , 0.127207 , 0.0628717 , 0.0314178 , 0.0160154 , 0.00634512 , 0.00294345 , 0.00104471 , 0.00076148 , 0.000375335 , 0.000163789 , 5.89502e-05 , 2.339e-05 , 9.06862e-06 , 3.25677e-06 , 1.00663e-06 , +4.03991e-07 , 6.23162e-05 , 0.00188303 , 0.00859995 , 0.0340181 , 0.025325 , 0.00959944 , 0.00791442 , 0.00304185 , 0.00108383 , 0.000699508 , 0.000268954 , 0.000117074 , 9.07904e-05 , 3.65932e-05 , 1.19388e-05 , 6.80645e-06 , 1.31515e-06 , 6.25406e-07 , + -1.97339 , 5.01968e-05 , 0.000735782 , 0.00252916 , 0.0105147 , 0.00578474 , 0.00506614 , 0.0026018 , 0.00151745 , 0.000714455 , 0.000315345 , 0.000122624 , 6.38013e-05 , 4.00137e-05 , 1.86079e-05 , 8.7881e-06 , 4.12973e-06 , 1.4014e-06 , 5.59912e-07 , + -28.2006 , 1.37268e-05 , 0.000366493 , 0.00152154 , 0.00318546 , 0.00219539 , 0.00198565 , 0.0010295 , 0.000568695 , 0.000243552 , 0.000121169 , 6.63649e-05 , 2.64561e-05 , 2.05659e-05 , 8.72049e-06 , 3.48901e-06 , 2.00535e-06 , 8.20102e-07 , 3.50581e-07 , + -2.37334 , 1.53224e-05 , 0.000170402 , 0.000709633 , 0.00106954 , 0.0010878 , 0.000649422 , 0.00035624 , 0.000168493 , 0.000124677 , 7.17247e-05 , 1.90548e-05 , 1.4612e-05 , 7.66143e-06 , 4.0633e-06 , 1.69388e-06 , 8.95118e-07 , 4.37694e-07 , 1.91993e-07 , + -0.357137 , 1.03247e-05 , 7.81591e-05 , 0.000262295 , 0.000255134 , 0.000269388 , 0.000257623 , 0.000146377 , 5.59075e-05 , 3.91945e-05 , 2.23471e-05 , 1.16324e-05 , 5.09942e-06 , 3.5008e-06 , 1.83621e-06 , 6.90279e-07 , 3.68535e-07 , 1.98699e-07 , 1.06421e-07 , +-0.0355914 , 1.76753e-06 , 1.67835e-05 , 5.89723e-05 , 0.000105021 , 8.62172e-05 , 6.56463e-05 , 4.19768e-05 , 2.39549e-05 , 1.49727e-05 , 9.48064e-06 , 4.59358e-06 , 2.30864e-06 , 1.29036e-06 , 6.71866e-07 , 3.25367e-07 , 1.56607e-07 , 1.04578e-07 , 5.1386e-08 , +-0.00024576 , 7.1379e-07 , 6.31042e-06 , 2.44207e-05 , 5.04218e-05 , 3.12887e-05 , 2.64148e-05 , 1.43321e-05 , 1.06089e-05 , 8.12063e-06 , 3.65203e-06 , 2.59047e-06 , 1.31982e-06 , 6.73679e-07 , 3.67204e-07 , 1.42886e-07 , 6.388e-08 , 5.66555e-08 , 2.36875e-08 , +7.19718e-09 , 8.29073e-08 , 1.65837e-06 , 9.19997e-06 , 2.08386e-05 , 1.40195e-05 , 1.00378e-05 , 5.77964e-06 , 2.89724e-06 , 2.27576e-06 , 1.27591e-06 , 9.47388e-07 , 5.31476e-07 , 2.79771e-07 , 1.71075e-07 , 7.78852e-08 , 4.79251e-08 , 2.57395e-08 , 1.48816e-08 , +2.09905e-09 , 5.91897e-08 , 8.77307e-07 , 4.30383e-06 , 4.83682e-06 , 5.12303e-06 , 3.8108e-06 , 2.47933e-06 , 1.38905e-06 , 1.11872e-06 , 7.17452e-07 , 4.48308e-07 , 2.7743e-07 , 1.59136e-07 , 7.28462e-08 , 3.8976e-08 , 2.20561e-08 , 1.30902e-08 , 6.81637e-09 , +4.50704e-09 , 6.99315e-08 , 3.02224e-07 , 1.26951e-06 , 2.09616e-06 , 1.93872e-06 , 9.87246e-07 , 8.64459e-07 , 8.06824e-07 , 4.42032e-07 , 2.83851e-07 , 1.81637e-07 , 1.34092e-07 , 7.4948e-08 , 4.322e-08 , 1.69923e-08 , 9.22747e-09 , 5.70149e-09 , 3.46033e-09 , + 6.928e-11 , 7.37901e-09 , 1.62669e-07 , 5.12681e-07 , 7.72272e-07 , 4.69803e-07 , 2.66095e-07 , 1.85097e-07 , 2.33462e-07 , 1.6689e-07 , 1.2692e-07 , 1.16261e-07 , 6.24054e-08 , 2.99576e-08 , 1.67464e-08 , 1.03437e-08 , 3.95863e-09 , 2.61051e-09 , 1.66694e-09 , +2.99368e-10 , 2.90294e-09 , 1.42247e-08 , 1.58879e-07 , 2.79446e-07 , 1.77048e-07 , 1.40554e-07 , 9.84467e-08 , 7.42305e-08 , 6.91373e-08 , 6.00053e-08 , 5.27598e-08 , 2.45167e-08 , 1.46483e-08 , 8.48451e-09 , 4.46223e-09 , 2.55844e-09 , 1.51404e-09 , 7.86679e-10 , +1.22124e-10 , 2.49068e-09 , 8.83616e-09 , 6.57874e-08 , 9.11198e-08 , 7.67968e-08 , 4.556e-08 , 4.63382e-08 , 3.39e-08 , 3.50163e-08 , 2.2234e-08 , 1.47875e-08 , 9.44545e-09 , 7.25721e-09 , 4.05418e-09 , 2.26837e-09 , 1.30559e-09 , 5.33087e-10 , 3.92974e-10 , + diff --git a/MobileRobot/Machine_Learning/Practice/Q_Learning b/MobileRobot/Machine_Learning/Practice/Q_Learning index 18a74ef4ae035bfd7a1a2b6ebdf1b7e606facd1e..63fdccb8e49b78ddb9ae0975c27cd78057853bff 100755 Binary files a/MobileRobot/Machine_Learning/Practice/Q_Learning and b/MobileRobot/Machine_Learning/Practice/Q_Learning differ diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp index 6447846e9a1f50fe7bc7ccb8fe8b6f9050b900d4..69e42864b2b5af46ea7744ca5d4ef0495263346a 100644 --- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp +++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp @@ -3,26 +3,26 @@ #include <iomanip> #include <ctime> #include <cstdlib> - +#include <fstream> using namespace std; - const int row = 9; - const int col = 9; - +const int row = 20; +const int col = 20; +ofstream file; double gamma = .8; double alpha = .1; -double R_step = 120; +double R_step = row*col; -double R[row][col] = {0}; -double Q[row][col] = {0}; +double R[row][col] = { 0 }; +double Q[row][col] = { 0 }; int iterations; int it_; int user_action; double Q_next_state; -int i,j; +int i, j; double Q_curr_state = Q[i][j]; double reward; @@ -39,208 +39,244 @@ double sample; void print_R(); void print_Q(); +void save2file(); void iANDj_Generator(); int main() { - R[R_indx_i][R_indx_j] = 50; // reward - R[P_indx_i][P_indx_j] = -60; // punishment - - print_R(); - - cout << "\n iterations ? \n" ; - cin >> it_; - - /* initialize random seed: */ - srand (time(NULL)); - - while ( iterations < it_ ) - { - if (user_action == 1 && i != 0) // North - { - reward = R[i][j]; - Q_next_state = Q[i - 1][j]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - i--; - counter++; - } else if (user_action == 1 && i == 0) // North - { - cout << "You can't go further up!\n"; - } else if (user_action == 3 && i < (row - 1)) // South, i < row - { - reward = R[i][j]; - Q_next_state = Q[i + 1][j]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - i++; - counter++; - } else if (user_action == 3 && i >= (row - 1)) // South - { - cout << "You can't go further down!\n"; - } else if (user_action == 2 && j < (col - 1)) // East - { - reward = R[i][j]; - Q_next_state = Q[i][j + 1]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - j++; - counter++; - } else if (user_action == 2 && j >= (col - 1)) // East, j > col - { - cout << "You can't go further right!\n"; - } else if (user_action == 4 && j != 0 ) // West - { - reward = R[i][j]; - Q_next_state = Q[i][j - 1]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - j--; - counter++; - } else if (user_action == 4 && j == 0) // West, j = 1 - { - cout << "You can't go further left!\n"; - } else if (user_action == 0) // start - { - cout << "\nGenerating random pose in grid for 1st. time!\n"; - iANDj_Generator(); - } - - // + Reward - if (i == R_indx_i && j == R_indx_j) - { - Time_Reward = -counter; - cout << " Time Reward = "<< Time_Reward << "\n"; - - if(abs(Time_Reward) <= R_step) - { - - cout << "\n Goal is achieved <= " << R_step << " time steps\n"; - reward = R[i][j]; - Q_next_state = 0; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - } else - { - cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; - reward = -1; // ??? - Q_next_state = 0; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - } - - counter = 0; - print_Q(); - iANDj_Generator(); - iterations++; - } else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment - { - cout << "\n Failed to achieve a goal! \n"; - - reward = R[i][j]; - Q_next_state = 0; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - - print_Q(); - iANDj_Generator(); - iterations++; - } - - - cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; - - /*if(i <= R_indx_i && j > R_indx_j) // current pose : North-East (1) - { - user_action = ((double) rand() / (RAND_MAX)) * (5 - 3) + 3; - } else if (i <= R_indx_i && j <= R_indx_j) // current pose : North-West (2) - { - user_action = ((double) rand() / (RAND_MAX)) * (4 - 2) + 2; - } else if (i > R_indx_i && j <= R_indx_j) // current pose : South-West (3) - { - user_action = ((double) rand() / (RAND_MAX)) * (3 - 1) + 1; - } else if (i > R_indx_i && j > R_indx_j) // current pose : South-East (4) - { - user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; - }*/ - - if( j > R_indx_j) // current pose : Right (1) - { - cout << "\n Right Side of the goal ... \n"; - user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; - } else if (j <= R_indx_j) // current pose : Left (2) - { - cout << "\n Left Side of the goal ... \n"; - user_action = ((double) rand() / (RAND_MAX)) * (4 - 1) + 1; - } - - - //cin >> user_action; - printf(" user action = %i \n",user_action); - - } -return 0; -} + R[R_indx_i][R_indx_j] = 50; // reward + R[P_indx_i][P_indx_j] = -60; // punishment + + file.open("Matrix.txt"); + + print_R(); + + cout << "\n iterations ? \n"; + cin >> it_; + + /* initialize random seed: */ + srand(time(NULL)); + + while (iterations < it_) + { + // -------------------------------- Actions ------------------------------ + if (user_action == 1 && i != 0) // North + { + reward = R[i][j]; + Q_next_state = Q[i - 1][j]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + i--; + counter++; + } + else if (user_action == 1 && i == 0) // North + { + cout << "You can't go further up!\n"; + } + else if (user_action == 3 && i < (row - 1)) // South, i < row + { + reward = R[i][j]; + Q_next_state = Q[i + 1][j]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + i++; + counter++; + } + else if (user_action == 3 && i >= (row - 1)) // South + { + cout << "You can't go further down!\n"; + } + else if (user_action == 2 && j < (col - 1)) // East + { + reward = R[i][j]; + Q_next_state = Q[i][j + 1]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + j++; + counter++; + } + else if (user_action == 2 && j >= (col - 1)) // East, j > col + { + cout << "You can't go further right!\n"; + } + else if (user_action == 4 && j != 0) // West + { + reward = R[i][j]; + Q_next_state = Q[i][j - 1]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + j--; + counter++; + } + else if (user_action == 4 && j == 0) // West, j = 1 + { + cout << "You can't go further left!\n"; + } + else if (user_action == 0) // start + { + cout << "\nGenerating random pose in grid for 1st. time!\n"; + iANDj_Generator(); + } + + + // ------------------------------- Reward Distribution --------------------------- + // + Reward + if (i == R_indx_i && j == R_indx_j) + { + Time_Reward = -counter; + cout << " Time Reward = " << Time_Reward << "\n"; + + if (abs(Time_Reward) <= R_step) + { + + cout << "\n Goal is achieved <= " << R_step << " time steps\n"; + reward = R[i][j]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + } + else + { + cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; + reward = -1; // ??? + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + } + counter = 0; + print_Q(); + //save2file(); + iANDj_Generator(); + iterations++; + } + else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment + { + cout << "\n Failed to achieve a goal! \n"; + + reward = R[i][j]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + + print_Q(); + //save2file(); + iANDj_Generator(); + iterations++; + } + + cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; + + /*if(i <= R_indx_i && j > R_indx_j) // current pose : North-East (1) + { + user_action = ((double) rand() / (RAND_MAX)) * (5 - 3) + 3; + } else if (i <= R_indx_i && j <= R_indx_j) // current pose : North-West (2) + { + user_action = ((double) rand() / (RAND_MAX)) * (4 - 2) + 2; + } else if (i > R_indx_i && j <= R_indx_j) // current pose : South-West (3) + { + user_action = ((double) rand() / (RAND_MAX)) * (3 - 1) + 1; + } else if (i > R_indx_i && j > R_indx_j) // current pose : South-East (4) + { + user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; + }*/ + + + if (j > R_indx_j) // current pose : Right (1) + { + cout << "\n Right Side of the goal ... \n"; + user_action = ((double)rand() / (RAND_MAX)) * (5 - 1) + 1; + } + else if (j <= R_indx_j) // current pose : Left (2) + { + cout << "\n Left Side of the goal ... \n"; + user_action = ((double)rand() / (RAND_MAX)) * (4 - 1) + 1; + } + + + + //cin >> user_action; + printf(" user action = %i \n", user_action); + + } + save2file(); + return 0; +} void print_R() { - cout << " R = \n"; - for(int i = 0; i <= (row - 1); i++) - { - for(int j = 0; j <= (col - 1); j++) - { - cout << setw(col - 1) << R[i][j]; - if(j < col - 1) + cout << " R = \n"; + for (int i = 0; i <= (row - 1); i++) + { + for (int j = 0; j <= (col - 1); j++) + { + cout << setw(col - 1) << R[i][j]; + if (j < col - 1) { cout << " , "; } } // j - cout << "\n"; - } // i - cout << "\n"; + cout << "\n"; + } // i + cout << "\n"; } void print_Q() { - cout << " Q = \n"; - for(int i = 0; i <= (row - 1); i++) - { - for(int j = 0; j <= (col - 1); j++) - { - cout << setw(col - 1) << Q[i][j]; - if(j < col - 1) + cout << " Q = \n"; + for (int i = 0; i <= (row - 1); i++) + { + for (int j = 0; j <= (col - 1); j++) + { + cout << setw(col - 1) << Q[i][j]; + if (j < col - 1) { cout << " , "; } } // j - cout << "\n"; - } // i - cout << "\n"; + cout << "\n"; + } // i + cout << "\n"; } void iANDj_Generator() { - // Generate Random Pose for current state (position) - - i = ((double) rand() / (RAND_MAX)) * (row) ; - j = ((double) rand() / (RAND_MAX)) * (col) ; - - Q_curr_state = Q[i][j]; - - cout << "\n i = " << i << " , j = " << j << " => Q[i][j] = " << Q_curr_state << " \n"; - + // Generate Random Pose for current state (position) + + i = ((double)rand() / (RAND_MAX)) * (row); + j = ((double)rand() / (RAND_MAX)) * (col); + + Q_curr_state = Q[i][j]; + + cout << "\n i = " << i << " , j = " << j << " => Q[i][j] = " << Q_curr_state << " \n"; + +} +void save2file() +{ + for (int k = 0; k < row - 1; k++) + { + for (int l = 0; l < col - 1; l++) + { + file << setw(10) << Q[k][l]; + if (l < col - 1) + { + file << " , "; + } + } + file << "\n"; + } + file << "\n"; } diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ index 9a661abb87a479e878aa7eddb9474d6145c0a94c..bdad0449dd44be0db478491ee9014181f75269cb 100644 --- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ +++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ @@ -3,26 +3,27 @@ #include <iomanip> #include <ctime> #include <cstdlib> - +#include <fstream> +#include <windows.h> using namespace std; - const int row = 9; - const int col = 9; - +const int row = 20; +const int col = 20; +ofstream file; double gamma = .8; double alpha = .1; -double R_step = 120; +double R_step = row*col; -double R[row][col] = {0}; -double Q[row][col] = {0}; +double R[row][col] = { 0 }; +double Q[row][col] = { 0 }; int iterations; -int it_ = 1; +int it_; int user_action; double Q_next_state; -int i,j; +int i, j; double Q_curr_state = Q[i][j]; double reward; @@ -39,208 +40,244 @@ double sample; void print_R(); void print_Q(); +void save2file(); void iANDj_Generator(); int main() { - R[R_indx_i][R_indx_j] = 50; // reward - R[P_indx_i][P_indx_j] = -60; // punishment - - print_R(); - - cout << "\n iterations ? \n" ; - cin >> it_; - - /* initialize random seed: */ - srand (time(NULL)); - - while ( iterations < it_ ) - { - if (user_action == 1 && i != 0) // North - { - reward = R[i][j]; - Q_next_state = Q[i - 1][j]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - i--; - counter++; - } else if (user_action == 1 && i == 0) // North - { - cout << "You can't go further up!\n"; - } else if (user_action == 3 && i < (row - 1)) // South, i < row - { - reward = R[i][j]; - Q_next_state = Q[i + 1][j]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - i++; - counter++; - } else if (user_action == 3 && i >= (row - 1)) // South - { - cout << "You can't go further down!\n"; - } else if (user_action == 2 && j < (col - 1)) // East - { - reward = R[i][j]; - Q_next_state = Q[i][j + 1]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - j++; - counter++; - } else if (user_action == 2 && j >= (col - 1)) // East, j > col - { - cout << "You can't go further right!\n"; - } else if (user_action == 4 && j != 0 ) // West - { - reward = R[i][j]; - Q_next_state = Q[i][j - 1]; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - Q_curr_state = Q_next_state; - //printf(" Q_current_state = %f \n",Q_curr_state); - j--; - counter++; - } else if (user_action == 4 && j == 0) // West, j = 1 - { - cout << "You can't go further left!\n"; - } else if (user_action == 0) // start - { - cout << "\nGenerating random pose in grid for 1st. time!\n"; - iANDj_Generator(); - } - - // + Reward - if (i == R_indx_i && j == R_indx_j) - { - Time_Reward = -counter; - cout << " Time Reward = "<< Time_Reward << "\n"; - - if(abs(Time_Reward) <= R_step) - { - - cout << "\n Goal is achieved <= " << R_step << " time steps\n"; - reward = R[i][j]; - Q_next_state = 0; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - } else - { - cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; - reward = -1; // ??? - Q_next_state = 0; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - } - - counter = 0; - print_Q(); - iANDj_Generator(); - iterations++; - } else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment - { - cout << "\n Failed to achieve a goal! \n"; - - reward = R[i][j]; - Q_next_state = 0; - - sample = reward + gamma * Q_next_state; - Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); - - print_Q(); - iANDj_Generator(); - iterations++; - } - - - cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; - - /*if(i <= R_indx_i && j > R_indx_j) // current pose : North-East (1) - { - user_action = ((double) rand() / (RAND_MAX)) * (5 - 3) + 3; - } else if (i <= R_indx_i && j <= R_indx_j) // current pose : North-West (2) - { - user_action = ((double) rand() / (RAND_MAX)) * (4 - 2) + 2; - } else if (i > R_indx_i && j <= R_indx_j) // current pose : South-West (3) - { - user_action = ((double) rand() / (RAND_MAX)) * (3 - 1) + 1; - } else if (i > R_indx_i && j > R_indx_j) // current pose : South-East (4) - { - user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; - }*/ - - if( j > R_indx_j) // current pose : Right (1) - { - cout << "\n Right Side of the goal ... \n"; - user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; - } else if (j <= R_indx_j) // current pose : Left (2) - { - cout << "\n Left Side of the goal ... \n"; - user_action = ((double) rand() / (RAND_MAX)) * (4 - 1) + 1; - } - - - //cin >> user_action; - printf(" user action = %i \n",user_action); - - } -return 0; -} + R[R_indx_i][R_indx_j] = 50; // reward + R[P_indx_i][P_indx_j] = -60; // punishment + + file.open("Matrix.txt"); + + print_R(); + + cout << "\n iterations ? \n"; + cin >> it_; + + /* initialize random seed: */ + srand(time(NULL)); + + while (iterations < it_) + { + // -------------------------------- Actions ------------------------------ + if (user_action == 1 && i != 0) // North + { + reward = R[i][j]; + Q_next_state = Q[i - 1][j]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + i--; + counter++; + } + else if (user_action == 1 && i == 0) // North + { + cout << "You can't go further up!\n"; + } + else if (user_action == 3 && i < (row - 1)) // South, i < row + { + reward = R[i][j]; + Q_next_state = Q[i + 1][j]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + i++; + counter++; + } + else if (user_action == 3 && i >= (row - 1)) // South + { + cout << "You can't go further down!\n"; + } + else if (user_action == 2 && j < (col - 1)) // East + { + reward = R[i][j]; + Q_next_state = Q[i][j + 1]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + j++; + counter++; + } + else if (user_action == 2 && j >= (col - 1)) // East, j > col + { + cout << "You can't go further right!\n"; + } + else if (user_action == 4 && j != 0) // West + { + reward = R[i][j]; + Q_next_state = Q[i][j - 1]; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + Q_curr_state = Q_next_state; + //printf(" Q_current_state = %f \n",Q_curr_state); + j--; + counter++; + } + else if (user_action == 4 && j == 0) // West, j = 1 + { + cout << "You can't go further left!\n"; + } + else if (user_action == 0) // start + { + cout << "\nGenerating random pose in grid for 1st. time!\n"; + iANDj_Generator(); + } + + + // ------------------------------- Reward Distribution --------------------------- + // + Reward + if (i == R_indx_i && j == R_indx_j) + { + Time_Reward = -counter; + cout << " Time Reward = " << Time_Reward << "\n"; + + if (abs(Time_Reward) <= R_step) + { + + cout << "\n Goal is achieved <= " << R_step << " time steps\n"; + reward = R[i][j]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + } + else + { + cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; + reward = -1; // ??? + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + } + counter = 0; + print_Q(); + //save2file(); + iANDj_Generator(); + iterations++; + } + else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment + { + cout << "\n Failed to achieve a goal! \n"; + + reward = R[i][j]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); + + print_Q(); + //save2file(); + iANDj_Generator(); + iterations++; + } + + cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; + + /*if(i <= R_indx_i && j > R_indx_j) // current pose : North-East (1) + { + user_action = ((double) rand() / (RAND_MAX)) * (5 - 3) + 3; + } else if (i <= R_indx_i && j <= R_indx_j) // current pose : North-West (2) + { + user_action = ((double) rand() / (RAND_MAX)) * (4 - 2) + 2; + } else if (i > R_indx_i && j <= R_indx_j) // current pose : South-West (3) + { + user_action = ((double) rand() / (RAND_MAX)) * (3 - 1) + 1; + } else if (i > R_indx_i && j > R_indx_j) // current pose : South-East (4) + { + user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; + }*/ + + + if (j > R_indx_j) // current pose : Right (1) + { + cout << "\n Right Side of the goal ... \n"; + user_action = ((double)rand() / (RAND_MAX)) * (5 - 1) + 1; + } + else if (j <= R_indx_j) // current pose : Left (2) + { + cout << "\n Left Side of the goal ... \n"; + user_action = ((double)rand() / (RAND_MAX)) * (4 - 1) + 1; + } + + + + //cin >> user_action; + printf(" user action = %i \n", user_action); + + } + save2file(); + return 0; +} void print_R() { - cout << " R = \n"; - for(int i = 0; i <= (row - 1); i++) - { - for(int j = 0; j <= (col - 1); j++) - { - cout << setw(col - 1) << R[i][j]; - if(j < col - 1) + cout << " R = \n"; + for (int i = 0; i <= (row - 1); i++) + { + for (int j = 0; j <= (col - 1); j++) + { + cout << setw(col - 1) << R[i][j]; + if (j < col - 1) { cout << " , "; } } // j - cout << "\n"; - } // i - cout << "\n"; + cout << "\n"; + } // i + cout << "\n"; } void print_Q() { - cout << " Q = \n"; - for(int i = 0; i <= (row - 1); i++) - { - for(int j = 0; j <= (col - 1); j++) - { - cout << setw(col - 1) << Q[i][j]; - if(j < col - 1) + cout << " Q = \n"; + for (int i = 0; i <= (row - 1); i++) + { + for (int j = 0; j <= (col - 1); j++) + { + cout << setw(col - 1) << Q[i][j]; + if (j < col - 1) { cout << " , "; } } // j - cout << "\n"; - } // i - cout << "\n"; + cout << "\n"; + } // i + cout << "\n"; } void iANDj_Generator() { - // Generate Random Pose for current state (position) - - i = ((double) rand() / (RAND_MAX)) * (row) ; - j = ((double) rand() / (RAND_MAX)) * (col) ; - - Q_curr_state = Q[i][j]; - - cout << "\n i = " << i << " , j = " << j << " => Q[i][j] = " << Q_curr_state << " \n"; - + // Generate Random Pose for current state (position) + + i = ((double)rand() / (RAND_MAX)) * (row); + j = ((double)rand() / (RAND_MAX)) * (col); + + Q_curr_state = Q[i][j]; + + cout << "\n i = " << i << " , j = " << j << " => Q[i][j] = " << Q_curr_state << " \n"; + +} +void save2file() +{ + for (int k = 0; k < row - 1; k++) + { + for (int l = 0; l < col - 1; l++) + { + file << setw(10) << Q[k][l]; + if (l < col - 1) + { + file << " , "; + } + } + file << "\n"; + } + file << "\n"; } diff --git a/MobileRobot/Machine_Learning/Practice/save_data_2_file.cpp~ b/MobileRobot/Machine_Learning/Practice/save_data_2_file.cpp~ deleted file mode 100644 index f809adbc708ebf46ee5368a940ed10851d97f9c9..0000000000000000000000000000000000000000 --- a/MobileRobot/Machine_Learning/Practice/save_data_2_file.cpp~ +++ /dev/null @@ -1,68 +0,0 @@ -/*#include <iostream> -#include <fstream> - -using namespace std; - -int main() -{ -ofstream outputFile; -outputFile.open("test.txt"); - -float num1, num2, num3, num4, num5; -char name1, name2, name3, name4, name5; - -cout << "Enter the first number: "; -cin >> num1; -outputFile << num1 << endl; - -cout << "Enter the second number: "; -cin >> num2; -outputFile << num2 << endl; - -cout << "Enter the third number: "; -cin >> num3; -outputFile << num3 << endl; - -cout << "Enter the fourth number: "; -cin >> num4; -outputFile << num4 << endl; - -cout << "Enter the fifth number: "; -cin >> num5; -outputFile << num5 << endl; - -outputFile.close(); -cout << "Done!\n"; - -return 0; -}*/ - -#include <iostream> // library that contain basic input/output functions -#include <fstream> // library that contains file input/output functions -using namespace std; - -int main() -{ - - char array[] = {'H','e','l','l','o',' ','W','o','r','l','d','!','\0'}; //array to write into file - - ofstream fout("test.txt"); //opening an output stream for file test.txt - /*checking whether file could be opened or not. If file does not exist or don't have write permissions, file stream could not be opened.*/ - if(fout.is_open()) - { - //file opened successfully so we are here - cout << "File Opened successfully!!!. Writing data from array to file" << endl; - - for(int i = 0; array[i] != "\0"; i++) - { - fout << array[i]; //writing ith character of array in the file - } - cout << "Array data successfully saved into the file test.txt" << endl; - } - else //file could not be opened - { - cout << "File could not be opened." << endl; - } - return 0; -} - diff --git a/MobileRobot/Machine_Learning/Practice/test.txt b/MobileRobot/Machine_Learning/Practice/test.txt deleted file mode 100644 index c57eff55ebc0c54973903af5f72bac72762cf4f4..0000000000000000000000000000000000000000 --- a/MobileRobot/Machine_Learning/Practice/test.txt +++ /dev/null @@ -1 +0,0 @@ -Hello World! \ No newline at end of file diff --git a/MobileRobot/Machine_Learning/Practice/vector_iterator.cpp~ b/MobileRobot/Machine_Learning/Practice/vector_iterator.cpp~ deleted file mode 100644 index 5b542380f3f4e6107bec6b26515c592c25c70a15..0000000000000000000000000000000000000000 --- a/MobileRobot/Machine_Learning/Practice/vector_iterator.cpp~ +++ /dev/null @@ -1,33 +0,0 @@ -#include <iostream> // just to output and show what's going on -#include <vector> // include vectors and iterators - -using namespace std; - -int main (){ - vector <int> myVec; // declare a vector - // fill myVec with some values - for (int i = 0; i < 10; i++) - myVec.push_back(i); - - vector <int> myVec2; // declare a second vector - // fill myVec2 with some values - for (int i = 10; i < 20; i++) - myVec2.push_back(i); - - vector <int>::iterator It; - - cout << "1st. vector : "<< endl; - for (It = myVec.begin(); It != myVec.end(); ++It) - cout << *It << "\t"; // output the current value that It is *pointing to - - // print a new line - cout << endl; - - cout << "2nd. vector : "<< endl; - for (It = myVec2.begin(); It != myVec2.end(); ++It) - cout << *It << "\t"; // output the current value that It is *pointing to - - cout << endl; - return 0; -} -