From 7a03e97cc541d1408b6ab3f99b732256fa796d01 Mon Sep 17 00:00:00 2001 From: Farid Alijani <farid.alijani@student.lut.fi> Date: Mon, 11 Apr 2016 21:19:42 +0200 Subject: [PATCH] Q learning ro mobile robot --- .../CamMark/camtomar/include/VisionControl.h | 15 ++- .../CamMark/camtomar/include/VisionControl.h~ | 50 ++++++---- .../CamMark/camtomar/src/VisionControl.cpp | 97 +++++++++++++++---- .../CamMark/camtomar/src/VisionControl.cpp~ | 13 ++- .../Machine_Learning/Practice/Q_learning.cpp | 5 +- .../Machine_Learning/Practice/Q_learning.cpp~ | 10 +- 6 files changed, 133 insertions(+), 57 deletions(-) diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h index e7e8fbd8..c462fe23 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h @@ -97,6 +97,7 @@ private: void dock(double VelX, double VelY, double omegaZ); void move2docking(double VelX_est, double VelY_est, double omegaZ_est); + //double* Q; public: @@ -163,18 +164,19 @@ public: // ---- Q_LEARNING PARAMETERS ------ // static double x_new,y_new,theta_new; // inp (y,theta) + static const double y_up,y_dwn,theta_up,theta_dwn; static double Kp_y,Ki_y,Kd_y; // actions (Kp,Ki,Kd) static int R_indx_i, R_indx_j, P_indx_i, P_indx_j; - static int row, col; + static const int row, col; static double gamma; static double alpha; static double R_step; - double R[row][col]; - double Q[row][col]; + static double R[5][5]; + static double Q[5][5]; static double Q_curr_state,Q_next_state; static double reward; @@ -182,7 +184,10 @@ public: static int Time_Reward; static double sample; - void Q_Learning(); + void Q_Learning(double y, double theta); + void print_Q(); + + void print_R(); // ---- Q_LEARNING PARAMETERS ------ // @@ -202,6 +207,6 @@ public: double marpose[6]; double camPose[6]; - double RefPose[6]; + static const double RefPose[4]; }; diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ index 9619df65..e7e8fbd8 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ @@ -121,15 +121,11 @@ public: void GenerateRandomVal(); void RandomPose(double X_rand, double Y_rand, double theta_rand); - static double Pos_Px,Pos_Ix,Pos_Dx; - static double Pos_Py,Pos_Iy,Pos_Dy; + static double Kp_x,Ki_x,Kd_x; + + static double Kp_theta,Ki_theta,Kd_theta; - static double S_Ang_P,S_Ang_I,S_Ang_D; - static double L_Ang_P,L_Ang_I,L_Ang_D; - - static double Ang_P,Ang_I,Ang_D; - - static double x_new,y_new,theta_new; + static double safety_margin_X; double x_rand_SM; @@ -163,7 +159,34 @@ public: static double control_signalYAW; // ---- CONTROLL PARAMETERS ------ // - + + // ---- Q_LEARNING PARAMETERS ------ // + + static double x_new,y_new,theta_new; // inp (y,theta) + static double Kp_y,Ki_y,Kd_y; // actions (Kp,Ki,Kd) + + static int R_indx_i, R_indx_j, P_indx_i, P_indx_j; + + + static int row, col; + static double gamma; + static double alpha; + static double R_step; + + double R[row][col]; + double Q[row][col]; + + static double Q_curr_state,Q_next_state; + static double reward; + static int counter; + static int Time_Reward; + static double sample; + + void Q_Learning(); + + // ---- Q_LEARNING PARAMETERS ------ // + + // ---- TIMER PARAMETERS ------ // static int msec,sec,min,hr; // ---- TIMER PARAMETERS ------ // @@ -181,13 +204,4 @@ public: double camPose[6]; double RefPose[6]; - /*double prev_error[6]; - double int_error[6]; - double curr_error[6]; - double diff[6]; - double p_term[6]; - double d_term[6]; - double i_term[6]; - double control_signal[6];*/ - }; diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp index 9552bb01..53517b4f 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp @@ -85,10 +85,13 @@ double ImageConverter::p_termYAW = 0; double ImageConverter::i_termYAW = 0; double ImageConverter::d_termYAW = 0; - double ImageConverter::control_signalX, ImageConverter::control_signalY, ImageConverter::control_signalYAW; // ---- CONTROLL PARAMETERS ------ // + +// ---- Ref. Values ---- // +const double ImageConverter::RefPose[4] = {-.0957, -0.0605845 /* Y_ref*/ , 0.303369 /* X_ref*/ , 0.586541 /* theta_ref*/}; + // ---------------- PID gains---------------- // double ImageConverter::Kp_y = .4; double ImageConverter::Ki_y = .0042; @@ -100,6 +103,12 @@ double ImageConverter::Kd_theta = .2 * Kd_y; // ---------------- PID gains---------------- // // random pose initialized + +const double ImageConverter::y_up = .3; +const double ImageConverter::y_dwn = -.1; +const double ImageConverter::theta_dwn = -.3*RefPose[3]; +const double ImageConverter::theta_up = .3*RefPose[3]; + double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new; // ---------------- Q_LEARNING PARAMETERS ---------------- // @@ -108,11 +117,12 @@ double ImageConverter::gamma = .8; double ImageConverter::alpha = .1; double ImageConverter::R_step = 200; +const int ImageConverter::row = 5/*(y_up - y_dwn) * 10000*/; +const int ImageConverter::col = 5/*(theta_up - theta_dwn) * 1000*/; -int ImageConverter::row = ; -int ImageConverter::col = ; +double ImageConverter::Q[5][5] = {0}; +double ImageConverter::R[5][5] = {0}; -double ImageConverter::Q_curr_state = Q[y_new][theta_new]; double ImageConverter::Q_next_state; int ImageConverter::counter = 1; @@ -120,8 +130,11 @@ int ImageConverter::Time_Reward; double ImageConverter::sample; +int ImageConverter::R_indx_i = 0; +int ImageConverter::R_indx_j = 2; - +int ImageConverter::P_indx_i = 1; +int ImageConverter::P_indx_j = 1; // ---------------- Q_LEARNING PARAMETERS ---------------- // double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration; @@ -153,12 +166,15 @@ double ImageConverter::docking_counter = 1; /* initialize random seed: */ srand (time(NULL)); -// Ref. Values +/*// Ref. Values RefPose[0] = -.0957; -RefPose[1] = -0.0194504; -RefPose[2] = 0.304966; -RefPose[3] = 0.703702; +RefPose[1] = -0.0193378; +RefPose[2] = 0.306532; +RefPose[3] = 0.702702;*/ + + R[R_indx_i][R_indx_j] = 50; // reward + R[P_indx_i][P_indx_j] = -60; // punishment x_rand_SM = RefPose[2] + .55; // 55 cm spreading domain in the x-axis while moving towards the random pose @@ -513,11 +529,11 @@ camPose[3] = CamFB->pose.orientation.x; // theta orientation dock_finished = ros::Time::now().toSec(); docking_duration = dock_finished - dock_started; ROS_INFO_STREAM("docking No. " << docking_counter << " is finished in "<< docking_duration <<" sec, moving to new Random Pose\n"); - //keepMoving = false; + keepMoving = false; GenerateRandomVal(); docking_counter ++; speed_reducer = 1; - Go2RandomPose = true; + //Go2RandomPose = true; // to make sure that y & theta are within the threshold... } else if (abs(RefPose[2] - camPose[2]) <= safety_margin_X) @@ -541,6 +557,9 @@ camPose[3] = CamFB->pose.orientation.x; // theta orientation }else { Controller(RefPose[2], camPose[2], RefPose[1], camPose[1], RefPose[3], camPose[3],.01); + Q_Learning(camPose[1],camPose[3]); // performing Q_Learning... + //print_R(); + } } else { @@ -575,9 +594,6 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl { control_signalX = 0; } - - - // -----------------Y--------------------- // if((RefY - MarPoseY) < -Py_eps || (RefY - MarPoseY) > Py_eps) @@ -719,6 +735,7 @@ void ImageConverter::move2docking(double VelX_est, double VelY_est, double omega void ImageConverter::GenerateRandomVal() { + // ---------------- PID gains ------------------ Kp_y = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; //.1 < Kp < .76 Ki_y = ((double) rand() / (RAND_MAX)) * .006; // 0 < Ki < .006 @@ -726,8 +743,8 @@ void ImageConverter::GenerateRandomVal() // ------------------ Generating Random Pose ------------------ x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM; - y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); // will be used for Q_Learning - theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); // will be used for Q_Learning + y_new = ((double) rand() / (RAND_MAX)) * (y_up - y_dwn) + y_dwn; // will be used for Q_Learning + theta_new = ((double) rand() / (RAND_MAX)) * (theta_up - theta_dwn) + theta_dwn; // will be used for Q_Learning } void ImageConverter::RandomPose(double X_rand, double Y_rand, double theta_rand) @@ -834,9 +851,10 @@ commandPub.publish(msg_new); } -void ImageConverter::Q_Learning() +void ImageConverter::Q_Learning(double y, double theta) { - while ( iterations <= it_ ) + +/* while ( iterations <= it_ ) { if (user_action == 1 && y_new != 0) // North { @@ -922,7 +940,7 @@ void ImageConverter::Q_Learning() } counter = 0; - } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment + } else if (y_new == P_indx_i && theta_new == P_indx_j) // (-) Reward => Punishment { cout << "\n Failed to achieve a goal! \n"; @@ -937,5 +955,44 @@ void ImageConverter::Q_Learning() user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action printf(" ramdom user action = %i \n",user_action); //cin >> user_action; - } + }*/ +} + + +void ImageConverter::print_R() +{ + cout << " R = \n"; + for(int i = 0; i <= (row - 1); i++) + { + for(int j = 0; j <= (col - 1); j++) + { + cout << setw(col - 1) << R[i][j]; + if(j < col - 1) + { + cout << " , "; + } + } // j + cout << "\n"; + } // i + cout << "\n"; } + + +void ImageConverter::print_Q() +{ + cout << " Q = \n"; + for(int i = 0; i <= (row - 1); i++) + { + for(int j = 0; j <= (col - 1); j++) + { + cout << setw(col - 1) << Q[i][j]; + if(j < col - 1) + { + cout << " , "; + } + } // j + cout << "\n"; + } // i + cout << "\n"; +} + diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ index 1ff2e063..c521ced5 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ @@ -523,15 +523,13 @@ camPose[3] = CamFB->pose.orientation.x; // theta orientation } else if (abs(RefPose[2] - camPose[2]) <= safety_margin_X) { if( - (abs(RefPose[1] - camPose[1]) > Py_eps) || - (abs(RefPose[3] - abs(camPose[3])) > A_eps) + (abs(RefPose[1] - camPose[1]) > Py_eps) || (abs(RefPose[3] - abs(camPose[3])) > A_eps) ) { ROS_INFO_STREAM(" delta_X < " << safety_margin_X << " m. , Fixing Y or theta. \n "); Controller(RefPose[2], RefPose[2], RefPose[1], camPose[1], RefPose[3], camPose[3],.01); } else if( - (abs(RefPose[1] - camPose[1]) <= Py_eps) && - (abs(RefPose[3] - abs(camPose[3])) <= A_eps) + (abs(RefPose[1] - camPose[1]) <= Py_eps) && (abs(RefPose[3] - abs(camPose[3])) <= A_eps) ) { ROS_INFO("y & theta fixed successfully, MOVING STRAIGHT AHEAD ... \n"); @@ -541,6 +539,7 @@ camPose[3] = CamFB->pose.orientation.x; // theta orientation }else { Controller(RefPose[2], camPose[2], RefPose[1], camPose[1], RefPose[3], camPose[3],.01); + Q_Learning(); } } else { @@ -775,7 +774,7 @@ geometry_msgs::Twist msg_new; ROS_INFO("CCW rot. is fixed, only moving 2 right side ... \n"); vel_y = -.03; } - }else if (abs(camPose[1] - Y_rand) <= y_thresh_undock) + } else if (abs(camPose[1] - Y_rand) <= y_thresh_undock) { ROS_INFO(" Y-axis is fixed, adjusting theta-axis ... ! \n"); if (abs(abs(camPose[3]) - abs(theta_rand)) <= theta_thresh_undock) @@ -921,8 +920,8 @@ void ImageConverter::Q_Learning() Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); } - counter = 1; - } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment + counter = 0; + } else if (y_new == P_indx_i && theta_new == P_indx_j) // (-) Reward => Punishment { cout << "\n Failed to achieve a goal! \n"; diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp index 1ce7497f..856b3be4 100644 --- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp +++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp @@ -6,9 +6,8 @@ using namespace std; - -const int row = 48; -const int col = 13; + const int row = 48; + const int col = 13; double gamma = .8; diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ index f3d3f718..c1e7aada 100644 --- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ +++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ @@ -6,14 +6,13 @@ using namespace std; - -const int row = 48; -const int col = 13; + const int row = 48; + const int col = 13; double gamma = .8; double alpha = .1; -double R_step = 200; +double R_step = 500; double R[row][col] = {0}; double Q[row][col] = {0}; @@ -44,6 +43,9 @@ void generate_rand(); int main() { + + + R[R_indx_i][R_indx_j] = 50; // reward R[P_indx_i][P_indx_j] = -60; // punishment -- GitLab