diff --git a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h index 1908d8b5991e90551c3d15168eeee354c176a627..b0170db71453e4e4cc5390c7bfce0bbd7a249537 100644 --- a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h +++ b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h @@ -91,7 +91,7 @@ private: static float TheMarkerSize; void ContStart(); - static bool update_images,found,Go2RandomPose; + static bool update_images,found,Go2RandomPose,inc_dock_counter; bool readArguments ( int argc,char **argv ); @@ -142,6 +142,7 @@ public: static const double y_up,y_dwn,theta_up,theta_dwn; static double y_dot,theta_dot; static int i_Goal, j_Goal; + static int iterations; static int i,j,user_action; static const int row, col; @@ -149,8 +150,8 @@ public: static double alpha; static double R_step; - static double R[27][51]; - static double Q[27][51]; + static double R[23][51]; + static double Q[23][51]; static double Q_curr_state,Q_next_state; static double reward; diff --git a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h~ b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h~ index e8ae528da80859dd94fe58695a53004d0e6c7db2..1908d8b5991e90551c3d15168eeee354c176a627 100644 --- a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h~ +++ b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/include/RL_Docking.h~ @@ -141,18 +141,16 @@ public: static double x_new,y_new,theta_new; // inp (y,theta) static const double y_up,y_dwn,theta_up,theta_dwn; static double y_dot,theta_dot; - static int i_Goal, j_Goal, P_indx_i, P_indx_j; + static int i_Goal, j_Goal; static int i,j,user_action; - static int iterations; - static const int row, col; static double gamma; static double alpha; static 
double R_step; - static double R[27][27]; - static double Q[27][27]; + static double R[27][51]; + static double Q[27][51]; static double Q_curr_state,Q_next_state; static double reward; diff --git a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp index 29551f1f84ed75bbc58c45f8a2eaff999e85c927..2da2024626c3da7f2e692c26acb9502b65cc9fd8 100644 --- a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp +++ b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp @@ -63,20 +63,22 @@ bool RL4Docking::update_images = true; bool RL4Docking::found = false; bool RL4Docking::Go2RandomPose = false; +bool RL4Docking::inc_dock_counter = false; int RL4Docking::ThresParam1 = 0; int RL4Docking::ThresParam2 = 0; double RL4Docking::MIN_SCAN_ANGLE_RAD = -30 * (CV_PI/180); double RL4Docking::MAX_SCAN_ANGLE_RAD = 30 * (CV_PI/180); -float RL4Docking::MIN_PROXIMITY_RANGE_M = .05; +float RL4Docking::MIN_PROXIMITY_RANGE_M = .09; float RL4Docking::closestRangeR; float RL4Docking::closestRangeF; + // ---- Ref. Values for Android Camera ---- // //const double RL4Docking::RefPose[4] = {-.0957, 0.00638817 /* Y_ref*/ , 0.308857 /* X_ref*/ , 0.17 /* theta_ref*/}; // ---- Ref. 
Values for Logitech Camera ---- // -const double RL4Docking::RefPose[4] = {-.0957, -0.0292778 /* Y_ref*/ , 0.26707 /* X_ref*/ , -0.517004 /* theta_ref*/}; +const double RL4Docking::RefPose[4] = {-.0957, -0.0305812 /* Y_ref*/ , 0.26707 /* X_ref*/ , -0.527004 /* theta_ref*/}; // random pose initialized const double RL4Docking::y_up = .25; @@ -96,7 +98,7 @@ double RL4Docking::y_dot, RL4Docking::theta_dot; // actions double RL4Docking::gamma = .8; double RL4Docking::alpha = .1; -const int RL4Docking::row = 27/*(y_up - y_dwn) * 10000*/; +const int RL4Docking::row = 23/*(y_up - y_dwn) * 10000*/; const int RL4Docking::col = 51/*(theta_up - theta_dwn) * 1000*/; double RL4Docking::R_step = RL4Docking::row * RL4Docking::col; @@ -107,9 +109,10 @@ double RL4Docking::Q_next_state; double RL4Docking::reward; int RL4Docking::counter = 0; int RL4Docking::Time_Reward; +int RL4Docking::iterations = 3; double RL4Docking::sample; -int RL4Docking::i_Goal = 20; // right now it has been set manually +int RL4Docking::i_Goal = 16; // right now it has been set manually int RL4Docking::j_Goal = 22; // right now it has been set manually // ---------------- Q_LEARNING PARAMETERS ---------------- // double RL4Docking::dock_started,RL4Docking::dock_finished,RL4Docking::docking_duration; @@ -246,7 +249,7 @@ void RL4Docking::ProgStart(int argc,char** argv) // Show images, press "SPACE" to diable image // rendering to save CPU time - if (readArguments(argc,argv)==false) + if (readArguments(argc,argv) == false) { cerr<<"check the authenticity of your file again!"<<endl; nh_.shutdown(); @@ -305,14 +308,23 @@ void RL4Docking::ProgStart(int argc,char** argv) float roll,yaw,pitch; float rollE,yawE,pitchE; - if (TheMarkers.size()>0) + if (TheMarkers.size() > 0 && inc_dock_counter == false) { found = true; - }else + } else if (TheMarkers.size() > 0 && inc_dock_counter == true) + { + //found = true; + docking_counter ++; + speed_reducer_X = 1; + speed_reducer_Y = 1; + speed_reducer_theta = 1; + 
Go2RandomPose = true; + inc_dock_counter = false; + } else { found = false; RorP(i,j); - //ROS_INFO_STREAM("Marker is lost, successful docking trials : " << (docking_counter - 1) << "\n"); + GenerateRandomVal(); //keepMoving = false; if(j < j_Goal) @@ -322,12 +334,8 @@ { dock(0,0,.05); } - //docking_counter ++; - GenerateRandomVal(); - speed_reducer_X = 1; - speed_reducer_Y = 1; - speed_reducer_theta = 1; - Go2RandomPose = true; + + inc_dock_counter = true; } if (ros::ok() && found) @@ -479,8 +487,14 @@ void RL4Docking::ScanCallBackF(const sensor_msgs::LaserScan::ConstPtr& scanF){ if (closestRangeF <= MIN_PROXIMITY_RANGE_M) { - ROS_INFO(" Oops!FRONTSIDE Obstacle!... Stop...! \n "); - keepMoving = false; + ROS_INFO(" Oops!FRONTSIDE Obstacle!....! \n "); + //keepMoving = false; + GenerateRandomVal(); + docking_counter ++; + speed_reducer_X = 1; + speed_reducer_Y = 1; + speed_reducer_theta = 1; + Go2RandomPose = true; } } @@ -503,14 +517,26 @@ void RL4Docking::ScanCallBackR(const sensor_msgs::LaserScan::ConstPtr& scanR) //ROS_INFO_STREAM(" Closest range for the rear laser sensor: " << closestRangeR); if (closestRangeR <= MIN_PROXIMITY_RANGE_M) { - ROS_INFO("Oops!BACKSIDE Obstacle!... Stop...! \n "); - keepMoving = false; + ROS_INFO("Oops!BACKSIDE Obstacle!....! \n "); + //keepMoving = false; + GenerateRandomVal(); + docking_counter ++; + speed_reducer_X = 1; + speed_reducer_Y = 1; + speed_reducer_theta = 1; + Go2RandomPose = true; } } void RL4Docking::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) // subscriber { + if (docking_counter > iterations ) + { + ROS_INFO("iterations done"); + print_Q(); + keepMoving = false; + } // in Marker coordinate sys.
// z => X robot (thrust) @@ -693,9 +719,7 @@ void RL4Docking::Action4QL(int inp_i, int inp_j) ROS_INFO(" current pose : South \n "); user_action = 1; } - ROS_INFO_STREAM(" Q_value = " << Q_curr_state << " , Actions CCW(1), R(2), CW(3), L(4) : " << user_action << "\n"); - } void RL4Docking::Q_Learning(int indx_i, int indx_j, int action) @@ -819,7 +843,7 @@ void RL4Docking::RorP(int indx_i, int indx_j) //keepMoving = false; } - //print_Q(); + print_Q(); /*// punishment for (int p = (i_Goal - 1); p <= (i_Goal + 1); p++) @@ -921,7 +945,7 @@ void RL4Docking::GenerateRandomVal() // ------------------ Generating Random Pose ------------------ //x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM; - x_new = .9; + x_new = 1.05; y_new = ((double) rand() / (RAND_MAX)) * (y_up - y_dwn) + y_dwn; //theta_new = ((double) rand() / (RAND_MAX)) * (theta_up - theta_dwn) + theta_dwn; theta_new = ((double) rand() / (RAND_MAX)) * (theta_up - (-theta_up)) + (-theta_up); diff --git a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp~ b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp~ index 2f0733d7bec22673ec121dea8fbbb1731cf7b515..29551f1f84ed75bbc58c45f8a2eaff999e85c927 100644 --- a/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp~ +++ b/MobileRobot/Machine_Learning/RL4Docking/docking_with_q_learning/src/RL_Docking.cpp~ @@ -109,7 +109,7 @@ int RL4Docking::counter = 0; int RL4Docking::Time_Reward; double RL4Docking::sample; -int RL4Docking::i_Goal = 19; // right now it has been set manually +int RL4Docking::i_Goal = 20; // right now it has been set manually int RL4Docking::j_Goal = 22; // right now it has been set manually // ---------------- Q_LEARNING PARAMETERS ---------------- // double RL4Docking::dock_started,RL4Docking::dock_finished,RL4Docking::docking_duration; @@ -312,10 +312,22 @@ void RL4Docking::ProgStart(int argc,char** argv) { found = false; RorP(i,j); - 
ROS_INFO_STREAM("Marker is lost, successful docking trials : " << (docking_counter - 1) << "\n"); - keepMoving = false; - //RandomPose(x_new,y_new,theta_new); - //move2docking(-control_signalX, -control_signalY, control_signalYAW); + //ROS_INFO_STREAM("Marker is lost, successful docking trials : " << (docking_counter - 1) << "\n"); + //keepMoving = false; + + if(j < j_Goal) + { + dock(0,0,-.05); + } else + { + dock(0,0,.05); + } + //docking_counter ++; + GenerateRandomVal(); + speed_reducer_X = 1; + speed_reducer_Y = 1; + speed_reducer_theta = 1; + Go2RandomPose = true; } if (ros::ok() && found) @@ -547,7 +559,7 @@ camPose[3] = CamFB->pose.orientation.x; // theta orientation speed_reducer_Y = 0; speed_reducer_theta = 0; MoveStraight = true; - } else /*if(i != i_Goal || j != j_Goal)*/ + } else { MoveStraight = false; speed_reducer_X = 0; @@ -614,7 +626,7 @@ void RL4Docking::i_j_Generator(double y, double theta) speed_reducer_Y = 1; speed_reducer_theta = 1; Go2RandomPose = true; - break; + break; } } @@ -774,13 +786,10 @@ void RL4Docking::RorP(int indx_i, int indx_j) if ((indx_i == i_Goal) && (indx_j == j_Goal)) { Time_Reward = -counter; - //cout << " Time Reward = "<< Time_Reward << "\n"; - ROS_INFO_STREAM(" Time Reward = "<< Time_Reward << " steps.\n"); if(abs(Time_Reward) <= R_step) { - //cout << "\n Goal is achieved <= " << R_step << " time steps\n"; ROS_INFO_STREAM(" Goal is achieved <= " << R_step << " steps. => reward\n"); reward = R[indx_i][indx_j]; Q_next_state = 0; @@ -789,7 +798,6 @@ void RL4Docking::RorP(int indx_i, int indx_j) Q[indx_i][indx_j] = ((1 - alpha) * Q[indx_i][indx_j]) + (alpha * sample); } else { - //cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; ROS_INFO_STREAM(" Goal is achieved > " << R_step << " steps. => time_punishment\n"); reward = -1; // ??? 
Q_next_state = 0; @@ -811,8 +819,6 @@ void RL4Docking::RorP(int indx_i, int indx_j) //keepMoving = false; } - - //print_Q(); /*// punishment @@ -835,7 +841,6 @@ void RL4Docking::RorP(int indx_i, int indx_j) } break; }*/ - } void RL4Docking::RL(double RefX, double MarPoseX, double RefY, double MarPoseY, double RefYAW, double MarPoseYAW)