From a9e83340b16121ffbcc89bfde0702bff3c567cdb Mon Sep 17 00:00:00 2001 From: Farid Alijani <farid.alijani@student.lut.fi> Date: Thu, 7 Apr 2016 20:55:50 +0200 Subject: [PATCH] Q learning added to robot, not working --- .../CamMark/camtomar/include/VisionControl.h | 50 ++-- .../CamMark/camtomar/include/VisionControl.h~ | 1 + .../CamMark/camtomar/src/VisionControl.cpp | 210 ++++++++++--- .../CamMark/camtomar/src/VisionControl.cpp~ | 280 ++++++++++++------ .../Machine_Learning/Practice/Q_Learning | Bin 14514 -> 14442 bytes .../Machine_Learning/Practice/Q_learning.cpp | 20 +- .../Machine_Learning/Practice/Q_learning.cpp~ | 22 +- 7 files changed, 398 insertions(+), 185 deletions(-) diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h index 9619df65..e7e8fbd8 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h @@ -121,15 +121,11 @@ public: void GenerateRandomVal(); void RandomPose(double X_rand, double Y_rand, double theta_rand); - static double Pos_Px,Pos_Ix,Pos_Dx; - static double Pos_Py,Pos_Iy,Pos_Dy; + static double Kp_x,Ki_x,Kd_x; + + static double Kp_theta,Ki_theta,Kd_theta; - static double S_Ang_P,S_Ang_I,S_Ang_D; - static double L_Ang_P,L_Ang_I,L_Ang_D; - - static double Ang_P,Ang_I,Ang_D; - - static double x_new,y_new,theta_new; + static double safety_margin_X; double x_rand_SM; @@ -163,7 +159,34 @@ public: static double control_signalYAW; // ---- CONTROLL PARAMETERS ------ // - + + // ---- Q_LEARNING PARAMETERS ------ // + + static double x_new,y_new,theta_new; // inp (y,theta) + static double Kp_y,Ki_y,Kd_y; // actions (Kp,Ki,Kd) + + static int R_indx_i, R_indx_j, P_indx_i, P_indx_j; + + + static int row, col; + static double gamma; + static double alpha; + static double R_step; + + double R[row][col]; + double Q[row][col]; + + static double Q_curr_state,Q_next_state; + static double reward; + static int counter; + static int Time_Reward; + static double sample; + + void Q_Learning(); + + // ---- Q_LEARNING PARAMETERS ------ // + + // ---- TIMER PARAMETERS ------ // static int msec,sec,min,hr; // ---- TIMER PARAMETERS ------ // @@ -181,13 +204,4 @@ public: double camPose[6]; double RefPose[6]; - /*double prev_error[6]; - double int_error[6]; - double curr_error[6]; - double diff[6]; - double p_term[6]; - double d_term[6]; - double i_term[6]; - double control_signal[6];*/ - }; diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ index d92b9ae7..9619df65 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ @@ -175,6 +175,7 @@ public: static double Py_eps,Pz_eps,A_eps; static double x_thresh_undock,y_thresh_undock,theta_thresh_undock; static double rand_X_SM,rand_A_esp,rand_Y_esp; + static double speed_reducer; double marpose[6]; double camPose[6]; diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp index 18e3e50d..9552bb01 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp @@ -46,7 +46,6 @@ using namespace cv; using namespace aruco; using namespace std; - float ImageConverter::TheMarkerSize = .1; // Default marker size 
int ImageConverter::Thresh1_min = 10; @@ -68,45 +67,64 @@ int ImageConverter::ThresParam1 = 0; int ImageConverter::ThresParam2 = 0; // ---- CONTROLL PARAMETERS ------ // -double ImageConverter::prev_errorX; -double ImageConverter::curr_errorX; -double ImageConverter::int_errorX; -double ImageConverter::diffX; +double ImageConverter::prev_errorX, ImageConverter::curr_errorX, ImageConverter::int_errorX, ImageConverter::diffX; + double ImageConverter::p_termX = 0; double ImageConverter::i_termX = 0; double ImageConverter::d_termX = 0; -double ImageConverter::control_signalX; -double ImageConverter::prev_errorY; -double ImageConverter::curr_errorY; -double ImageConverter::int_errorY; -double ImageConverter::diffY; +double ImageConverter::prev_errorY, ImageConverter::curr_errorY, ImageConverter::int_errorY, ImageConverter::diffY; + double ImageConverter::p_termY = 0; double ImageConverter::i_termY = 0; double ImageConverter::d_termY = 0; -double ImageConverter::control_signalY; -double ImageConverter::prev_errorYAW; -double ImageConverter::curr_errorYAW; -double ImageConverter::int_errorYAW; -double ImageConverter::diffYAW; +double ImageConverter::prev_errorYAW,ImageConverter::curr_errorYAW,ImageConverter::int_errorYAW,ImageConverter::diffYAW; + double ImageConverter::p_termYAW = 0; double ImageConverter::i_termYAW = 0; double ImageConverter::d_termYAW = 0; -double ImageConverter::control_signalYAW; + + +double ImageConverter::control_signalX, ImageConverter::control_signalY, ImageConverter::control_signalYAW; // ---- CONTROLL PARAMETERS ------ // -// ---------------- Initial Gains which dock the robot successfully ---------------- // -double ImageConverter::Pos_Py = .4; -double ImageConverter::Pos_Iy = .0042; -double ImageConverter::Pos_Dy = 0; +// ---------------- PID gains---------------- // +double ImageConverter::Kp_y = .4; +double ImageConverter::Ki_y = .0042; +double ImageConverter::Kd_y = 0; -double ImageConverter::Ang_P = .2 * Pos_Py; -double ImageConverter::Ang_I = (8/15) * Pos_Iy; -double ImageConverter::Ang_D = .2 * Pos_Dy; +double ImageConverter::Kp_theta = .2 * Kp_y; +double ImageConverter::Ki_theta = (8/15) * Ki_y; +double ImageConverter::Kd_theta = .2 * Kd_y; +// ---------------- PID gains---------------- // -double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration; +// random pose initialized +double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new; + +// ---------------- Q_LEARNING PARAMETERS ---------------- // + +double ImageConverter::gamma = .8; +double ImageConverter::alpha = .1; +double ImageConverter::R_step = 200; + + +int ImageConverter::row = ; +int ImageConverter::col = ; + +double ImageConverter::Q_curr_state = Q[y_new][theta_new]; +double ImageConverter::Q_next_state; +int ImageConverter::counter = 1; +int ImageConverter::Time_Reward; +double ImageConverter::sample; + + + + +// ---------------- Q_LEARNING PARAMETERS ---------------- // + +double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration; double ImageConverter::zeroMax = .0000000000000000001; double ImageConverter::zeroMin = -.0000000000000000001; @@ -124,8 +142,6 @@ double ImageConverter::x_thresh_undock = .02; double ImageConverter::y_thresh_undock = .015; double ImageConverter::theta_thresh_undock = (CV_PI/180) * 3; -// random pose initialized -double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new; double ImageConverter::docking_counter = 1; ImageConverter::ImageConverter() : @@ 
-457,12 +473,12 @@ void ImageConverter::ContStart() } -void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) +void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) // subscriber { camPose[0] = CamFB->pose.position.x; -camPose[1] = CamFB->pose.position.y; +camPose[1] = CamFB->pose.position.y; // y pose camPose[2] = CamFB->pose.position.z; -camPose[3] = CamFB->pose.orientation.x; +camPose[3] = CamFB->pose.orientation.x; // theta orientation // in Marker coordinate sys. @@ -475,7 +491,7 @@ camPose[3] = CamFB->pose.orientation.x; ROS_INFO_STREAM("--------- PID gains in trial no. " << docking_counter << " : ---------\n"); - ROS_INFO_STREAM(" Kp = " << Pos_Py << " , Ki = " << Pos_Iy << " , Kd = " << Pos_Dy << "\n"); + ROS_INFO_STREAM(" Kp = " << Kp_y << " , Ki = " << Ki_y << " , Kd = " << Kd_y << "\n"); ROS_INFO_STREAM(" --------------------- Pose estimation ------------------ \n"); ROS_INFO_STREAM(" X_mar = " << camPose[2] << " vs X_ref = " << RefPose[2] << " \n"); @@ -487,7 +503,7 @@ camPose[3] = CamFB->pose.orientation.x; if(Go2RandomPose == false) { - ROS_INFO_STREAM("---------- MOVING TOWARDS DOCKING PLATFORM ... --------- \n "); + ROS_INFO_STREAM("---------- MOVING TOWARDS DOCKING PLATFORM --------- \n "); if ( (abs(RefPose[2] - camPose[2]) <= Pz_eps) //&& // Z //(abs(RefPose[1] - camPose[1]) <= Py_eps) && // Y @@ -528,7 +544,7 @@ camPose[3] = CamFB->pose.orientation.x; } } else { - ROS_INFO("---------- MOVING TOWARDS RANDOM POSE ... ---------\n"); + ROS_INFO("---------- MOVING TOWARDS RANDOM POSE ---------\n"); RandomPose(x_new,y_new,theta_new); } } @@ -582,9 +598,9 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl diffY = ((curr_errorY - prev_errorY) / dt); // scalling - p_termY = Pos_Py * curr_errorY; - i_termY = Pos_Iy * int_errorY; - d_termY = Pos_Dy * diffY; + p_termY = Kp_y * curr_errorY; + i_termY = Ki_y * int_errorY; + d_termY = Kd_y * diffY; //ROS_INFO_STREAM("pY = " << p_termY << ", iY = " << i_termY << " dY = " << d_termY<< " \n"); // control signal @@ -622,9 +638,9 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl ROS_INFO_STREAM(" adjusting orientation! 
\n"); // scalling - p_termYAW = Ang_P * curr_errorYAW; - i_termYAW = Ang_I * int_errorYAW; - d_termYAW = Ang_D * diffYAW; + p_termYAW = Kp_theta * curr_errorYAW; + i_termYAW = Ki_theta * int_errorYAW; + d_termYAW = Kd_theta * diffYAW; // control signal control_signalYAW = p_termYAW + i_termYAW + d_termYAW; @@ -704,21 +720,19 @@ void ImageConverter::move2docking(double VelX_est, double VelY_est, double omega void ImageConverter::GenerateRandomVal() { // ---------------- PID gains ------------------ - Pos_Py = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; // .1 < Kp < .76 - Pos_Iy = ((double) rand() / (RAND_MAX)) * .006; // 0 < Ki < .007 - Pos_Dy = ((double) rand() / (RAND_MAX)) * .02; // 0 < Kd < .02 + Kp_y = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; //.1 < Kp < .76 + Ki_y = ((double) rand() / (RAND_MAX)) * .006; // 0 < Ki < .006 + Kd_y = ((double) rand() / (RAND_MAX)) * .02; // 0 < Kd < .01 // ------------------ Generating Random Pose ------------------ x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM; - y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); - theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); + y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); // will be used for Q_Learning + theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); // will be used for Q_Learning } void ImageConverter::RandomPose(double X_rand, double Y_rand, double theta_rand) { ROS_INFO_STREAM(" Xr = " << X_rand << ", Yr = " << Y_rand << ", Thetar = " << theta_rand << " rad ~ " << theta_rand * (180/CV_PI) << " deg\n"); - //ROS_INFO_STREAM(" Y_rand = " << Y_rand << " m. \n"); - //ROS_INFO_STREAM(" theta_rand = " << theta_rand << " rad. =~ " << theta_rand * (180/CV_PI) << " deg. 
\n"); ROS_INFO_STREAM(" -------------------------------------------------------------- \n"); double vel_x,vel_y,omega_z; @@ -819,3 +833,109 @@ geometry_msgs::Twist msg_new; commandPub.publish(msg_new); } + +void ImageConverter::Q_Learning() +{ + while ( iterations <= it_ ) + { + if (user_action == 1 && y_new != 0) // North + { + reward = R[y_new][theta_new]; + Q_next_state = Q[y_new - 1][theta_new]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + i--; + counter++; + } else if (user_action == 1 && y_new == 0) // North + { + cout << "You can't go further up!\n"; + } else if (user_action == 3 && i < (row - 1)) // South, i < row + { + reward = R[y_new][theta_new]; + Q_next_state = Q[y_new + 1][theta_new]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + i++; + counter++; + } else if (user_action == 3 && y_new >= (row - 1)) // South + { + cout << "You can't go further down!\n"; + } else if (user_action == 2 && theta_new < (col - 1)) // East + { + reward = R[y_new][theta_new]; + Q_next_state = Q[i][j + 1]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + j++; + counter++; + } else if (user_action == 2 && j >= (col - 1)) // East, j > col + { + cout << "You can't go further right!\n"; + } else if (user_action == 4 && j != 0 ) // West + { + reward = R[y_new][theta_new]; + Q_next_state = Q[y_new][theta_new - 1]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + j--; + counter++; + } else if (user_action == 4 && theta_new == 0) // West, j = 1 + { + cout << "You can't go further left!\n"; + } else + { + cout << "\nGenerating random pose in grid for 1st. time!\n"; + generate_rand(); + } + + // + Reward + if (y_new == R_indx_i && theta_new == R_indx_j) + { + Time_Reward = -counter; + cout << " Time Reward = "<< Time_Reward << "\n"; + + if(abs(Time_Reward) <= R_step) + { + + cout << "\n Goal is achieved <= " << R_step << " time steps\n"; + reward = R[y_new][theta_new]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + } else + { + cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; + reward = -1; // ??? + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + } + + counter = 0; + } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment + { + cout << "\n Failed to achieve a goal! 
\n"; + + reward = R[y_new][theta_new]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + } + + cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; + user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action + printf(" ramdom user action = %i \n",user_action); + //cin >> user_action; + } +} diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ index d8d96b0d..1ff2e063 100644 --- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ +++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ @@ -46,7 +46,6 @@ using namespace cv; using namespace aruco; using namespace std; - float ImageConverter::TheMarkerSize = .1; // Default marker size int ImageConverter::Thresh1_min = 10; @@ -68,75 +67,81 @@ int ImageConverter::ThresParam1 = 0; int ImageConverter::ThresParam2 = 0; // ---- CONTROLL PARAMETERS ------ // -double ImageConverter::prev_errorX; -double ImageConverter::curr_errorX; -double ImageConverter::int_errorX; -double ImageConverter::diffX; +double ImageConverter::prev_errorX, ImageConverter::curr_errorX, ImageConverter::int_errorX, ImageConverter::diffX; + double ImageConverter::p_termX = 0; double ImageConverter::i_termX = 0; double ImageConverter::d_termX = 0; -double ImageConverter::control_signalX; -double ImageConverter::prev_errorY; -double ImageConverter::curr_errorY; -double ImageConverter::int_errorY; -double ImageConverter::diffY; +double ImageConverter::prev_errorY, ImageConverter::curr_errorY, ImageConverter::int_errorY, ImageConverter::diffY; + double ImageConverter::p_termY = 0; double ImageConverter::i_termY = 0; double ImageConverter::d_termY = 0; -double ImageConverter::control_signalY; -double ImageConverter::prev_errorYAW; -double ImageConverter::curr_errorYAW; -double ImageConverter::int_errorYAW; -double ImageConverter::diffYAW; +double ImageConverter::prev_errorYAW,ImageConverter::curr_errorYAW,ImageConverter::int_errorYAW,ImageConverter::diffYAW; + double ImageConverter::p_termYAW = 0; double ImageConverter::i_termYAW = 0; double ImageConverter::d_termYAW = 0; -double ImageConverter::control_signalYAW; + + +double ImageConverter::control_signalX, ImageConverter::control_signalY, ImageConverter::control_signalYAW; // ---- CONTROLL PARAMETERS ------ // -// ---------------- Initial Gains which dock the robot successfully ---------------- // +// ---------------- PID gains---------------- // +double ImageConverter::Kp_y = .4; +double ImageConverter::Ki_y = .0042; +double ImageConverter::Kd_y = 0; -double ImageConverter::Pos_Px = .1; -double ImageConverter::Pos_Ix = 0.0028; -double ImageConverter::Pos_Dx = 0; +double ImageConverter::Kp_theta = .2 * Kp_y; +double ImageConverter::Ki_theta = (8/15) * Ki_y; +double ImageConverter::Kd_theta = .2 * Kd_y; +// ---------------- PID gains---------------- // -double ImageConverter::Pos_Py = 4 * Pos_Px; -double ImageConverter::Pos_Iy = 1.5 * Pos_Ix; -double ImageConverter::Pos_Dy = 4 * Pos_Dx; +// random pose initialized +double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new; -/*double ImageConverter::S_Ang_P = .2 * Pos_Px; -double ImageConverter::S_Ang_I = .2 * Pos_Ix; -double ImageConverter::S_Ang_D = .2 * Pos_Dx; +// ---------------- Q_LEARNING PARAMETERS ---------------- // -double ImageConverter::L_Ang_P = .8 * Pos_Px; -double 
ImageConverter::L_Ang_I = .8 * Pos_Ix; -double ImageConverter::L_Ang_D = .8 * Pos_Dx;*/ +double ImageConverter::gamma = .8; +double ImageConverter::alpha = .1; +double ImageConverter::R_step = 200; + + +int ImageConverter::row = ; +int ImageConverter::col = ; + +double ImageConverter::Q_curr_state = Q[y_new][theta_new]; +double ImageConverter::Q_next_state; + +int ImageConverter::counter = 1; +int ImageConverter::Time_Reward; +double ImageConverter::sample; -double ImageConverter::Ang_P = .8 * Pos_Px; -double ImageConverter::Ang_I = .8 * Pos_Ix; -double ImageConverter::Ang_D = .8 * Pos_Dx; -double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration; + +// ---------------- Q_LEARNING PARAMETERS ---------------- // + +double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration; double ImageConverter::zeroMax = .0000000000000000001; double ImageConverter::zeroMin = -.0000000000000000001; +double ImageConverter::speed_reducer = 1; + // ------ offsets X, Y, theta for Docking --------- double ImageConverter::Pz_eps = .001; double ImageConverter::Py_eps = .002; double ImageConverter::A_eps = (CV_PI/180) * .5; // 1 deg. -double ImageConverter::safety_margin_X = .1; // safety margin X axis in docking process : 10 cm +double ImageConverter::safety_margin_X = .15; // safety margin X axis in docking process : 15 cm // ------ offsets X, Y, theta for Undocking --------- double ImageConverter::x_thresh_undock = .02; -double ImageConverter::y_thresh_undock = .01; +double ImageConverter::y_thresh_undock = .015; double ImageConverter::theta_thresh_undock = (CV_PI/180) * 3; -// random pose initialized -double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new; double ImageConverter::docking_counter = 1; ImageConverter::ImageConverter() : @@ -151,8 +156,8 @@ double ImageConverter::docking_counter = 1; // Ref. Values RefPose[0] = -.0957; -RefPose[1] = -0.0196804; -RefPose[2] = 0.305155; +RefPose[1] = -0.0194504; +RefPose[2] = 0.304966; RefPose[3] = 0.703702; x_rand_SM = RefPose[2] + .55; // 55 cm spreading domain in the x-axis while moving towards the random pose @@ -468,13 +473,13 @@ void ImageConverter::ContStart() } -void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) +void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) // subscriber { camPose[0] = CamFB->pose.position.x; -camPose[1] = CamFB->pose.position.y; +camPose[1] = CamFB->pose.position.y; // y pose camPose[2] = CamFB->pose.position.z; -camPose[3] = CamFB->pose.orientation.x; - +camPose[3] = CamFB->pose.orientation.x; // theta orientation + // in Marker coordinate sys. // z => X robot (thrust) @@ -486,7 +491,7 @@ camPose[3] = CamFB->pose.orientation.x; ROS_INFO_STREAM("--------- PID gains in trial no. " << docking_counter << " : ---------\n"); - ROS_INFO_STREAM(" Kp = " << Pos_Px << " , Ki = " << Pos_Ix << " , Kd = " << Pos_Dx << "\n"); + ROS_INFO_STREAM(" Kp = " << Kp_y << " , Ki = " << Ki_y << " , Kd = " << Kd_y << "\n"); ROS_INFO_STREAM(" --------------------- Pose estimation ------------------ \n"); ROS_INFO_STREAM(" X_mar = " << camPose[2] << " vs X_ref = " << RefPose[2] << " \n"); @@ -498,7 +503,7 @@ camPose[3] = CamFB->pose.orientation.x; if(Go2RandomPose == false) { - ROS_INFO_STREAM("---------- MOVING TOWARDS THE DOCKING STATION ... 
--------- \n "); + ROS_INFO_STREAM("---------- MOVING TOWARDS DOCKING PLATFORM --------- \n "); if ( (abs(RefPose[2] - camPose[2]) <= Pz_eps) //&& // Z //(abs(RefPose[1] - camPose[1]) <= Py_eps) && // Y @@ -511,6 +516,7 @@ camPose[3] = CamFB->pose.orientation.x; //keepMoving = false; GenerateRandomVal(); docking_counter ++; + speed_reducer = 1; Go2RandomPose = true; // to make sure that y & theta are within the threshold... @@ -529,6 +535,7 @@ camPose[3] = CamFB->pose.orientation.x; ) { ROS_INFO("y & theta fixed successfully, MOVING STRAIGHT AHEAD ... \n"); + speed_reducer = .1; Controller(RefPose[2], camPose[2], RefPose[1], RefPose[1], RefPose[3], RefPose[3],.01); } }else @@ -537,7 +544,7 @@ camPose[3] = CamFB->pose.orientation.x; } } else { - ROS_INFO(" Random pose : \n"); + ROS_INFO("---------- MOVING TOWARDS RANDOM POSE ---------\n"); RandomPose(x_new,y_new,theta_new); } } @@ -545,41 +552,32 @@ camPose[3] = CamFB->pose.orientation.x; void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, double MarPoseY, double RefYAW, double MarPoseYAW, double dt) { ROS_INFO_STREAM("--------------------- Controller started ----------------------\n "); - // -----------------X--------------------- // + // -----------------X--------------------- // if(abs(RefX - MarPoseX) > Pz_eps) { - // e(t) = setpoint - actual value; + /*// e(t) = setpoint - actual value; curr_errorX = RefX - MarPoseX; // Integrated error int_errorX += curr_errorX * dt; - /* - // -- windup gaurd -- - if (int_error < ) - {} - else if () - {}*/ - // differentiation diffX = ((curr_errorX - prev_errorX) / dt); - // scalling p_termX = Pos_Px * curr_errorX; i_termX = Pos_Ix * int_errorX; d_termX = Pos_Dx * diffX; - // control signal control_signalX = p_termX + i_termX + d_termX; - - - // save the current error as the previous one - // for the next iteration. - prev_errorX = curr_errorX; + prev_errorX = curr_errorX;*/ + control_signalX = speed_reducer * 0.05; } else { control_signalX = 0; } + + + // -----------------Y--------------------- // if((RefY - MarPoseY) < -Py_eps || (RefY - MarPoseY) > Py_eps) @@ -600,11 +598,11 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl diffY = ((curr_errorY - prev_errorY) / dt); // scalling - p_termY = Pos_Py * curr_errorY; - i_termY = Pos_Iy * int_errorY; - d_termY = Pos_Dy * diffY; + p_termY = Kp_y * curr_errorY; + i_termY = Ki_y * int_errorY; + d_termY = Kd_y * diffY; - ROS_INFO_STREAM("pY = " << p_termY << ", iY = " << i_termY << " dY = " << d_termY<< " \n"); + //ROS_INFO_STREAM("pY = " << p_termY << ", iY = " << i_termY << " dY = " << d_termY<< " \n"); // control signal control_signalY = p_termY + i_termY + d_termY; @@ -637,12 +635,12 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl // differentiation diffYAW = ((curr_errorYAW - prev_errorYAW) / dt); - + ROS_INFO_STREAM(" adjusting orientation! \n"); // scalling - p_termYAW = Ang_P * curr_errorYAW; - i_termYAW = Ang_I * int_errorYAW; - d_termYAW = Ang_D * diffYAW; + p_termYAW = Kp_theta * curr_errorYAW; + i_termYAW = Ki_theta * int_errorYAW; + d_termYAW = Kd_theta * diffYAW; // control signal control_signalYAW = p_termYAW + i_termYAW + d_termYAW; @@ -669,7 +667,8 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl //ROS_INFO_STREAM("RAW Control signalYAW = "<< control_signalYAW <<". 
\n"); ROS_INFO_STREAM(" ---------------------- Controller ended ----------------------- \n"); - dock(-control_signalX, control_signalY, control_signalYAW); + //dock(-control_signalX, control_signalY, control_signalYAW); + dock(control_signalX, control_signalY, control_signalYAW); } void ImageConverter::dock(double VelX, double VelY, double omegaZ) @@ -721,21 +720,19 @@ void ImageConverter::move2docking(double VelX_est, double VelY_est, double omega void ImageConverter::GenerateRandomVal() { // ---------------- PID gains ------------------ - Pos_Px = ((double) rand() / (RAND_MAX)) * (.19 - .1) + .1; // .1 < Kp < .16 - Pos_Ix = ((double) rand() / (RAND_MAX)) * .01; // 0 < Ki < .01 - Pos_Dx = ((double) rand() / (RAND_MAX)) * .02; // 0 < Kd < .01 + Kp_y = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; //.1 < Kp < .76 + Ki_y = ((double) rand() / (RAND_MAX)) * .006; // 0 < Ki < .006 + Kd_y = ((double) rand() / (RAND_MAX)) * .02; // 0 < Kd < .01 // ------------------ Generating Random Pose ------------------ x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM; - y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); - theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); + y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); // will be used for Q_Learning + theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); // will be used for Q_Learning } void ImageConverter::RandomPose(double X_rand, double Y_rand, double theta_rand) { ROS_INFO_STREAM(" Xr = " << X_rand << ", Yr = " << Y_rand << ", Thetar = " << theta_rand << " rad ~ " << theta_rand * (180/CV_PI) << " deg\n"); - //ROS_INFO_STREAM(" Y_rand = " << Y_rand << " m. \n"); - //ROS_INFO_STREAM(" theta_rand = " << theta_rand << " rad. =~ " << theta_rand * (180/CV_PI) << " deg. \n"); ROS_INFO_STREAM(" -------------------------------------------------------------- \n"); double vel_x,vel_y,omega_z; @@ -823,31 +820,12 @@ geometry_msgs::Twist msg_new; vel_y = -.03; } - }/* else if ((Y_rand < 0) && (theta_rand < 0) && (camPose[1] - Y_rand > y_thresh_undock)) - { // extream case on the right side, move 2 right, CCW - - ROS_INFO(" EXTREAM case on right side, move 2 right, CCW \n"); - if ((abs(Y_rand) - camPose[1]) > y_thresh_undock) - { - vel_y = -.01; - omega_z = .05; - } - } else if ((Y_rand > 0) && (theta_rand > 0) && (camPose[1] - Y_rand < -y_thresh_undock)) - { // extream case on the left side - - ROS_INFO(" EXTREAM case on left side, move 2 left, CW \n"); - if ((abs(Y_rand) - camPose[1]) > y_thresh_undock) - { - vel_y = .01; - omega_z = -.05; - } - }*/ else + } else { ROS_INFO(" New condition should be added! 
\n"); keepMoving = false; } } - msg_new.linear.x = vel_x; msg_new.linear.y = vel_y; msg_new.angular.z = omega_z; @@ -855,3 +833,109 @@ geometry_msgs::Twist msg_new; commandPub.publish(msg_new); } + +void ImageConverter::Q_Learning() +{ + while ( iterations <= it_ ) + { + if (user_action == 1 && y_new != 0) // North + { + reward = R[y_new][theta_new]; + Q_next_state = Q[y_new - 1][theta_new]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + i--; + counter++; + } else if (user_action == 1 && y_new == 0) // North + { + cout << "You can't go further up!\n"; + } else if (user_action == 3 && i < (row - 1)) // South, i < row + { + reward = R[y_new][theta_new]; + Q_next_state = Q[y_new + 1][theta_new]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + i++; + counter++; + } else if (user_action == 3 && y_new >= (row - 1)) // South + { + cout << "You can't go further down!\n"; + } else if (user_action == 2 && theta_new < (col - 1)) // East + { + reward = R[y_new][theta_new]; + Q_next_state = Q[i][j + 1]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + j++; + counter++; + } else if (user_action == 2 && j >= (col - 1)) // East, j > col + { + cout << "You can't go further right!\n"; + } else if (user_action == 4 && j != 0 ) // West + { + reward = R[y_new][theta_new]; + Q_next_state = Q[y_new][theta_new - 1]; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + Q_curr_state = Q_next_state; + j--; + counter++; + } else if (user_action == 4 && theta_new == 0) // West, j = 1 + { + cout << "You can't go further left!\n"; + } else + { + cout << "\nGenerating random pose in grid for 1st. time!\n"; + generate_rand(); + } + + // + Reward + if (y_new == R_indx_i && theta_new == R_indx_j) + { + Time_Reward = -counter; + cout << " Time Reward = "<< Time_Reward << "\n"; + + if(abs(Time_Reward) <= R_step) + { + + cout << "\n Goal is achieved <= " << R_step << " time steps\n"; + reward = R[y_new][theta_new]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + } else + { + cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n"; + reward = -1; // ??? + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + } + + counter = 1; + } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment + { + cout << "\n Failed to achieve a goal! 
\n"; + + reward = R[y_new][theta_new]; + Q_next_state = 0; + + sample = reward + gamma * Q_next_state; + Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample); + } + + cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; + user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action + printf(" ramdom user action = %i \n",user_action); + //cin >> user_action; + } +} diff --git a/MobileRobot/Machine_Learning/Practice/Q_Learning b/MobileRobot/Machine_Learning/Practice/Q_Learning index 795da90ad5a7bb58acac57b42c0af4e805d3ab43..a2326691693b50e35e9bc15e880392c7235af958 100755 GIT binary patch delta 3517 zcmdl~_^M#S1<oz93}CQj;!S<74NPD*gN4pyA;vbr2htGk18D~^<H6*WjJHMJ#eKaw z^SS>lUd{p~g}i@CX1^+YH_u>Nz{t2@vL%Z?<AKeYEbkd57bqlvHL)@<Fz|y}5aPh* zLbg>*0vjY17#K>ucMHfRFnDycu9~dPsUp=Xslf1G<R}M3tXxuo!Nc-K(W7Im$&)iV z-IzihCU4|4V0xf5`7x(~s^0;RPL`_=loA+TlyWmL?0X>Tz_9;-Vgkbp{eS=e?|a~x zz_5S8WD70@)ed_FhUOoP{H<|}3=G|@t@a8Gtq1s90>Hd-dj*DW(Ng=#?>QwWuajVt z0Ga5~$r=d~XLYt$VCW6_-_2@gKiQi{!Nx3s!K0Iv%U*%u6TblOBvCNU`pXVPM;-); zv4Q0x4u0YnWEBFLVfw^QfuWoCk=<kgZV8SfAdcuEyU7OJilSx-44?QTPkiDR5LJL0 zwZU%kUmneRkUib3{&or=>mopUSiS5N7@A)&cCtExx#1x00me>NKRX46mw#D6K9dHC z{a`GW>t>a)19`GkWZy@4uvGr}|NrF`u)<Hapb%l^Z;c0s!Fw=QkG~%hBoA!aKw)yv z7ClT3*(xw}v#zlPg$ZLf>vCIAn8bkG+R0jM3km}P-rvGtnl%~BhlEK!m<<V&YLFSG zz95p<8!RrsdmgIiKOY0bC;pg&-K@GGc~Na!^kC|Qs+6=v3Z@-4;9xpv19IJF8wG}D zP`IoDa}U6{+iesWUZydFf~f^0_Jg@pzMHib%+#wB*~cvfOFj>O{r~^c2COm)EX@Q9 zr)V&j8x~F;>>w*d-E3gtBnnMCS|D*&5fCXVWHb2zuQD{@+_nY>4sWs$m}Wf<=0gJM zI+zU!q{r3@4Be(%K_u^H>&f<f5*$-O9MQ?v=pph^5MorLHByKuSc5}E6QqY#-WnPr zVqmTYj0+Nfc@CTa-dKUs0~07jK7yItbt3!J;UVJr^Z);sOTa1*fu&jaTcg3;!&abV z#^2%#=54eBC!F<GumF*Q2FPS9P}J0dNYQGL39lF@f8-ROyiS50ntWWWz~10JBLJpZ zO~8Ce$T)!6kdW~KnPDmiB6(%N;sU%2pn5KF!;;TGOHcy-V~HMEQBakyERh1M(-It5 zlR$b{J1n8OtRBpr2;zcL6G;5ERX1w{NZ<!csdP7MtR=|rrGooh-~qMm`~Uy1MfN4Y z1(tsQ|NliT8v{ePs212j8%qTS%R~IF*O(X>^t(l+z=B)&A%cgPK*>|iQi0+7jZRju zoUDkY0>i!uU;`&J3o0_bu$ZhSXvuhBa)O`=M}`H+pJ^7b@XmyWx0i(i!wyh+>(R-o zW&sXS-gSInnpF(UhlH#Wm<<V8J&;+Z3?P#CpE+19Zxd9{CQew$J}?K_c+VU?WNn}- z&zOTkR>mxW;UzmGNRt37D_Ba<^})Vl5O4Gg={wn(BU~s2wQUhE*j839FdyPVF)$nA zLM4#hreDlJ#px%rNx~8wS3qphOJ?Y<F@TzMzzpOXF0%xN<E#y43JeUBJB5|)9YDHR zAMro}-2jwyMGe4Wm!M)Yj0_C>7^Gn(Lj9Nj|GQZ^K}tl~&Cmn;wJE|Y$4$Xr;PvML z)2!RUe27;LgV_+ToHqq!@P#0fcRpBLfL9r+Czu@+ZlF@I5hO2KZ;I}{yW9}db4@|s zGcZeFcrk&If#F3QIQ^{VhUWK0P-RSD3j|$n?2`j~X)=>&C`YFWC`&e*OpX_oW=b-d zTqCN<4z4{neVSY^W-Sg%NuT%yU0>|yVPIf*afgY4q4U`O1C#%WDl#6JtRN<<3|1}m z>Hq&`*B^|{wLkRv=N)J{z~7qq|NsAe3m{RQASS_hU~;9HlNh+JgB!;QGw!08q9fR# z)Q=#8{xCMz{^0=|RN4bI1XQKFgJp4AV=QjNRBJT3P+VHZg9Q|Npb*zN(8(HR1oEX1 zh!piQg2o)XHl)f@GlB%89GDh#ebFt;Z=}HB(yhv3q`<JR2@+VJ#VfQFyb>5*ta<<c z|Be6-2Zmh*3JDDRKw=)9$6suH4@w;IhM;^AYY4M~17d}x;pV*(@0l14Ca;y&Wb~MP zURu<X;m!a56$}gvN8bMbe}I93Vf)Yj|6edLFm(O;|DS`Afg$$y|Nk0{3=Fw{{{IJ6 z$PfSg|6jn!!0_$Q|NjdZ7#IZp{{Ih(?gf)|WwaR^CP&JsGx|+#l#yrp^JelQ88yb^ zlTXUXGgeK0DI?FQJDF2fol$hMp{zV(=Hx(GdB%##g|h0b4}Smu&op_VtoP;*vU8aP z9O@ky85kJY7#JAz7#J9MHZN2tXPo?ik!7=l(gJ2ij>$V!^*Iskz>litoDMROR-waY zLp63LMvKXo8u~)8R^I{zQ0c?K(4jfGO2dqC!Q_n^_Kc?{f7Ebi<d|%!Y0kO87^2|R z=1R@wjEoN^D{6}~I!w0IzRtK{vY^g>#tV}#>WDMmnEX;Fo^iuuPu+MX1+U4gbl)*9 zm^@RjUy8vC(!At=7{l;`k%2*wL5ksr=VT{+H^C>K5NQ^uw8`X+`r?vSOdv&)3=Pn> zDNM7&<cIp=hN)253N+blu(%Y10MtsDndKl|jMFAN8i-5I1*?%{m;u!bQ?m@DM)Cj? 
z1A`C)0|O}WKph08o`PgOwU|M!;%B%a2?<b`UQ4jJ6oUlRUbt(S85jf@BpEWGc7csz zU|{f_%xEa?83xuR$uI#b3o|VdEH1^6U<L`924RT4WT-d~)E1cf?8%9S;+`c?*$-&p zSq2pcWnGXt0-)5xz`)Q85@(WR0Ob&nI1JC4e9%x_av4}wgaOp(hsl8G&67VGil-iC z289wYgRM9u9AIW%1L<OrVt4^{11uaKgTvwDe^{CWnFPY`q3S^;7>Ez^=|_<HlP@v~ zPi`=hFqB~dg}M~O7pSQ)f6K9eLigi;4QP0POaNi!$q$XhJ@ufvIH0mHUFJ}6P_F}| z8YXVd0`b#yQHY-|gCrRk7~Cf}8i`9rLhVBLK>|pYaV?nKKY5|CIOCDcJB{s`Hvcu< K%{ckA`Evkw!NCat delta 3522 zcmaD=u&Hpu1<p0H3}CQk;!S<7ElglG1B1Y1A;vbr8`2Q&4QU53<HqEbjJHMB>|oq` ze%0Grwa~53^3NxloWI&&w|NHB0!GFIlPy{F86RxUWO>ghc|aintcjI@fq@^)f)Ecj z7qYEl61X6#z`#)Iy<0#ofx)Aj_0(i-P8F%8k_rs}MUHYn#HLFsFnCzrD0*~^wRv(T zryG;1%H)ll222dflOJ;$s74;}=w!Ldpq#+)qLiC~Vc!Eu2ZsF*6cZR;=>Plwf8QG2 z1cv<wCR=bRsD>yjFf{*Q<Zq2*WMJrK4OLcPXg$E+5&-78D=RQ`i@GXLe$Od6d7T8C z1js~>PF78jIIFO-0z+@W|87=3<;mVW3N{)E3?7}Vx0DnZKJg3iP7(#vtf#>I$b%rU zYhZT7!B6~xV0GQ5Ta*+Sx_LJ#O%~vm;Ftp9h)z<PY{0E3s*%9(i9hnhCw>7@1*lOK zN|XQcXx4-5>1LHzQUF<}0n)=NrKG^n{DQHQRS?X5Euz5Ce1Nf&^^u4I!^^)cAfG)~ z1behpuAB9ZBFK}aBKtnVgQfD%|Nk$qfEDfoD`e(xjR%LpelVAtzaJ7L8x+|<VX{sU zJxnGkDll}j)+mC)gt42oToDu|F(9{gvN|h*!-Ds>FqmdF2J<0dVh?6R!UUwQ+f){0 z60bB^T!8mHRL_4t28K`kF$cR@|0;l7@LK^rm^z^<A1WXPQ-=aLm?nbsur@0|!=(z$ z4H1TNy@eGRUZydFf++x`<Og%9d^c+-n8{ryvX5H|mV6%m`v3o>4OpcbSegkIPTF8D zJe(xhK~{*0DZs)>6q<H^$%De_35XPZBtQ89uQD{@td<7{4sWs$m}Z>~=0gH$IhYLz zB#^pp(^ilHyv_2H?fE1)Qb8QiWO?)u`6vi6%2OUGL_WxYL*%C%$Q|$Hph55q%-tgh z<!%;KV0d{BoB(!!l>A@<g~&lL6BHuq@DTC*`TzgRC190(U}+Zq)@U$yvK%Ox@wfPb zd5v=5gi|jE3lJ%2fFy%$_63om-XIfRF;4!-DL#3f1UoeOh{%Dx!FxskOtZ3p`H+wi z0J9+>15(#*`c4*<rQXPb)$%TY>bbxTOFrj7@}g&C(E}?As&bPoQeZ{Of&(iFq=z*^ z7Mjca!CX}VXkf_*C@{RX>Sk2|$^T#}mF{NMl?C~|RB)dQJfOCH|NsBB$i4))z|!yk z|G%hZV_@hO{Urm63?5kp2FpYIt=E_s81%bEUx5X;@IwR-F@cijCVmBm?>9PGpMm6L zAIK;$?3(~Ka5A%?BGVR`$!da@j1ML!2&!;cfRu=u$-u%p6B^!9G71bkK;^ARC+jz9 zNGPr21JkU}z<fx^egd;0Aq!I1ZF)f(6td^Q;sU%)P(7PCVIjK#Brm#78a-rfpem<G zgF;qDBZ1*1J0nPw04pn4O3?MezGDz?^b6@b@kt|G_(}@wGTueJV4C$7m=AH`GcX(C zLXf&{(<4$K7ao$DBrL(P1jH6yB!%u81E@(IQXtoGX(TWlXBFX9U|^iwDXeTS0Mf<! zhzAnr2B4%X$^aI-1QnBEWMJ6GAPp-K>c9N|-_3ec5>zH#mqZWj-I55efa)wzE#S`s z@k+ZSh=zD&GMEkV3P@eIX(7l&-h8mQ0IxDsPcS<u+(4yZAV^-+UlQGWcez2P3b2|= zg1l#-k-+d`0wV*%i#TxlS<MYivx}h0E=hot3%cIeCkOV@WG2y2j!+3ua`cy&94{)( zq$e@CMpTm>TzhW%G`U{PS{#&;KJg2>zSz&hz`*e04if`I=dt|{CjS#vWPC7LK}=Q| ztXk^R|NqUdKNy>9fAH|nJJ51~zcumy|Nr|IK%zQ9OoH*j<VrCoF>qZ6H;xl#+(j`( zN3cPuA3+BFVQjAb12L$y2WkkYN_Pj#;<UzC+=j_lY;vKvw6p^YDD*%fu5+N1RZR@! 
zOBoO;DmD2Xrxd8j(T3Dm-=J0QJ1{Ni`l4I*zNi9&OSkG3Q3Zy5O^~qqEMB4YLN|fo z#hUm3|L+LkaA4R~ppd|@4<zQ%dHluJ_n@?)FA7Q<x}wmu!Oj7(f>U(!UWxZijJ%WA zN^6E|IWS5yTgNakFmN$2FmSy2|G$EPf#J;C|Njp#Ffi=@`Tze51_p*nzyAN{U}RuO z{r&&H1|tJQ>7W1qLG|*>KmY$1FfuUw`}6<*0tN;KiNF8<gJONfWK$V!#*WE}GU|+B zlRIVPnONRVUL~W(cz*Il8F|L0$sc9p8BHe(%BnNUPPUYlXDpl?DJ##|Fu77zo$<xw zg|hyWA24!k{vbP-NrWNOfsuiMfsKKIft!JW0TlC$o98N&Gj5hpTENW6FnOn{J}06% z_)*oI(?SN)IJDSosK(C3XfxSTLthBi_*<d?Dufsq40tD3X_zrCnY>ZMp7Gq|j~ecb z7LzSC%{ea^LlivPT&cO7k@3c4MQw4Al=gMT1Cs@H_A`E%d{IZ7@yFzsI`NDvCVT3} zGriEAyh`^S<AKRD_4=hQ=t5eV91vp|elRjH2r@`9oY0-@r0?dq0xAvi6i9_G6GVCg zRGbAQ$iTp0$;7}Q#K6aJLK5N`s4#>5<c<2`jINUf4a6C<CO^~{mn?vqa0ktV8ju>v z*<e{oh7M@s7v`)*lN}Aj8CQeUFdm&;Xduq`3e47@ywE^g(vBJ8ss~UrVY*x=Uo;Sx z3<AqaGE9M*33GGwWJW{rdQgi7<lzJjNI-ai+S?2a3>i>e3(#~GK*i^viC2Qfr5FxC z-2k(t8Y<qo7a`fCl9xkT{d%<mrZjlJl8Cp~uURB?bw-%OHge3=FF$A2bw~+yRyS zAOVr>gvy?n{LxT6@ixdD1}TOeP<vs1e*qQW0u_h3=QlG0g8;+F|4X3mhZ@Gf&H@QT zP_YFPgM|a<WI<zbMnw>5XaZGp1kFq{7KoW2pyD9?AZ$H(qmj6$8&uZ~s4UDS{!npH zM+2l9CLYWJ361HZ5dXqLBOc@i$wH`I=<cro$ub@Vlh-E~8jCaD+1zPt&&1-MpO~}x Zxal57p3;Jp#FEtbw9LH3oXIuj?*Q1fy0!oS diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp index d62ee1be..1ce7497f 100644 --- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp +++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp @@ -7,23 +7,20 @@ using namespace std; -const int row = 13; +const int row = 48; const int col = 13; double gamma = .8; double alpha = .1; -double R_step = 200; +double R_step = 500; double R[row][col] = {0}; double Q[row][col] = {0}; - -bool Goal = false; int iterations = 1; int it_; int user_action; -int update_final_state; double Q_next_state; int i,j; @@ -37,9 +34,10 @@ int R_indx_j = .5 * col; int P_indx_i = row - 2; int P_indx_j = col - 1; -int counter = 1; +int counter; int Time_Reward; double sample; + void print_R(); void print_Q(); void generate_rand(); @@ -47,7 +45,7 @@ void generate_rand(); int main() { R[R_indx_i][R_indx_j] = 50; // reward - R[P_indx_i][P_indx_j] = -100; // punishment + R[P_indx_i][P_indx_j] = -60; // punishment print_R(); @@ -146,7 +144,7 @@ int main() Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); } - counter = 1; + counter = 0; print_Q(); generate_rand(); iterations++; @@ -167,10 +165,9 @@ int main() } cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; - user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; + user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action printf(" ramdom user action = %i \n",user_action); //cin >> user_action; - } return 0; } @@ -212,7 +209,8 @@ void print_Q() void generate_rand() { - + // Generate Random Pose for current state (position) + i = ((double) rand() / (RAND_MAX)) * (row) ; j = ((double) rand() / (RAND_MAX)) * (col) ; diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ index 62b41767..f3d3f718 100644 --- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ +++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ @@ -7,7 +7,7 @@ using namespace std; -const int row = 13; +const int row = 48; const int col = 13; @@ -17,13 +17,10 @@ double R_step = 200; double R[row][col] = {0}; double Q[row][col] = {0}; - -bool Goal = false; int iterations = 1; int it_; int user_action; -int update_final_state; double Q_next_state; int i,j; @@ -37,9 +34,10 @@ int R_indx_j = .5 * col; int P_indx_i = row - 2; int P_indx_j = col - 1; 
-int counter = 1; +int counter; int Time_Reward; double sample; + void print_R(); void print_Q(); void generate_rand(); @@ -47,7 +45,7 @@ void generate_rand(); int main() { R[R_indx_i][R_indx_j] = 50; // reward - R[P_indx_i][P_indx_j] = -100; // punishment + R[P_indx_i][P_indx_j] = -60; // punishment print_R(); @@ -146,12 +144,10 @@ int main() Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample); } - counter = 1; + counter = 0; print_Q(); generate_rand(); - iterations++; - - //Goal = true; + iterations++; } else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment { cout << "\n Failed to achieve a goal! \n"; @@ -169,10 +165,9 @@ int main() } cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : "; - user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; + user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action printf(" ramdom user action = %i \n",user_action); //cin >> user_action; - } return 0; } @@ -214,7 +209,8 @@ void print_Q() void generate_rand() { - + // Generate Random Pose for current state (position) + i = ((double) rand() / (RAND_MAX)) * (row) ; j = ((double) rand() / (RAND_MAX)) * (col) ; -- GitLab
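
Note on the Q_Learning() routine added in this patch: it indexes R and Q directly with the continuous values y_new and theta_new, declares the table dimensions with an empty initializer (int ImageConverter::row = ;), and sizes the member arrays R[row][col] from non-constant statics, so the file does not compile yet, which matches the "not working" note in the subject line. The sketch below is one minimal, self-contained way to express the update the patch appears to be aiming for: the continuous (y, theta) pose is first discretized into grid indices, and the table is then updated with sample = reward + gamma * Q(next state), following the patch's state-indexed table rather than a full state-action table. The grid sizes, value ranges, helper names, and the example action are assumptions for illustration only, not the committed code.

// Minimal sketch, not the committed code: a TD-style backup on a
// state-indexed table over a discretized (y, theta) grid, mirroring the
// sample = reward + gamma * Q_next_state update used in the patch.
// GRID_ROWS, GRID_COLS, toIndex() and the pose ranges are assumptions.
#include <algorithm>
#include <iostream>

namespace {
const int GRID_ROWS = 13;          // assumed number of y bins
const int GRID_COLS = 13;          // assumed number of theta bins
const double GAMMA  = 0.8;         // discount factor, as in the patch
const double ALPHA  = 0.1;         // learning rate, as in the patch

double R[GRID_ROWS][GRID_COLS] = {{0.0}};  // immediate rewards
double Q[GRID_ROWS][GRID_COLS] = {{0.0}};  // learned values

// Map a continuous value in [lo, hi] onto a bin index in [0, bins - 1].
int toIndex(double value, double lo, double hi, int bins)
{
    int idx = static_cast<int>((value - lo) / (hi - lo) * bins);
    return std::min(std::max(idx, 0), bins - 1);
}
}  // namespace

int main()
{
    // Assumed continuous ranges, taken from GenerateRandomVal():
    // y in [-0.24, 0.48] m, theta in [-0.5, 0.5] * RefPose[3] (0.703702 rad).
    const double yMin = -0.24, yMax = 0.48;
    const double thMax = 0.5 * 0.703702, thMin = -thMax;

    double y_new = 0.10, theta_new = -0.20;              // example continuous pose
    int i = toIndex(y_new, yMin, yMax, GRID_ROWS);       // row index
    int j = toIndex(theta_new, thMin, thMax, GRID_COLS); // column index

    // One backup for the action "North" (one row up), guarded so the
    // agent cannot step off the grid, as in the patch's bounds checks.
    if (i > 0)
    {
        const int ni = i - 1, nj = j;                    // state reached by the action
        const double sample = R[i][j] + GAMMA * Q[ni][nj];
        Q[i][j] = (1.0 - ALPHA) * Q[i][j] + ALPHA * sample;
        std::cout << "Q[" << i << "][" << j << "] = " << Q[i][j] << "\n";
        i = ni;                                          // advance to the next state
    }
    return 0;
}

In the committed code, the same backup would apply once the (y_new, theta_new) pose is mapped to integer indices and row/col are given concrete values; until then the array declarations and double-valued indices are the main reason the build fails.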
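A separate detail in the gain definitions added above: Ki_theta is initialized as (8/15) * Ki_y, and 8/15 is integer division in C++, so it evaluates to 0 and the yaw integral gain is silently zeroed regardless of Ki_y. If the 8:15 ratio is the intent (the ratio itself comes from the patch; the form below is only a suggestion), floating-point literals preserve it:

// Suggested form, assuming the 8/15 ratio between Ki_theta and Ki_y is intentional.
double Kp_theta = 0.2 * Kp_y;
double Ki_theta = (8.0 / 15.0) * Ki_y;   // integer 8/15 would truncate to 0
double Kd_theta = 0.2 * Kd_y;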