From a9e83340b16121ffbcc89bfde0702bff3c567cdb Mon Sep 17 00:00:00 2001
From: Farid Alijani <farid.alijani@student.lut.fi>
Date: Thu, 7 Apr 2016 20:55:50 +0200
Subject: [PATCH] Add Q-learning to robot docking controller (work in progress; not yet functional)

---
 .../CamMark/camtomar/include/VisionControl.h  |  50 ++--
 .../CamMark/camtomar/include/VisionControl.h~ |   1 +
 .../CamMark/camtomar/src/VisionControl.cpp    | 210 ++++++++++---
 .../CamMark/camtomar/src/VisionControl.cpp~   | 280 ++++++++++++------
 .../Machine_Learning/Practice/Q_Learning      | Bin 14514 -> 14442 bytes
 .../Machine_Learning/Practice/Q_learning.cpp  |  20 +-
 .../Machine_Learning/Practice/Q_learning.cpp~ |  22 +-
 7 files changed, 398 insertions(+), 185 deletions(-)

diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h
index 9619df65..e7e8fbd8 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h
@@ -121,15 +121,11 @@ public:
   void GenerateRandomVal();
   void RandomPose(double X_rand, double Y_rand, double theta_rand);
 
-         static double Pos_Px,Pos_Ix,Pos_Dx;
-	 static double Pos_Py,Pos_Iy,Pos_Dy;
+         static double Kp_x,Ki_x,Kd_x;
+	 
+	static double Kp_theta,Ki_theta,Kd_theta;
 
-         static double S_Ang_P,S_Ang_I,S_Ang_D;
-         static double L_Ang_P,L_Ang_I,L_Ang_D;
-
-	static double Ang_P,Ang_I,Ang_D;
-
-	static double x_new,y_new,theta_new;
+	
 
 	static double safety_margin_X;
 	double x_rand_SM;
@@ -163,7 +159,34 @@ public:
         static double control_signalYAW;
         // ---- CONTROLL PARAMETERS ------ //
         
-        
+	        
+	// ---- Q_LEARNING PARAMETERS ------ //
+
+	static double x_new,y_new,theta_new; 	// inp (y,theta)
+	static double Kp_y,Ki_y,Kd_y; 		// actions (Kp,Ki,Kd)
+	
+	static int R_indx_i, R_indx_j, P_indx_i, P_indx_j;
+
+
+	static int row, col;
+	static double gamma;
+	static double alpha;
+	static double R_step;
+	
+	double R[row][col];
+	double Q[row][col];
+
+	static double Q_curr_state,Q_next_state;
+	static double reward;
+	static int counter;
+	static int Time_Reward;
+	static double sample;
+
+	void Q_Learning();
+
+	// ---- Q_LEARNING PARAMETERS ------ //
+
+
 	// ---- TIMER PARAMETERS ------ //
 	static int msec,sec,min,hr;
 	// ---- TIMER PARAMETERS ------ //
@@ -181,13 +204,4 @@ public:
         double camPose[6];
         double RefPose[6];
         
-        /*double prev_error[6];
-        double int_error[6];
-        double curr_error[6];
-        double diff[6];
-        double p_term[6];
-        double d_term[6];
-        double i_term[6];
-        double control_signal[6];*/
-        
 };
diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~
index d92b9ae7..9619df65 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~
@@ -175,6 +175,7 @@ public:
 	static double Py_eps,Pz_eps,A_eps;
 	static double x_thresh_undock,y_thresh_undock,theta_thresh_undock;
 	static double rand_X_SM,rand_A_esp,rand_Y_esp;
+	static double speed_reducer;
 
         double marpose[6];
         double camPose[6];
diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp
index 18e3e50d..9552bb01 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp
@@ -46,7 +46,6 @@ using namespace cv;
 using namespace aruco;
 using namespace std;
 
-
 float ImageConverter::TheMarkerSize = .1; // Default marker size
 
 int ImageConverter::Thresh1_min = 10;
@@ -68,45 +67,64 @@ int ImageConverter::ThresParam1 = 0;
 int ImageConverter::ThresParam2 = 0;
 
 // ---- CONTROLL PARAMETERS ------ //
-double ImageConverter::prev_errorX;
-double ImageConverter::curr_errorX;
-double ImageConverter::int_errorX;
-double ImageConverter::diffX;
+double ImageConverter::prev_errorX, ImageConverter::curr_errorX, ImageConverter::int_errorX, ImageConverter::diffX;
+
 double ImageConverter::p_termX = 0;
 double ImageConverter::i_termX = 0;
 double ImageConverter::d_termX = 0;
-double ImageConverter::control_signalX;
 
-double ImageConverter::prev_errorY;
-double ImageConverter::curr_errorY;
-double ImageConverter::int_errorY;
-double ImageConverter::diffY;
+double ImageConverter::prev_errorY, ImageConverter::curr_errorY, ImageConverter::int_errorY, ImageConverter::diffY;
+
 double ImageConverter::p_termY = 0;
 double ImageConverter::i_termY = 0;
 double ImageConverter::d_termY = 0;
-double ImageConverter::control_signalY;
 
-double ImageConverter::prev_errorYAW;
-double ImageConverter::curr_errorYAW;
-double ImageConverter::int_errorYAW;
-double ImageConverter::diffYAW;
+double ImageConverter::prev_errorYAW,ImageConverter::curr_errorYAW,ImageConverter::int_errorYAW,ImageConverter::diffYAW;
+
 double ImageConverter::p_termYAW = 0;
 double ImageConverter::i_termYAW = 0;
 double ImageConverter::d_termYAW = 0;
-double ImageConverter::control_signalYAW;
+
+
+double ImageConverter::control_signalX, ImageConverter::control_signalY, ImageConverter::control_signalYAW;
 // ---- CONTROLL PARAMETERS ------ //
 
-// ----------------  Initial Gains which dock the robot successfully ---------------- //
-double ImageConverter::Pos_Py = .4;
-double ImageConverter::Pos_Iy = .0042;
-double ImageConverter::Pos_Dy = 0;
+// ----------------  PID gains---------------- //
+double ImageConverter::Kp_y = .4;
+double ImageConverter::Ki_y = .0042;
+double ImageConverter::Kd_y = 0;
 
-double ImageConverter::Ang_P = .2 * Pos_Py;
-double ImageConverter::Ang_I = (8/15) * Pos_Iy;
-double ImageConverter::Ang_D = .2 * Pos_Dy;
+double ImageConverter::Kp_theta = .2 * Kp_y;
+double ImageConverter::Ki_theta = (8.0/15) * Ki_y; // was (8/15): integer division evaluates to 0, zeroing the integral gain
+double ImageConverter::Kd_theta = .2 * Kd_y;
+// ----------------  PID gains---------------- //
 
-double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration;
+// random pose initialized
+double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new;
+
+// ----------------  Q_LEARNING PARAMETERS ---------------- //
+
+double ImageConverter::gamma = .8;
+double ImageConverter::alpha = .1;
+double ImageConverter::R_step = 200;
+
+
+int ImageConverter::row = ;	// FIXME: missing initializer -- this line does not compile
+int ImageConverter::col = ;	// FIXME: missing initializer -- this line does not compile
+
+double ImageConverter::Q_curr_state = Q[y_new][theta_new]; // FIXME: indexes with doubles and reads non-static members during static init -- does not compile
+double ImageConverter::Q_next_state;
 
+int ImageConverter::counter = 1;
+int ImageConverter::Time_Reward;
+double ImageConverter::sample;
+
+
+
+
+// ----------------  Q_LEARNING PARAMETERS ---------------- //
+
+double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration;
 double ImageConverter::zeroMax = .0000000000000000001;
 double ImageConverter::zeroMin = -.0000000000000000001;
 
@@ -124,8 +142,6 @@ double ImageConverter::x_thresh_undock = .02;
 double ImageConverter::y_thresh_undock = .015;
 double ImageConverter::theta_thresh_undock = (CV_PI/180) * 3;
 
-// random pose initialized
-double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new;
 
 double ImageConverter::docking_counter = 1;
  ImageConverter::ImageConverter() : 
@@ -457,12 +473,12 @@ void ImageConverter::ContStart()
 
 }
 
-void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB)
+void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) // subscriber
 {
 camPose[0] = CamFB->pose.position.x;
-camPose[1] = CamFB->pose.position.y;
+camPose[1] = CamFB->pose.position.y; // y pose
 camPose[2] = CamFB->pose.position.z;
-camPose[3] = CamFB->pose.orientation.x;
+camPose[3] = CamFB->pose.orientation.x; //  theta orientation
         
         // in Marker coordinate sys. 
         
@@ -475,7 +491,7 @@ camPose[3] = CamFB->pose.orientation.x;
         
 	
 	ROS_INFO_STREAM("--------- PID gains in trial no. " << docking_counter << " : ---------\n");
-	ROS_INFO_STREAM(" Kp = " << Pos_Py << " ,  Ki = " << Pos_Iy << " , Kd = " << Pos_Dy << "\n");
+	ROS_INFO_STREAM(" Kp = " << Kp_y << " ,  Ki = " << Ki_y << " , Kd = " << Kd_y << "\n");
         
 	ROS_INFO_STREAM(" --------------------- Pose estimation ------------------ \n");
 	ROS_INFO_STREAM(" X_mar = " << camPose[2] << " vs X_ref = " << RefPose[2] << " \n");
@@ -487,7 +503,7 @@ camPose[3] = CamFB->pose.orientation.x;
 
 	if(Go2RandomPose == false)
 	{
-		ROS_INFO_STREAM("---------- MOVING TOWARDS DOCKING PLATFORM ... ---------  \n ");
+		ROS_INFO_STREAM("---------- MOVING TOWARDS DOCKING PLATFORM ---------  \n ");
 		if (
             		(abs(RefPose[2] - camPose[2]) <= Pz_eps) //&& // Z
             		//(abs(RefPose[1] - camPose[1]) <= Py_eps) && // Y
@@ -528,7 +544,7 @@ camPose[3] = CamFB->pose.orientation.x;
 		}
 	} else
 	{
-  		ROS_INFO("---------- MOVING TOWARDS RANDOM POSE ... ---------\n");		
+  		ROS_INFO("---------- MOVING TOWARDS RANDOM POSE ---------\n");		
 		RandomPose(x_new,y_new,theta_new);
 	}
 }
@@ -582,9 +598,9 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl
         	diffY = ((curr_errorY - prev_errorY) / dt);
         
         	// scalling
-        	p_termY = Pos_Py * curr_errorY;
-        	i_termY = Pos_Iy * int_errorY;
-        	d_termY = Pos_Dy * diffY;
+        	p_termY = Kp_y * curr_errorY;
+        	i_termY = Ki_y * int_errorY;
+        	d_termY = Kd_y * diffY;
 
 		//ROS_INFO_STREAM("pY = " << p_termY << ", iY = " << i_termY << " dY = " << d_termY<< " \n");	      
         	// control signal
@@ -622,9 +638,9 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl
 
 		ROS_INFO_STREAM(" adjusting orientation! \n");	        		
 		// scalling
-        	p_termYAW = Ang_P * curr_errorYAW;
-       		i_termYAW = Ang_I * int_errorYAW;
-       		d_termYAW = Ang_D * diffYAW;
+        	p_termYAW = Kp_theta * curr_errorYAW;
+       		i_termYAW = Ki_theta * int_errorYAW;
+       		d_termYAW = Kd_theta * diffYAW;
         	
 		// control signal
         	control_signalYAW = p_termYAW + i_termYAW + d_termYAW;
@@ -704,21 +720,19 @@ void ImageConverter::move2docking(double VelX_est, double VelY_est, double omega
 void ImageConverter::GenerateRandomVal()
 {
 	// ---------------- PID gains ------------------
-	Pos_Py = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; 	//   .1 < Kp < .76
-	Pos_Iy = ((double) rand() / (RAND_MAX)) * .006;			// 0 < Ki < .007
-	Pos_Dy = ((double) rand() / (RAND_MAX)) * .02;			// 0 < Kd < .02
+	Kp_y = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; 	// .4 < Kp < .76
+	Ki_y = ((double) rand() / (RAND_MAX)) * .006;			// 0 < Ki < .006
+	Kd_y = ((double) rand() / (RAND_MAX)) * .02;			// 0 < Kd < .02
 	
 	// ------------------ Generating Random Pose ------------------
 	x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM;
-	y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24);
-	theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]);
+	y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); // will be used for Q_Learning
+	theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); // will be used for Q_Learning
 }
 
 void ImageConverter::RandomPose(double X_rand, double Y_rand, double theta_rand)
 {
 	ROS_INFO_STREAM(" Xr = " << X_rand << ", Yr = " << Y_rand << ", Thetar = " << theta_rand << " rad ~ " << theta_rand * (180/CV_PI) << " deg\n");
-	//ROS_INFO_STREAM(" Y_rand	= " << Y_rand << " m. \n");
-	//ROS_INFO_STREAM(" theta_rand	= " << theta_rand << " rad. =~ " << theta_rand * (180/CV_PI) << " deg. \n");
 	ROS_INFO_STREAM(" -------------------------------------------------------------- \n");
 
 double vel_x,vel_y,omega_z;
@@ -819,3 +833,109 @@ geometry_msgs::Twist msg_new;
 commandPub.publish(msg_new);
 	
 }
+
+void ImageConverter::Q_Learning()
+{
+	while ( iterations <= it_ ) // FIXME: 'iterations', 'it_', 'user_action', 'i', 'j', 'generate_rand' are undeclared in this scope
+        {       
+                if (user_action == 1 && y_new != 0) // North
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[y_new - 1][theta_new];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        i--;
+                        counter++;
+                } else if (user_action == 1 && y_new == 0) // North
+                {
+                        cout << "You can't go further up!\n";
+                } else if (user_action == 3 && i < (row - 1)) // South, i < row
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[y_new + 1][theta_new];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        i++;
+                        counter++;
+                } else if (user_action == 3 && y_new >= (row - 1)) // South
+                {
+                        cout << "You can't go further down!\n";
+                } else if (user_action == 2 && theta_new < (col - 1)) // East
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[i][j + 1];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        j++;
+                        counter++;
+                } else if (user_action == 2 && j >= (col - 1)) // East, j > col
+                {
+                        cout << "You can't go further right!\n";
+                } else if (user_action == 4 && j != 0 ) // West
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[y_new][theta_new - 1];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        j--;
+                        counter++;
+                } else if (user_action == 4 && theta_new == 0) // West, j = 1
+                {
+                        cout << "You can't go further left!\n";
+                } else
+                {
+                        cout << "\nGenerating random pose in grid for 1st. time!\n";
+                        generate_rand();
+                }
+                
+                // + Reward
+                if (y_new == R_indx_i && theta_new == R_indx_j)
+                {
+                        Time_Reward = -counter;
+                        cout << " Time Reward = "<< Time_Reward << "\n";
+                        
+                        if(abs(Time_Reward) <= R_step)
+                        {
+                                
+                                cout << "\n Goal is achieved <= " << R_step << " time steps\n";
+                                reward = R[y_new][theta_new];
+                                Q_next_state = 0;
+                        
+                                sample = reward + gamma * Q_next_state;
+                                Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        } else
+                        {
+                                cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n";
+                                reward = -1; // ???
+                                Q_next_state = 0;
+                        
+                                sample = reward + gamma * Q_next_state;
+                                Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        }
+                        
+                        counter = 0;  
+                } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment
+                {
+                           cout << "\n Failed to achieve a goal! \n";
+                                
+                                reward = R[y_new][theta_new];
+                                Q_next_state = 0;
+                        
+                                sample = reward + gamma * Q_next_state;
+                                Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);  
+                }
+                
+                cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
+                user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action  
+                printf(" ramdom user action = %i \n",user_action);
+                //cin >> user_action;
+          }
+}
diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~
index d8d96b0d..1ff2e063 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~
@@ -46,7 +46,6 @@ using namespace cv;
 using namespace aruco;
 using namespace std;
 
-
 float ImageConverter::TheMarkerSize = .1; // Default marker size
 
 int ImageConverter::Thresh1_min = 10;
@@ -68,75 +67,81 @@ int ImageConverter::ThresParam1 = 0;
 int ImageConverter::ThresParam2 = 0;
 
 // ---- CONTROLL PARAMETERS ------ //
-double ImageConverter::prev_errorX;
-double ImageConverter::curr_errorX;
-double ImageConverter::int_errorX;
-double ImageConverter::diffX;
+double ImageConverter::prev_errorX, ImageConverter::curr_errorX, ImageConverter::int_errorX, ImageConverter::diffX;
+
 double ImageConverter::p_termX = 0;
 double ImageConverter::i_termX = 0;
 double ImageConverter::d_termX = 0;
-double ImageConverter::control_signalX;
 
-double ImageConverter::prev_errorY;
-double ImageConverter::curr_errorY;
-double ImageConverter::int_errorY;
-double ImageConverter::diffY;
+double ImageConverter::prev_errorY, ImageConverter::curr_errorY, ImageConverter::int_errorY, ImageConverter::diffY;
+
 double ImageConverter::p_termY = 0;
 double ImageConverter::i_termY = 0;
 double ImageConverter::d_termY = 0;
-double ImageConverter::control_signalY;
 
-double ImageConverter::prev_errorYAW;
-double ImageConverter::curr_errorYAW;
-double ImageConverter::int_errorYAW;
-double ImageConverter::diffYAW;
+double ImageConverter::prev_errorYAW,ImageConverter::curr_errorYAW,ImageConverter::int_errorYAW,ImageConverter::diffYAW;
+
 double ImageConverter::p_termYAW = 0;
 double ImageConverter::i_termYAW = 0;
 double ImageConverter::d_termYAW = 0;
-double ImageConverter::control_signalYAW;
+
+
+double ImageConverter::control_signalX, ImageConverter::control_signalY, ImageConverter::control_signalYAW;
 // ---- CONTROLL PARAMETERS ------ //
 
-// ----------------  Initial Gains which dock the robot successfully ---------------- //
+// ----------------  PID gains---------------- //
+double ImageConverter::Kp_y = .4;
+double ImageConverter::Ki_y = .0042;
+double ImageConverter::Kd_y = 0;
 
-double ImageConverter::Pos_Px = .1;
-double ImageConverter::Pos_Ix = 0.0028;
-double ImageConverter::Pos_Dx = 0;
+double ImageConverter::Kp_theta = .2 * Kp_y;
+double ImageConverter::Ki_theta = (8.0/15) * Ki_y; // was (8/15): integer division evaluates to 0, zeroing the integral gain
+double ImageConverter::Kd_theta = .2 * Kd_y;
+// ----------------  PID gains---------------- //
 
-double ImageConverter::Pos_Py = 4 * Pos_Px;
-double ImageConverter::Pos_Iy = 1.5 * Pos_Ix;
-double ImageConverter::Pos_Dy = 4 * Pos_Dx;
+// random pose initialized
+double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new;
 
-/*double ImageConverter::S_Ang_P = .2 * Pos_Px;
-double ImageConverter::S_Ang_I = .2 * Pos_Ix;
-double ImageConverter::S_Ang_D = .2 * Pos_Dx;
+// ----------------  Q_LEARNING PARAMETERS ---------------- //
 
-double ImageConverter::L_Ang_P = .8 * Pos_Px;
-double ImageConverter::L_Ang_I = .8 * Pos_Ix;
-double ImageConverter::L_Ang_D = .8 * Pos_Dx;*/
+double ImageConverter::gamma = .8;
+double ImageConverter::alpha = .1;
+double ImageConverter::R_step = 200;
+
+
+int ImageConverter::row = ;	// FIXME: missing initializer -- this line does not compile
+int ImageConverter::col = ;	// FIXME: missing initializer -- this line does not compile
+
+double ImageConverter::Q_curr_state = Q[y_new][theta_new]; // FIXME: indexes with doubles and reads non-static members during static init -- does not compile
+double ImageConverter::Q_next_state;
+
+int ImageConverter::counter = 1;
+int ImageConverter::Time_Reward;
+double ImageConverter::sample;
 
-double ImageConverter::Ang_P = .8 * Pos_Px;
-double ImageConverter::Ang_I = .8 * Pos_Ix;
-double ImageConverter::Ang_D = .8 * Pos_Dx;
 
-double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration;
 
+
+// ----------------  Q_LEARNING PARAMETERS ---------------- //
+
+double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration;
 double ImageConverter::zeroMax = .0000000000000000001;
 double ImageConverter::zeroMin = -.0000000000000000001;
 
+double ImageConverter::speed_reducer = 1;
+
 // ------ offsets X, Y, theta for Docking ---------
 double ImageConverter::Pz_eps = .001;
 double ImageConverter::Py_eps = .002;
 double ImageConverter::A_eps = (CV_PI/180) * .5; // 1 deg.
 
-double ImageConverter::safety_margin_X = .1; // safety margin X axis in docking process : 10 cm
+double ImageConverter::safety_margin_X = .15; // safety margin X axis in docking process : 15 cm
 
 // ------ offsets X, Y, theta for Undocking ---------
 double ImageConverter::x_thresh_undock = .02;
-double ImageConverter::y_thresh_undock = .01;
+double ImageConverter::y_thresh_undock = .015;
 double ImageConverter::theta_thresh_undock = (CV_PI/180) * 3;
 
-// random pose initialized
-double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new;
 
 double ImageConverter::docking_counter = 1;
  ImageConverter::ImageConverter() : 
@@ -151,8 +156,8 @@ double ImageConverter::docking_counter = 1;
 // Ref. Values
 RefPose[0] = -.0957;
 
-RefPose[1] = -0.0196804;
-RefPose[2] = 0.305155;
+RefPose[1] = -0.0194504;
+RefPose[2] = 0.304966;
 RefPose[3] = 0.703702;
 
 x_rand_SM = RefPose[2] + .55; // 55 cm spreading domain in the x-axis while moving towards the random pose
@@ -468,13 +473,13 @@ void ImageConverter::ContStart()
 
 }
 
-void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB)
+void ImageConverter::camCB(const geometry_msgs::PoseStamped::ConstPtr& CamFB) // subscriber
 {
 camPose[0] = CamFB->pose.position.x;
-camPose[1] = CamFB->pose.position.y;
+camPose[1] = CamFB->pose.position.y; // y pose
 camPose[2] = CamFB->pose.position.z;
-camPose[3] = CamFB->pose.orientation.x;
-
+camPose[3] = CamFB->pose.orientation.x; //  theta orientation
+        
         // in Marker coordinate sys. 
         
         // z => X robot (thrust)
@@ -486,7 +491,7 @@ camPose[3] = CamFB->pose.orientation.x;
         
 	
 	ROS_INFO_STREAM("--------- PID gains in trial no. " << docking_counter << " : ---------\n");
-	ROS_INFO_STREAM(" Kp = " << Pos_Px << " ,  Ki = " << Pos_Ix << " , Kd = " << Pos_Dx << "\n");
+	ROS_INFO_STREAM(" Kp = " << Kp_y << " ,  Ki = " << Ki_y << " , Kd = " << Kd_y << "\n");
         
 	ROS_INFO_STREAM(" --------------------- Pose estimation ------------------ \n");
 	ROS_INFO_STREAM(" X_mar = " << camPose[2] << " vs X_ref = " << RefPose[2] << " \n");
@@ -498,7 +503,7 @@ camPose[3] = CamFB->pose.orientation.x;
 
 	if(Go2RandomPose == false)
 	{
-		ROS_INFO_STREAM("---------- MOVING TOWARDS THE DOCKING STATION ... ---------  \n ");
+		ROS_INFO_STREAM("---------- MOVING TOWARDS DOCKING PLATFORM ---------  \n ");
 		if (
             		(abs(RefPose[2] - camPose[2]) <= Pz_eps) //&& // Z
             		//(abs(RefPose[1] - camPose[1]) <= Py_eps) && // Y
@@ -511,6 +516,7 @@ camPose[3] = CamFB->pose.orientation.x;
 			//keepMoving = false;
 			GenerateRandomVal();
 			docking_counter ++;
+			speed_reducer = 1;
 			Go2RandomPose = true;
 
 		// to make sure that y & theta are within the threshold...
@@ -529,6 +535,7 @@ camPose[3] = CamFB->pose.orientation.x;
 				)
 			{
 				ROS_INFO("y & theta fixed successfully, MOVING STRAIGHT AHEAD ... \n");
+				speed_reducer = .1;
 				Controller(RefPose[2], camPose[2], RefPose[1], RefPose[1], RefPose[3], RefPose[3],.01);
 			}
 		}else
@@ -537,7 +544,7 @@ camPose[3] = CamFB->pose.orientation.x;
 		}
 	} else
 	{
-  		ROS_INFO(" Random pose : \n");		
+  		ROS_INFO("---------- MOVING TOWARDS RANDOM POSE ---------\n");		
 		RandomPose(x_new,y_new,theta_new);
 	}
 }
@@ -545,41 +552,32 @@ camPose[3] = CamFB->pose.orientation.x;
 void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, double MarPoseY, double RefYAW, double MarPoseYAW, double dt)
 {
 	ROS_INFO_STREAM("--------------------- Controller started ----------------------\n "); 
-        // -----------------X--------------------- //
         
+	// -----------------X--------------------- //        
 	if(abs(RefX - MarPoseX) > Pz_eps)
 	{			
-		// e(t) = setpoint - actual value;
+		/*// e(t) = setpoint - actual value;
         	curr_errorX = RefX - MarPoseX;
         
         	// Integrated error
         	int_errorX +=  curr_errorX * dt;
-        	/*
-        	// -- windup gaurd -- 
-        	if (int_error < )
-        	{}
-        	else if ()
-        	{}*/
-        
         	// differentiation
         	diffX = ((curr_errorX - prev_errorX) / dt);
-        
         	// scalling
         	p_termX = Pos_Px * curr_errorX;
         	i_termX = Pos_Ix * int_errorX;
         	d_termX = Pos_Dx * diffX;
-        
         	// control signal
         	control_signalX = p_termX + i_termX + d_termX;
-        
-        
-        	// save the current error as the previous one
-        	// for the next iteration.
-        	prev_errorX = curr_errorX;        
+        	prev_errorX = curr_errorX;*/
+		control_signalX = speed_reducer * 0.05;        
         } else
 	{
 		control_signalX = 0;	
 	}
+
+	
+
         // -----------------Y--------------------- // 
 	 
 	if((RefY - MarPoseY) < -Py_eps || (RefY - MarPoseY) > Py_eps)
@@ -600,11 +598,11 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl
         	diffY = ((curr_errorY - prev_errorY) / dt);
         
         	// scalling
-        	p_termY = Pos_Py * curr_errorY;
-        	i_termY = Pos_Iy * int_errorY;
-        	d_termY = Pos_Dy * diffY;
+        	p_termY = Kp_y * curr_errorY;
+        	i_termY = Ki_y * int_errorY;
+        	d_termY = Kd_y * diffY;
 
-		ROS_INFO_STREAM("pY = " << p_termY << ", iY = " << i_termY << " dY = " << d_termY<< " \n");	      
+		//ROS_INFO_STREAM("pY = " << p_termY << ", iY = " << i_termY << " dY = " << d_termY<< " \n");	      
         	// control signal
         	control_signalY = p_termY + i_termY + d_termY;
         	
@@ -637,12 +635,12 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl
         
         	// differentiation
         	diffYAW = ((curr_errorYAW - prev_errorYAW) / dt);
-        	
+
 		ROS_INFO_STREAM(" adjusting orientation! \n");	        		
 		// scalling
-        	p_termYAW = Ang_P * curr_errorYAW;
-       		i_termYAW = Ang_I * int_errorYAW;
-       		d_termYAW = Ang_D * diffYAW;
+        	p_termYAW = Kp_theta * curr_errorYAW;
+       		i_termYAW = Ki_theta * int_errorYAW;
+       		d_termYAW = Kd_theta * diffYAW;
         	
 		// control signal
         	control_signalYAW = p_termYAW + i_termYAW + d_termYAW;
@@ -669,7 +667,8 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl
 	//ROS_INFO_STREAM("RAW Control signalYAW = "<< control_signalYAW <<". \n");
 	ROS_INFO_STREAM(" ---------------------- Controller ended ----------------------- \n");	
 	
-	dock(-control_signalX, control_signalY, control_signalYAW);
+	//dock(-control_signalX, control_signalY, control_signalYAW);
+	dock(control_signalX, control_signalY, control_signalYAW);
 }
 
 void ImageConverter::dock(double VelX, double VelY, double omegaZ)
@@ -721,21 +720,19 @@ void ImageConverter::move2docking(double VelX_est, double VelY_est, double omega
 void ImageConverter::GenerateRandomVal()
 {
 	// ---------------- PID gains ------------------
-	Pos_Px = ((double) rand() / (RAND_MAX)) * (.19 - .1) + .1; 	//   .1 < Kp < .16
-	Pos_Ix = ((double) rand() / (RAND_MAX)) * .01;			// 0 < Ki < .01
-	Pos_Dx = ((double) rand() / (RAND_MAX)) * .02;			// 0 < Kd < .01
+	Kp_y = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; 	// .4 < Kp < .76
+	Ki_y = ((double) rand() / (RAND_MAX)) * .006;			// 0 < Ki < .006
+	Kd_y = ((double) rand() / (RAND_MAX)) * .02;			// 0 < Kd < .02
 	
 	// ------------------ Generating Random Pose ------------------
 	x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM;
-	y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24);
-	theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]);
+	y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); // will be used for Q_Learning
+	theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); // will be used for Q_Learning
 }
 
 void ImageConverter::RandomPose(double X_rand, double Y_rand, double theta_rand)
 {
 	ROS_INFO_STREAM(" Xr = " << X_rand << ", Yr = " << Y_rand << ", Thetar = " << theta_rand << " rad ~ " << theta_rand * (180/CV_PI) << " deg\n");
-	//ROS_INFO_STREAM(" Y_rand	= " << Y_rand << " m. \n");
-	//ROS_INFO_STREAM(" theta_rand	= " << theta_rand << " rad. =~ " << theta_rand * (180/CV_PI) << " deg. \n");
 	ROS_INFO_STREAM(" -------------------------------------------------------------- \n");
 
 double vel_x,vel_y,omega_z;
@@ -823,31 +820,12 @@ geometry_msgs::Twist msg_new;
 				vel_y = -.03;
 			}
 						
-		}/* else if ((Y_rand < 0) && (theta_rand < 0) && (camPose[1] - Y_rand > y_thresh_undock))
-		{ // extream case on the right side, move 2 right, CCW
-				
-				ROS_INFO(" EXTREAM case on right side, move 2 right, CCW \n");
-				if ((abs(Y_rand) - camPose[1]) > y_thresh_undock)
-				{
-					vel_y = -.01;
-					omega_z = .05;				
-				}
-		} else if ((Y_rand > 0) && (theta_rand > 0) && (camPose[1] - Y_rand < -y_thresh_undock))
-		{ // extream case on the left side
-
-				ROS_INFO(" EXTREAM case on left side, move 2 left, CW \n");
-				if ((abs(Y_rand) - camPose[1]) > y_thresh_undock)
-				{
-					vel_y = .01;
-					omega_z = -.05;				
-				}
-		}*/ else
+		} else
 		{
 			ROS_INFO(" New condition should be added! \n");			
 			keepMoving = false;		
 		}			
 	}
-
 	msg_new.linear.x = vel_x;
 	msg_new.linear.y = vel_y;
 	msg_new.angular.z = omega_z;
@@ -855,3 +833,109 @@ geometry_msgs::Twist msg_new;
 commandPub.publish(msg_new);
 	
 }
+
+void ImageConverter::Q_Learning()
+{
+	while ( iterations <= it_ ) 
+        {       
+                if (user_action == 1 && y_new != 0) // North
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[y_new - 1][theta_new];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        i--;
+                        counter++;
+                } else if (user_action == 1 && y_new == 0) // North
+                {
+                        cout << "You can't go further up!\n";
+                } else if (user_action == 3 && i < (row - 1)) // South, i < row
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[y_new + 1][theta_new];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        i++;
+                        counter++;
+                } else if (user_action == 3 && y_new >= (row - 1)) // South
+                {
+                        cout << "You can't go further down!\n";
+                } else if (user_action == 2 && theta_new < (col - 1)) // East
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[i][j + 1];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        j++;
+                        counter++;
+                } else if (user_action == 2 && j >= (col - 1)) // East, j > col
+                {
+                        cout << "You can't go further right!\n";
+                } else if (user_action == 4 && j != 0 ) // West
+                {
+                        reward = R[y_new][theta_new];
+                        Q_next_state = Q[y_new][theta_new - 1];
+                        
+                        sample = reward + gamma * Q_next_state;
+                        Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        Q_curr_state = Q_next_state;
+                        j--;
+                        counter++;
+                } else if (user_action == 4 && theta_new == 0) // West, j = 1
+                {
+                        cout << "You can't go further left!\n";
+                } else
+                {
+                        cout << "\nGenerating random pose in grid for 1st. time!\n";
+                        generate_rand();
+                }
+                
+                // + Reward
+                if (y_new == R_indx_i && theta_new == R_indx_j)
+                {
+                        Time_Reward = -counter;
+                        cout << " Time Reward = "<< Time_Reward << "\n";
+                        
+                        if(abs(Time_Reward) <= R_step)
+                        {
+                                
+                                cout << "\n Goal is achieved <= " << R_step << " time steps\n";
+                                reward = R[y_new][theta_new];
+                                Q_next_state = 0;
+                        
+                                sample = reward + gamma * Q_next_state;
+                                Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        } else
+                        {
+                                cout << "\n Goal is achieved > " << R_step << " time steps => time_punishment\n";
+                                reward = -1; // ???
+                                Q_next_state = 0;
+                        
+                                sample = reward + gamma * Q_next_state;
+                                Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
+                        }
+                        
+                        counter = 1;  
+                } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment
+                {
+                           cout << "\n Failed to achieve a goal! \n";
+                                
+                                reward = R[y_new][theta_new];
+                                Q_next_state = 0;
+                        
+                                sample = reward + gamma * Q_next_state;
+                                Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);  
+                }
+                
+                cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
+                user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action  
+                printf(" ramdom user action = %i \n",user_action);
+                //cin >> user_action;
+          }
+}
diff --git a/MobileRobot/Machine_Learning/Practice/Q_Learning b/MobileRobot/Machine_Learning/Practice/Q_Learning
index 795da90ad5a7bb58acac57b42c0af4e805d3ab43..a2326691693b50e35e9bc15e880392c7235af958 100755
GIT binary patch
delta 3517
zcmdl~_^M#S1<oz93}CQj;!S<74NPD*gN4pyA;vbr2htGk18D~^<H6*WjJHMJ#eKaw
z^SS>lUd{p~g}i@CX1^+YH_u>Nz{t2@vL%Z?<AKeYEbkd57bqlvHL)@<Fz|y}5aPh*
zLbg>*0vjY17#K>ucMHfRFnDycu9~dPsUp=Xslf1G<R}M3tXxuo!Nc-K(W7Im$&)iV
z-IzihCU4|4V0xf5`7x(~s^0;RPL`_=loA+TlyWmL?0X>Tz_9;-Vgkbp{eS=e?|a~x
zz_5S8WD70@)ed_FhUOoP{H<|}3=G|@t@a8Gtq1s90>Hd-dj*DW(Ng=#?>QwWuajVt
z0Ga5~$r=d~XLYt$VCW6_-_2@gKiQi{!Nx3s!K0Iv%U*%u6TblOBvCNU`pXVPM;-);
zv4Q0x4u0YnWEBFLVfw^QfuWoCk=<kgZV8SfAdcuEyU7OJilSx-44?QTPkiDR5LJL0
zwZU%kUmneRkUib3{&or=>mopUSiS5N7@A)&cCtExx#1x00me>NKRX46mw#D6K9dHC
z{a`GW>t>a)19`GkWZy@4uvGr}|NrF`u)<Hapb%l^Z;c0s!Fw=QkG~%hBoA!aKw)yv
z7ClT3*(xw}v#zlPg$ZLf>vCIAn8bkG+R0jM3km}P-rvGtnl%~BhlEK!m<<V&YLFSG
zz95p<8!RrsdmgIiKOY0bC;pg&-K@GGc~Na!^kC|Qs+6=v3Z@-4;9xpv19IJF8wG}D
zP`IoDa}U6{+iesWUZydFf~f^0_Jg@pzMHib%+#wB*~cvfOFj>O{r~^c2COm)EX@Q9
zr)V&j8x~F;>>w*d-E3gtBnnMCS|D*&5fCXVWHb2zuQD{@+_nY>4sWs$m}Wf<=0gJM
zI+zU!q{r3@4Be(%K_u^H>&f<f5*$-O9MQ?v=pph^5MorLHByKuSc5}E6QqY#-WnPr
zVqmTYj0+Nfc@CTa-dKUs0~07jK7yItbt3!J;UVJr^Z);sOTa1*fu&jaTcg3;!&abV
z#^2%#=54eBC!F<GumF*Q2FPS9P}J0dNYQGL39lF@f8-ROyiS50ntWWWz~10JBLJpZ
zO~8Ce$T)!6kdW~KnPDmiB6(%N;sU%2pn5KF!;;TGOHcy-V~HMEQBakyERh1M(-It5
zlR$b{J1n8OtRBpr2;zcL6G;5ERX1w{NZ<!csdP7MtR=|rrGooh-~qMm`~Uy1MfN4Y
z1(tsQ|NliT8v{ePs212j8%qTS%R~IF*O(X>^t(l+z=B)&A%cgPK*>|iQi0+7jZRju
zoUDkY0>i!uU;`&J3o0_bu$ZhSXvuhBa)O`=M}`H+pJ^7b@XmyWx0i(i!wyh+>(R-o
zW&sXS-gSInnpF(UhlH#Wm<<V8J&;+Z3?P#CpE+19Zxd9{CQew$J}?K_c+VU?WNn}-
z&zOTkR>mxW;UzmGNRt37D_Ba<^})Vl5O4Gg={wn(BU~s2wQUhE*j839FdyPVF)$nA
zLM4#hreDlJ#px%rNx~8wS3qphOJ?Y<F@TzMzzpOXF0%xN<E#y43JeUBJB5|)9YDHR
zAMro}-2jwyMGe4Wm!M)Yj0_C>7^Gn(Lj9Nj|GQZ^K}tl~&Cmn;wJE|Y$4$Xr;PvML
z)2!RUe27;LgV_+ToHqq!@P#0fcRpBLfL9r+Czu@+ZlF@I5hO2KZ;I}{yW9}db4@|s
zGcZeFcrk&If#F3QIQ^{VhUWK0P-RSD3j|$n?2`j~X)=>&C`YFWC`&e*OpX_oW=b-d
zTqCN<4z4{neVSY^W-Sg%NuT%yU0>|yVPIf*afgY4q4U`O1C#%WDl#6JtRN<<3|1}m
z>Hq&`*B^|{wLkRv=N)J{z~7qq|NsAe3m{RQASS_hU~;9HlNh+JgB!;QGw!08q9fR#
z)Q=#8{xCMz{^0=|RN4bI1XQKFgJp4AV=QjNRBJT3P+VHZg9Q|Npb*zN(8(HR1oEX1
zh!piQg2o)XHl)f@GlB%89GDh#ebFt;Z=}HB(yhv3q`<JR2@+VJ#VfQFyb>5*ta<<c
z|Be6-2Zmh*3JDDRKw=)9$6suH4@w;IhM;^AYY4M~17d}x;pV*(@0l14Ca;y&Wb~MP
zURu<X;m!a56$}gvN8bMbe}I93Vf)Yj|6edLFm(O;|DS`Afg$$y|Nk0{3=Fw{{{IJ6
z$PfSg|6jn!!0_$Q|NjdZ7#IZp{{Ih(?gf)|WwaR^CP&JsGx|+#l#yrp^JelQ88yb^
zlTXUXGgeK0DI?FQJDF2fol$hMp{zV(=Hx(GdB%##g|h0b4}Smu&op_VtoP;*vU8aP
z9O@ky85kJY7#JAz7#J9MHZN2tXPo?ik!7=l(gJ2ij>$V!^*Iskz>litoDMROR-waY
zLp63LMvKXo8u~)8R^I{zQ0c?K(4jfGO2dqC!Q_n^_Kc?{f7Ebi<d|%!Y0kO87^2|R
z=1R@wjEoN^D{6}~I!w0IzRtK{vY^g>#tV}#>WDMmnEX;Fo^iuuPu+MX1+U4gbl)*9
zm^@RjUy8vC(!At=7{l;`k%2*wL5ksr=VT{+H^C>K5NQ^uw8`X+`r?vSOdv&)3=Pn>
zDNM7&<cIp=hN)253N+blu(%Y10MtsDndKl|jMFAN8i-5I1*?%{m;u!bQ?m@DM)Cj?
z1A`C)0|O}WKph08o`PgOwU|M!;%B%a2?<b`UQ4jJ6oUlRUbt(S85jf@BpEWGc7csz
zU|{f_%xEa?83xuR$uI#b3o|VdEH1^6U<L`924RT4WT-d~)E1cf?8%9S;+`c?*$-&p
zSq2pcWnGXt0-)5xz`)Q85@(WR0Ob&nI1JC4e9%x_av4}wgaOp(hsl8G&67VGil-iC
z289wYgRM9u9AIW%1L<OrVt4^{11uaKgTvwDe^{CWnFPY`q3S^;7>Ez^=|_<HlP@v~
zPi`=hFqB~dg}M~O7pSQ)f6K9eLigi;4QP0POaNi!$q$XhJ@ufvIH0mHUFJ}6P_F}|
z8YXVd0`b#yQHY-|gCrRk7~Cf}8i`9rLhVBLK>|pYaV?nKKY5|CIOCDcJB{s`Hvcu<
K%{ckA`Evkw!NCat

delta 3522
zcmaD=u&Hpu1<p0H3}CQk;!S<7ElglG1B1Y1A;vbr8`2Q&4QU53<HqEbjJHMB>|oq`
ze%0Grwa~53^3NxloWI&&w|NHB0!GFIlPy{F86RxUWO>ghc|aintcjI@fq@^)f)Ecj
z7qYEl61X6#z`#)Iy<0#ofx)Aj_0(i-P8F%8k_rs}MUHYn#HLFsFnCzrD0*~^wRv(T
zryG;1%H)ll222dflOJ;$s74;}=w!Ldpq#+)qLiC~Vc!Eu2ZsF*6cZR;=>Plwf8QG2
z1cv<wCR=bRsD>yjFf{*Q<Zq2*WMJrK4OLcPXg$E+5&-78D=RQ`i@GXLe$Od6d7T8C
z1js~>PF78jIIFO-0z+@W|87=3<;mVW3N{)E3?7}Vx0DnZKJg3iP7(#vtf#>I$b%rU
zYhZT7!B6~xV0GQ5Ta*+Sx_LJ#O%~vm;Ftp9h)z<PY{0E3s*%9(i9hnhCw>7@1*lOK
zN|XQcXx4-5>1LHzQUF<}0n)=NrKG^n{DQHQRS?X5Euz5Ce1Nf&^^u4I!^^)cAfG)~
z1behpuAB9ZBFK}aBKtnVgQfD%|Nk$qfEDfoD`e(xjR%LpelVAtzaJ7L8x+|<VX{sU
zJxnGkDll}j)+mC)gt42oToDu|F(9{gvN|h*!-Ds>FqmdF2J<0dVh?6R!UUwQ+f){0
z60bB^T!8mHRL_4t28K`kF$cR@|0;l7@LK^rm^z^<A1WXPQ-=aLm?nbsur@0|!=(z$
z4H1TNy@eGRUZydFf++x`<Og%9d^c+-n8{ryvX5H|mV6%m`v3o>4OpcbSegkIPTF8D
zJe(xhK~{*0DZs)>6q<H^$%De_35XPZBtQ89uQD{@td<7{4sWs$m}Z>~=0gH$IhYLz
zB#^pp(^ilHyv_2H?fE1)Qb8QiWO?)u`6vi6%2OUGL_WxYL*%C%$Q|$Hph55q%-tgh
z<!%;KV0d{BoB(!!l>A@<g~&lL6BHuq@DTC*`TzgRC190(U}+Zq)@U$yvK%Ox@wfPb
zd5v=5gi|jE3lJ%2fFy%$_63om-XIfRF;4!-DL#3f1UoeOh{%Dx!FxskOtZ3p`H+wi
z0J9+>15(#*`c4*<rQXPb)$%TY>bbxTOFrj7@}g&C(E}?As&bPoQeZ{Of&(iFq=z*^
z7Mjca!CX}VXkf_*C@{RX>Sk2|$^T#}mF{NMl?C~|RB)dQJfOCH|NsBB$i4))z|!yk
z|G%hZV_@hO{Urm63?5kp2FpYIt=E_s81%bEUx5X;@IwR-F@cijCVmBm?>9PGpMm6L
zAIK;$?3(~Ka5A%?BGVR`$!da@j1ML!2&!;cfRu=u$-u%p6B^!9G71bkK;^ARC+jz9
zNGPr21JkU}z<fx^egd;0Aq!I1ZF)f(6td^Q;sU%)P(7PCVIjK#Brm#78a-rfpem<G
zgF;qDBZ1*1J0nPw04pn4O3?MezGDz?^b6@b@kt|G_(}@wGTueJV4C$7m=AH`GcX(C
zLXf&{(<4$K7ao$DBrL(P1jH6yB!%u81E@(IQXtoGX(TWlXBFX9U|^iwDXeTS0Mf<!
zhzAnr2B4%X$^aI-1QnBEWMJ6GAPp-K>c9N|-_3ec5>zH#mqZWj-I55efa)wzE#S`s
z@k+ZSh=zD&GMEkV3P@eIX(7l&-h8mQ0IxDsPcS<u+(4yZAV^-+UlQGWcez2P3b2|=
zg1l#-k-+d`0wV*%i#TxlS<MYivx}h0E=hot3%cIeCkOV@WG2y2j!+3ua`cy&94{)(
zq$e@CMpTm>TzhW%G`U{PS{#&;KJg2>zSz&hz`*e04if`I=dt|{CjS#vWPC7LK}=Q|
ztXk^R|NqUdKNy>9fAH|nJJ51~zcumy|Nr|IK%zQ9OoH*j<VrCoF>qZ6H;xl#+(j`(
zN3cPuA3+BFVQjAb12L$y2WkkYN_Pj#;<UzC+=j_lY;vKvw6p^YDD*%fu5+N1RZR@!
zOBoO;DmD2Xrxd8j(T3Dm-=J0QJ1{Ni`l4I*zNi9&OSkG3Q3Zy5O^~qqEMB4YLN|fo
z#hUm3|L+LkaA4R~ppd|@4<zQ%dHluJ_n@?)FA7Q<x}wmu!Oj7(f>U(!UWxZijJ%WA
zN^6E|IWS5yTgNakFmN$2FmSy2|G$EPf#J;C|Njp#Ffi=@`Tze51_p*nzyAN{U}RuO
z{r&&H1|tJQ>7W1qLG|*>KmY$1FfuUw`}6<*0tN;KiNF8<gJONfWK$V!#*WE}GU|+B
zlRIVPnONRVUL~W(cz*Il8F|L0$sc9p8BHe(%BnNUPPUYlXDpl?DJ##|Fu77zo$<xw
zg|hyWA24!k{vbP-NrWNOfsuiMfsKKIft!JW0TlC$o98N&Gj5hpTENW6FnOn{J}06%
z_)*oI(?SN)IJDSosK(C3XfxSTLthBi_*<d?Dufsq40tD3X_zrCnY>ZMp7Gq|j~ecb
z7LzSC%{ea^LlivPT&cO7k@3c4MQw4Al=gMT1Cs@H_A`E%d{IZ7@yFzsI`NDvCVT3}
zGriEAyh`^S<AKRD_4=hQ=t5eV91vp|elRjH2r@`9oY0-@r0?dq0xAvi6i9_G6GVCg
zRGbAQ$iTp0$;7}Q#K6aJLK5N`s4#>5<c<2`jINUf4a6C<CO^~{mn?vqa0ktV8ju>v
z*<e{oh7M@s7v`)*lN}Aj8CQeUFdm&;Xduq`3e47@ywE^g(vBJ8ss~UrVY*x=Uo;Sx
z3<AqaGE9M*33GGwWJW{rdQgi7<lzJjNI-ai+S?2a3>i>e3(#~GK*i^viC2Qfr5FxC
z-2k(t8Y&LTqo7a`fCl9xkT{d%<mrZjlJl8Cp~uURB?bw-%OHge3=FF$A2bw~+yRyS
zAOVr>gvy?n{LxT6@ixdD1}TOeP<vs1e*qQW0u_h3=QlG0g8;+F|4X3mhZ@Gf&H@QT
zP_YFPgM|a<WI<zbMnw>5XaZGp1kFq{7KoW2pyD9?AZ$H(qmj6$8&uZ~s4UDS{!npH
zM+2l9CLYWJ361HZ5dXqLBOc@i$wH`I=<cro$ub@Vlh-E~8jCaD+1zPt&&1-MpO~}x
Zxal57p3;Jp#FEtbw9LH3oXIuj?*Q1fy0!oS

diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp
index d62ee1be..1ce7497f 100644
--- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp
+++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp
@@ -7,23 +7,20 @@
 using namespace std;
 
 
-const int row = 13;
+const int row = 48;
 const int col = 13;
 
 
 double gamma = .8;
 double alpha = .1;
-double R_step = 200;
+double R_step = 500;
 
 double R[row][col] = {0};
 double Q[row][col] = {0};
- 
-bool Goal = false;
 
 int iterations = 1;
 int it_;
 int user_action;
-int update_final_state;
 double Q_next_state;
 
 int i,j;
@@ -37,9 +34,10 @@ int R_indx_j = .5 * col;
 int P_indx_i = row - 2;
 int P_indx_j = col - 1;
 
-int counter = 1;
+int counter;
 int Time_Reward;
 double sample;
+
 void print_R();
 void print_Q();
 void generate_rand();
@@ -47,7 +45,7 @@ void generate_rand();
 int main()
 {
         R[R_indx_i][R_indx_j] = 50; // reward
-        R[P_indx_i][P_indx_j] = -100; // punishment
+        R[P_indx_i][P_indx_j] = -60; // punishment
 
         print_R();
         
@@ -146,7 +144,7 @@ int main()
                                 Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                         }
                         
-                        counter = 1;
+                        counter = 0;
                         print_Q();
                         generate_rand();
                         iterations++;    
@@ -167,10 +165,9 @@ int main()
                 }
                 
                 cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
-                user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
+                user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action  
                 printf(" ramdom user action = %i \n",user_action);
                 //cin >> user_action;
-                
           }
 return 0;
 }
@@ -212,7 +209,8 @@ void print_Q()
 
 void generate_rand()
 {
-
+        // Generate Random Pose for current state (position)
+        
         i = ((double) rand() / (RAND_MAX)) * (row) ;
         j = ((double) rand() / (RAND_MAX)) * (col) ;
         
diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~
index 62b41767..f3d3f718 100644
--- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~
+++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~
@@ -7,7 +7,7 @@
 using namespace std;
 
 
-const int row = 13;
+const int row = 48;
 const int col = 13;
 
 
@@ -17,13 +17,10 @@ double R_step = 200;
 
 double R[row][col] = {0};
 double Q[row][col] = {0};
- 
-bool Goal = false;
 
 int iterations = 1;
 int it_;
 int user_action;
-int update_final_state;
 double Q_next_state;
 
 int i,j;
@@ -37,9 +34,10 @@ int R_indx_j = .5 * col;
 int P_indx_i = row - 2;
 int P_indx_j = col - 1;
 
-int counter = 1;
+int counter;
 int Time_Reward;
 double sample;
+
 void print_R();
 void print_Q();
 void generate_rand();
@@ -47,7 +45,7 @@ void generate_rand();
 int main()
 {
         R[R_indx_i][R_indx_j] = 50; // reward
-        R[P_indx_i][P_indx_j] = -100; // punishment
+        R[P_indx_i][P_indx_j] = -60; // punishment
 
         print_R();
         
@@ -146,12 +144,10 @@ int main()
                                 Q[i][j] = ((1 - alpha) * Q[i][j]) + (alpha * sample);
                         }
                         
-                        counter = 1;
+                        counter = 0;
                         print_Q();
                         generate_rand();
-                        iterations++;
-                                
-                                //Goal = true;     
+                        iterations++;    
                 } else if (i == P_indx_i && j == P_indx_j) // - Reward => Punishment
                 {
                            cout << "\n Failed to achieve a goal! \n";
@@ -169,10 +165,9 @@ int main()
                 }
                 
                 cout << "\n Q_value = " << Q_curr_state << " , actions N(1), E(2), S(3), W(4) : ";
-                user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1;
+                user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action  
                 printf(" ramdom user action = %i \n",user_action);
                 //cin >> user_action;
-                
           }
 return 0;
 }
@@ -214,7 +209,8 @@ void print_Q()
 
 void generate_rand()
 {
-
+        // Generate Random Pose for current state (position)
+        
         i = ((double) rand() / (RAND_MAX)) * (row) ;
         j = ((double) rand() / (RAND_MAX)) * (col) ;
         
-- 
GitLab