From 7a03e97cc541d1408b6ab3f99b732256fa796d01 Mon Sep 17 00:00:00 2001
From: Farid Alijani <farid.alijani@student.lut.fi>
Date: Mon, 11 Apr 2016 21:19:42 +0200
Subject: [PATCH] Q learning ro mobile robot

---
 .../CamMark/camtomar/include/VisionControl.h  | 15 ++-
 .../CamMark/camtomar/include/VisionControl.h~ | 50 ++++++----
 .../CamMark/camtomar/src/VisionControl.cpp    | 97 +++++++++++++++----
 .../CamMark/camtomar/src/VisionControl.cpp~   | 13 ++-
 .../Machine_Learning/Practice/Q_learning.cpp  |  5 +-
 .../Machine_Learning/Practice/Q_learning.cpp~ | 10 +-
 6 files changed, 133 insertions(+), 57 deletions(-)

diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h
index e7e8fbd8..c462fe23 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h
@@ -97,6 +97,7 @@ private:
         void dock(double VelX, double VelY, double omegaZ);
 
 	void move2docking(double VelX_est, double VelY_est, double omegaZ_est);
+	//double* Q; 
         
 public:
 
@@ -163,18 +164,19 @@ public:
 	// ---- Q_LEARNING PARAMETERS ------ //
 
 	static double x_new,y_new,theta_new; 	// inp (y,theta)
+	static const double y_up,y_dwn,theta_up,theta_dwn;
 	static double Kp_y,Ki_y,Kd_y; 		// actions (Kp,Ki,Kd)
 	
 	static int R_indx_i, R_indx_j, P_indx_i, P_indx_j;
 
 
-	static int row, col;
+	static const int row, col;
 	static double gamma;
 	static double alpha;
 	static double R_step;
 	
-	double R[row][col];
-	double Q[row][col];
+	static double R[5][5];
+	static double Q[5][5];
 
 	static double Q_curr_state,Q_next_state;
 	static double reward;
@@ -182,7 +184,10 @@ public:
 	static int Time_Reward;
 	static double sample;
 
-	void Q_Learning();
+	void Q_Learning(double y, double theta);
+	void print_Q();
+
+	void print_R();
 
 	// ---- Q_LEARNING PARAMETERS ------ //
 
@@ -202,6 +207,6 @@ public:
 
         double marpose[6];
         double camPose[6];
-        double RefPose[6];
+        static const double RefPose[4];
         
 };
diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~
index 9619df65..e7e8fbd8 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/include/VisionControl.h~
@@ -121,15 +121,11 @@ public:
   void GenerateRandomVal();
   void RandomPose(double X_rand, double Y_rand, double theta_rand);
 
-         static double Pos_Px,Pos_Ix,Pos_Dx;
-	 static double Pos_Py,Pos_Iy,Pos_Dy;
+         static double Kp_x,Ki_x,Kd_x;
+	 
+	static double Kp_theta,Ki_theta,Kd_theta;
 
-         static double S_Ang_P,S_Ang_I,S_Ang_D;
-         static double L_Ang_P,L_Ang_I,L_Ang_D;
-
-	static double Ang_P,Ang_I,Ang_D;
-
-	static double x_new,y_new,theta_new;
+	
 
 	static double safety_margin_X;
 	double x_rand_SM;
@@ -163,7 +159,34 @@ public:
         static double control_signalYAW;
         // ---- CONTROLL PARAMETERS ------ //
         
-        
+	        
+	// ---- Q_LEARNING PARAMETERS ------ //
+
+	static double x_new,y_new,theta_new; 	// inp (y,theta)
+	static double Kp_y,Ki_y,Kd_y; 		// actions (Kp,Ki,Kd)
+	
+	static int R_indx_i, R_indx_j, P_indx_i, P_indx_j;
+
+
+	static int row, col;
+	static double gamma;
+	static double alpha;
+	static double R_step;
+	
+	double R[row][col];
+	double Q[row][col];
+
+	static double Q_curr_state,Q_next_state;
+	static double reward;
+	static int counter;
+	static int Time_Reward;
+	static double sample;
+
+	void Q_Learning();
+
+	// ---- Q_LEARNING PARAMETERS ------ //
+
+
 	// ---- TIMER PARAMETERS ------ //
 	static int msec,sec,min,hr;
 	// ---- TIMER PARAMETERS ------ //
@@ -181,13 +204,4 @@ public:
         double camPose[6];
         double RefPose[6];
         
-        /*double prev_error[6];
-        double int_error[6];
-        double curr_error[6];
-        double diff[6];
-        double p_term[6];
-        double d_term[6];
-        double i_term[6];
-        double control_signal[6];*/
-        
 };
diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp
index 9552bb01..53517b4f 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp
@@ -85,10 +85,13 @@ double ImageConverter::p_termYAW = 0;
 double ImageConverter::i_termYAW = 0;
 double ImageConverter::d_termYAW = 0;
 
-
 double ImageConverter::control_signalX, ImageConverter::control_signalY, ImageConverter::control_signalYAW;
 // ---- CONTROLL PARAMETERS ------ //
 
+
+// ---- Ref. Values ---- //
+const double ImageConverter::RefPose[4] = {-.0957, -0.0605845 /* Y_ref*/ ,  0.303369 /* X_ref*/ , 0.586541 /* theta_ref*/}; 
+
 // ----------------  PID gains---------------- //
 double ImageConverter::Kp_y = .4;
 double ImageConverter::Ki_y = .0042;
@@ -100,6 +103,12 @@ double ImageConverter::Kd_theta = .2 * Kd_y;
 // ----------------  PID gains---------------- //
 
 // random pose initialized
+
+const double ImageConverter::y_up = .3; 
+const double ImageConverter::y_dwn = -.1; 
+const double ImageConverter::theta_dwn = -.3*RefPose[3]; 
+const double ImageConverter::theta_up = .3*RefPose[3];
+
 double ImageConverter::x_new,ImageConverter::y_new,ImageConverter::theta_new;
 
 // ----------------  Q_LEARNING PARAMETERS ---------------- //
@@ -108,11 +117,12 @@ double ImageConverter::gamma = .8;
 double ImageConverter::alpha = .1;
 double ImageConverter::R_step = 200;
 
+const int ImageConverter::row = 5/*(y_up - y_dwn) * 10000*/;
+const int ImageConverter::col = 5/*(theta_up - theta_dwn) * 1000*/;
 
-int ImageConverter::row = ;
-int ImageConverter::col = ;
+double ImageConverter::Q[5][5] = {0};
+double ImageConverter::R[5][5] = {0};
 
-double ImageConverter::Q_curr_state = Q[y_new][theta_new];
 double ImageConverter::Q_next_state;
 
 int ImageConverter::counter = 1;
@@ -120,8 +130,11 @@ int ImageConverter::Time_Reward;
 double ImageConverter::sample;
 
 
+int ImageConverter::R_indx_i = 0;
+int ImageConverter::R_indx_j = 2;
 
-
+int ImageConverter::P_indx_i = 1;
+int ImageConverter::P_indx_j = 1;
 // ----------------  Q_LEARNING PARAMETERS ---------------- //
 
 double ImageConverter::dock_started,ImageConverter::dock_finished,ImageConverter::docking_duration;
@@ -153,12 +166,15 @@ double ImageConverter::docking_counter = 1;
 	/* initialize random seed: */
   	srand (time(NULL));
 
-// Ref. Values
+/*// Ref. Values
 RefPose[0] = -.0957;
 
-RefPose[1] = -0.0194504;
-RefPose[2] = 0.304966;
-RefPose[3] = 0.703702;
+RefPose[1] = -0.0193378;
+RefPose[2] = 0.306532;
+RefPose[3] = 0.702702;*/
+
+	R[R_indx_i][R_indx_j] = 50; // reward
+        R[P_indx_i][P_indx_j] = -60; // punishment
 
 x_rand_SM = RefPose[2] + .55; // 55 cm spreading domain in the x-axis while moving towards the random pose
 
@@ -513,11 +529,11 @@ camPose[3] = CamFB->pose.orientation.x; //  theta orientation
 			dock_finished = ros::Time::now().toSec();
 			docking_duration = dock_finished - dock_started;
 			ROS_INFO_STREAM("docking No. " << docking_counter << " is finished in "<< docking_duration <<" sec, moving to new Random Pose\n");		
-			//keepMoving = false;
+			keepMoving = false;
 			GenerateRandomVal();
 			docking_counter ++;
 			speed_reducer = 1;
-			Go2RandomPose = true;
+			//Go2RandomPose = true;
 
 		// to make sure that y & theta are within the threshold...
         	} else if (abs(RefPose[2] - camPose[2]) <= safety_margin_X)
@@ -541,6 +557,9 @@ camPose[3] = CamFB->pose.orientation.x; //  theta orientation
 		}else
         	{
                 	Controller(RefPose[2], camPose[2], RefPose[1], camPose[1], RefPose[3], camPose[3],.01);
+			Q_Learning(camPose[1],camPose[3]); // performing Q_Learning...
+			//print_R();
+
 		}
 	} else
 	{
@@ -575,9 +594,6 @@ void ImageConverter::Controller(double RefX, double MarPoseX, double RefY, doubl
 	{
 		control_signalX = 0;	
 	}
-
-	
-
         // -----------------Y--------------------- // 
 	 
 	if((RefY - MarPoseY) < -Py_eps || (RefY - MarPoseY) > Py_eps)
@@ -719,6 +735,7 @@ void ImageConverter::move2docking(double VelX_est, double VelY_est, double omega
 
 void ImageConverter::GenerateRandomVal()
 {
+
 	// ---------------- PID gains ------------------
 	Kp_y = ((double) rand() / (RAND_MAX)) * (.76 - .4) + .4; 	//.1 < Kp < .76
 	Ki_y = ((double) rand() / (RAND_MAX)) * .006;			// 0 < Ki < .006
@@ -726,8 +743,8 @@ void ImageConverter::GenerateRandomVal()
 	
 	// ------------------ Generating Random Pose ------------------
 	x_new = ((double) rand() / (RAND_MAX)) * (1.1 - x_rand_SM) + x_rand_SM;
-	y_new = ((double) rand() / (RAND_MAX)) * (.48 - (-.24)) + (-.24); // will be used for Q_Learning
-	theta_new = ((double) rand() / (RAND_MAX)) * (.5*RefPose[3] - (-.5*RefPose[3])) + (-.5*RefPose[3]); // will be used for Q_Learning
+	y_new = ((double) rand() / (RAND_MAX)) * (y_up - y_dwn) + y_dwn; // will be used for Q_Learning
+	theta_new = ((double) rand() / (RAND_MAX)) * (theta_up - theta_dwn) + theta_dwn; // will be used for Q_Learning
 }
 
 void ImageConverter::RandomPose(double X_rand, double Y_rand, double theta_rand)
@@ -834,9 +851,10 @@ commandPub.publish(msg_new);
 	
 }
 
-void ImageConverter::Q_Learning()
+void ImageConverter::Q_Learning(double y, double theta)
 {
-	while ( iterations <= it_ ) 
+
+/*	while ( iterations <= it_ ) 
         {       
                 if (user_action == 1 && y_new != 0) // North
                 {
@@ -922,7 +940,7 @@ void ImageConverter::Q_Learning()
                         }
                         
                         counter = 0;  
-                } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment
+                } else if (y_new == P_indx_i && theta_new == P_indx_j) // (-) Reward => Punishment
                 {
                            cout << "\n Failed to achieve a goal! \n";
                                 
@@ -937,5 +955,44 @@ void ImageConverter::Q_Learning()
                 user_action = ((double) rand() / (RAND_MAX)) * (5 - 1) + 1; // Generating random user_action  
                 printf(" ramdom user action = %i \n",user_action);
                 //cin >> user_action;
-          }
+          }*/
+}
+
+
+void ImageConverter::print_R()
+{
+         cout << " R = \n";
+        for(int i = 0; i <= (row - 1); i++)
+        {
+                for(int j = 0; j <= (col - 1); j++)
+                {
+                        cout << setw(col - 1) << R[i][j];
+			if(j < col - 1)
+			{
+				cout << " , ";
+			}
+		} // j
+                cout << "\n";
+        } // i
+        cout << "\n";
 }
+
+
+void ImageConverter::print_Q()
+{
+         cout << " Q = \n";
+        for(int i = 0; i <= (row - 1); i++)
+        {
+                for(int j = 0; j <= (col - 1); j++)
+                {
+                        cout << setw(col - 1) << Q[i][j];
+			if(j < col - 1)
+			{
+				cout << " , ";
+			}
+		} // j
+                cout << "\n";
+        } // i
+        cout << "\n";
+}
+
diff --git a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~
index 1ff2e063..c521ced5 100644
--- a/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~
+++ b/MobileRobot/AugReaMarker/CamMark/camtomar/src/VisionControl.cpp~
@@ -523,15 +523,13 @@ camPose[3] = CamFB->pose.orientation.x; //  theta orientation
         	} else if (abs(RefPose[2] - camPose[2]) <= safety_margin_X)
 		{
 			if(
-				(abs(RefPose[1] - camPose[1]) > Py_eps) || 
-				(abs(RefPose[3] - abs(camPose[3])) > A_eps)
+				(abs(RefPose[1] - camPose[1]) > Py_eps) || (abs(RefPose[3] - abs(camPose[3])) > A_eps)
 			)
 			{	
 				ROS_INFO_STREAM(" delta_X < " << safety_margin_X << " m. , Fixing Y or theta. \n ");     			
 				Controller(RefPose[2], RefPose[2], RefPose[1], camPose[1], RefPose[3], camPose[3],.01);
 			} else if(
-				(abs(RefPose[1] - camPose[1]) <= Py_eps) && 
-				(abs(RefPose[3] - abs(camPose[3])) <= A_eps)				
+				(abs(RefPose[1] - camPose[1]) <= Py_eps) && (abs(RefPose[3] - abs(camPose[3])) <= A_eps)				
 				)
 			{
 				ROS_INFO("y & theta fixed successfully, MOVING STRAIGHT AHEAD ... \n");
@@ -541,6 +539,7 @@ camPose[3] = CamFB->pose.orientation.x; //  theta orientation
 		}else
         	{
                 	Controller(RefPose[2], camPose[2], RefPose[1], camPose[1], RefPose[3], camPose[3],.01);
+                	Q_Learning();
 		}
 	} else
 	{
@@ -775,7 +774,7 @@ geometry_msgs::Twist msg_new;
 				ROS_INFO("CCW rot. is fixed, only moving 2 right side ... \n");
 				vel_y = -.03;
 			}
-		}else if (abs(camPose[1] - Y_rand) <= y_thresh_undock)
+		} else if (abs(camPose[1] - Y_rand) <= y_thresh_undock)
 		{
 			ROS_INFO(" Y-axis is fixed, adjusting theta-axis ... ! \n");			
 			if (abs(abs(camPose[3]) - abs(theta_rand)) <= theta_thresh_undock)
@@ -921,8 +920,8 @@ void ImageConverter::Q_Learning()
                                 Q[y_new][theta_new] = ((1 - alpha) * Q[y_new][theta_new]) + (alpha * sample);
                         }
                         
-                        counter = 1;  
-                } else if (y_new == P_indx_i && theta_new == P_indx_j) // - Reward => Punishment
+                        counter = 0;  
+                } else if (y_new == P_indx_i && theta_new == P_indx_j) // (-) Reward => Punishment
                 {
                            cout << "\n Failed to achieve a goal! \n";
                                 
diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp
index 1ce7497f..856b3be4 100644
--- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp
+++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp
@@ -6,9 +6,8 @@
 
 using namespace std;
 
-
-const int row = 48;
-const int col = 13;
+ const int row = 48;
+ const int col = 13;
 
 
 double gamma = .8;
diff --git a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~
index f3d3f718..c1e7aada 100644
--- a/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~
+++ b/MobileRobot/Machine_Learning/Practice/Q_learning.cpp~
@@ -6,14 +6,13 @@
 
 using namespace std;
 
-
-const int row = 48;
-const int col = 13;
+ const int row = 48;
+ const int col = 13;
 
 
 double gamma = .8;
 double alpha = .1;
-double R_step = 200;
+double R_step = 500;
 
 double R[row][col] = {0};
 double Q[row][col] = {0};
@@ -44,6 +43,9 @@ void generate_rand();
 
 int main()
 {
+        
+
+        
         R[R_indx_i][R_indx_j] = 50; // reward
         R[P_indx_i][P_indx_j] = -60; // punishment
 
-- 
GitLab