I found this relatively simple Arduino code for controlling a self-learning, two-servo crawling robot, but I just cannot wrap my head around how the author is using the R array to represent the servos. Can anyone interpret it for me? His website is here; it gives some detail, but not enough for me: https://planetachatbot.com/q-learning-con-arduino-crawling-robot-espanol-5eb0acf5aaaf
Here is the code:
/*
Q_Learning Robot
by: Erick M. Sirpa
*/
#include <Servo.h>
// Forward declaration: setup() calls Mostrar() before its definition appears.
void Mostrar(float Q[][4]);

// Scratch values written by dist(): the raw pulseIn() result and that
// result divided by 58.00.
float distancia;
float tiempo;

// Ultrasonic sensor pins (trigger is driven as an output, echo is read).
int TRIGGER = 8;
int ECHO = 7;

// The two servos; setup() attaches them to pins 9 and 6.
Servo servo1;
Servo servo2;

int valor = 0;   // not referenced anywhere in the code shown
int act = 0;     // action index chosen greedily from Q inside loop()

// Angles currently commanded to servo1 / servo2 ...
int ang = 40;
int ang2 = 0;
// ... and the angles they held before the latest action (sweep start points).
int ang_t = 0;
int ang2_t = 0;

// Q table indexed as Q[state][action]: 16 states x 4 actions, all zero at start.
float Q[16][4] = {};

int action = 0;      // action taken in the current training step (setup())
int state = 0;       // current state index
int cont = 0;        // incremented once per training step in setup()
float gamma = 0.8;   // factor applied to the best Q value of the next state
float Qmax = 0;      // gamma * max over actions of Q[next state][action]
float a = 0;         // running maximum while scanning a Q row in setup()
float b = 0;         // running maximum while scanning a Q row in loop()
int x = 0;           // random column index used to pick an action from pos[][]
int goal = 15;       // state index that ends an episode
void setup (){
servo1.attach(9);
servo2.attach(6);
pinMode(TRIGGER, OUTPUT);
pinMode(ECHO, INPUT);
Serial.begin(9600);
float R[16][4] = {
{ 0, -1, 0, -1},
{-1, -1, 0, 0},
{-1, -1, 0, 0},
{-1, -1, -1, 0},
{ 0, 0, 0, -1},
{-1, 0, 0, 0},
{-1, 0, 0, 0},
{-1, 0, -1, 0},
{ 0, 0, 0, -1},
{-1, 0, 0, 0},
{-1, 0, 0, 0},
{-1, 0, -1, 0},
{-1, 0, 0, -1},
{-1, -1, 0, 0},
{-1, -1, 1000,0},
{-1, 0, -1, 0}};
int pos[16][3]={
{0,2,0},
{2,3,0},
{2,3,0},
{3,3,0},
{0,1,2},
{2,3,0},
{2,3,0},
{3,3,0},
{0,1,2},
{2,3,0},
{2,3,0},
{3,3,0},
{1,2,1},
{2,3,3},
{2,3,3},
{1,3,3},
};
int nstate=0;
float diff=0,d_prom=0,d_ant=0, d_new=0;
float point=0;
int cc=0;
for(int d=0;d<40;d++){
d_prom=dist()+d_prom;
delay(100);
}
d_ant=d_prom/20;
Serial.println(d_ant);
delay(1000);
for (int epoca=0;epoca<10;epoca++)
{
randomSeed(analogRead(0));
state=random(15);
ang=40;
ang2=0;
while(state!=goal){
ang_t=ang;
ang2_t=ang2;
cc=0;
cont++;
x=random(2);
action=pos[state][x];
if(action==0){
nstate=state+4;
ang=ang+20;
ang2=0;
}
else if(action==1){
nstate=state-4;
ang=ang-20;
ang2=0;
}
else if(action==2){
nstate=state+1;
ang2=ang2+45;
}
else if(action==3){
nstate=state-1;
ang2=ang2-45;
}
servoVelocidad(servo1,ang_t,ang,5);
servoVelocidad(servo2,ang2_t,ang2,5);
d_new=dist();
diff=d_new-d_ant;
d_ant=d_new;
if(diff>=1.9 ){
point=map(diff,1,4,5,10);
R[nstate][action]=point;
Serial.println(point);
}
Serial.println(" ");
a = -10;
for (int i = 0; i < 4; i++) {
if (a < Q[nstate][i]) {
a = Q[nstate][i];
}
}
Qmax = a * gamma;
Q[state][action] = R[state][action] + Qmax;
state = nstate;
}
}
Mostrar®;
Serial.println(" “);
Serial.println(” ");
Mostrar(Q);
}
/*
 * Exploitation phase: repeatedly walk from a random start state (0..2)
 * towards the goal by always taking the action with the largest Q value
 * for the current state, moving the servos after every step.
 *
 * Ties are resolved in favour of the highest action index because the
 * comparison uses <=. A row of Q that was never trained is all zeros and
 * therefore selects action 3 (state - 1); from state 0 that would index
 * Q[-1]. To avoid reading outside the table, an action that would leave
 * the range 0..15 ends the episode, and the next call to loop() starts a
 * fresh one.
 */
void loop(){
  const int kNumStates = 16;  // number of rows in Q

  state = random(3);
  ang = 40;
  ang2 = 0;

  while (state != goal) {
    // Greedy choice: index of the largest entry in Q[state].
    b = -10;
    for (int i = 0; i < 4; i++) {
      if (b <= Q[state][i]) {
        b = Q[state][i];
        act = i;
      }
    }

    // Work out where the chosen action leads before committing to it.
    int siguiente = state;
    if (act == 0) {
      siguiente = state + 4;
    } else if (act == 1) {
      siguiente = state - 4;
    } else if (act == 2) {
      siguiente = state + 1;
    } else if (act == 3) {
      siguiente = state - 1;
    }
    if (siguiente < 0 || siguiente >= kNumStates) {
      // The learned policy points outside the table: give up on this
      // episode instead of indexing Q out of bounds on the next pass.
      return;
    }

    // Remember the current angles; they are the start of the sweep.
    ang_t = ang;
    ang2_t = ang2;

    if (act == 0) {
      ang = ang + 20;
      ang2 = 0;
    } else if (act == 1) {
      ang = ang - 20;
      ang2 = 0;
    } else if (act == 2) {
      ang2 = ang2 + 45;
    } else if (act == 3) {
      ang2 = ang2 - 45;
    }
    state = siguiente;

    servoVelocidad(servo1, ang_t, ang, 25);
    servoVelocidad(servo2, ang2_t, ang2, 25);
  }
}
/*
 * Print a 16 x 4 float table over Serial, one table row per output line.
 * Every value is followed by a single space, and each line is terminated
 * by println(" ").
 */
void Mostrar(float Q[][4]){
  int fila = 0;
  while (fila < 16) {
    float *celdas = Q[fila];
    for (int col = 0; col != 4; ++col) {
      Serial.print(celdas[col]);
      Serial.print(" ");
    }
    Serial.println(" ");
    ++fila;
  }
}
// Drive the TRIGGER pin low for 2 us, high for 10 us, then low again.
static void emitirPulsoTrigger() {
  digitalWrite(TRIGGER, LOW);
  delayMicroseconds(2);
  digitalWrite(TRIGGER, HIGH);
  delayMicroseconds(10);
  digitalWrite(TRIGGER, LOW);
}

/*
 * Take one ultrasonic reading.
 *
 * Sends a trigger pulse, measures how long the ECHO pin stays HIGH, and
 * converts that duration to a distance by dividing by 58.00. The raw
 * duration and the result are also stored in the globals `tiempo` and
 * `distancia`.
 *
 * Returns the computed distance.
 */
float dist() {
  const double divisorEco = 58.00;

  emitirPulsoTrigger();

  // Distance is derived from the duration of the HIGH state on ECHO.
  tiempo = pulseIn(ECHO, HIGH);
  distancia = tiempo / divisorEco;
  return distancia;
}
/*
 * Sweep a servo from anguloA to anguloB in 2-degree steps, pausing
 * `velocidad` milliseconds after every write (larger value = slower sweep).
 *
 * servo     servo to move (passed by value, as in the existing callers)
 * anguloA   starting angle; always written first
 * anguloB   target angle
 * velocidad delay in milliseconds after each write
 *
 * Because the step is 2 degrees, an odd distance between the two angles
 * (for example 0 -> 45) would otherwise stop one degree short of the
 * target. A final write of anguloB is issued in that case so the servo
 * ends where the caller's bookkeeping says it is.
 */
void servoVelocidad(Servo servo, int anguloA, int anguloB, int velocidad) {
  const int paso = 2;
  int ultimo = anguloA;  // last angle actually written to the servo

  if (anguloA < anguloB) {
    for (int angulo = anguloA; angulo <= anguloB; angulo += paso) {
      servo.write(angulo);
      delay(velocidad);
      ultimo = angulo;
    }
  } else {
    for (int angulo = anguloA; angulo >= anguloB; angulo -= paso) {
      servo.write(angulo);
      delay(velocidad);
      ultimo = angulo;
    }
  }

  // Land exactly on the target when the stepped sweep skipped past it.
  if (ultimo != anguloB) {
    servo.write(anguloB);
    delay(velocidad);
  }
}