import numpy as np
import scipy

def get_matrices_A_B(M, m, g, l):
    """
    Build the matrices of the linearized cart-pendulum model dx/dt = A x + B u.

    Parameters:
    M (float): Mass of the cart.
    m (float): Mass of the pendulum.
    g (float): Acceleration due to gravity.
    l (float): Length of the pendulum.

    Returns:
    A (numpy.ndarray): System matrix (4, 4).
    B (numpy.ndarray): Input matrix (4, 1).
    """
    # Only four entries of A are non-zero, so fill them in individually.
    A = np.zeros((4, 4))
    A[0, 1] = 1
    A[1, 2] = -(m * g) / M
    A[2, 3] = 1
    A[3, 2] = (m * g) / (M * l) + g / l

    # The input enters the second and fourth state equations only.
    input_gains = [0, -1 / M, 0, 1 / (M * l)]
    B = np.array(input_gains).reshape(4, 1)

    return A, B

def u_func(x, K):
    """
    Evaluate the linear state-feedback law u = -K x.

    Parameters:
    x (numpy.ndarray): state vector (4, )
    K (numpy.ndarray): feedback gain matrix (1, 4)

    Returns:
    u (numpy.ndarray): control input (1, )
    """
    negated_gain = -K
    return negated_gain @ x

def dxdt_wrapper(x, _, K):
    """
    Closed-loop non-linear derivative with the signature (state, time, gain).

    The control input is computed from the current state with u_func and
    then forwarded, together with the state and time, to dxdt_nonlinear.
    """
    control_input = u_func(x, K)
    return dxdt_nonlinear(x, _, control_input)

def dxdt_linear(x, _, u, omega=np.zeros(4)):
    """
    Evaluate the linearized dynamics dx/dt = A x + B u + omega.

    Uses the module-level matrices A and B. The second positional argument
    (time) is accepted but not used.

    Parameters:
    x (numpy.ndarray): state vector (4, )
    u (numpy.ndarray): control input (1, )
    omega (numpy.ndarray): additive noise, any shape with 4 elements

    Returns:
    dxdt (numpy.ndarray): state vector derivatives (4, )
    """
    # Work with column vectors so every term has shape (4, 1).
    state_col = x.reshape(4, 1)
    input_col = u.reshape((1, 1))
    noise_col = omega.reshape(4, 1)

    derivative_col = A @ state_col + B @ input_col + noise_col
    return derivative_col.reshape(4)

def dxdt_nonlinear(x, _, u, omega=np.zeros(4)):
    """
    Non-linear first order differential equations of the inverted pendulum.

    Reads the module-level physical parameters M, m, l and g. The second
    positional argument (time) is accepted but not used.

    Parameters:
    ----------
    x (numpy.ndarray): state vector (4, ): x[1] is the cart velocity, x[2]
        the pendulum angle theta and x[3] the angular velocity theta_dot.
    u (numpy.ndarray): control input (1, ); only u[0] is used.
    omega (numpy.ndarray): additive noise with 4 elements.

    Returns:
    --------
    dxdt (numpy.ndarray): full state derivative vector (4, )

    """
    theta = x[2]
    theta_dot = x[3]
    force = u[0]

    sin_theta = np.sin(theta)
    cos_theta = np.cos(theta)

    dxdt = np.zeros(4)

    dxdt[0] = x[1]
    dxdt[1] = (
        -force
        - m * g * cos_theta * sin_theta
        + m * l * theta_dot**2 * sin_theta
    ) / (M + m * sin_theta**2)
    dxdt[2] = theta_dot
    # The centripetal term depends on the angular VELOCITY squared
    # (theta_dot**2), matching the cart equation above; squaring the angle
    # itself gives wrong dynamics whenever the pendulum is moving.
    dxdt[3] = (
        force * cos_theta
        - m * l * theta_dot**2 * sin_theta * cos_theta
        + (M + m) * g * sin_theta
    ) / (M * l + m * l * sin_theta**2)

    # Additive noise; ravel so both (4, ) and (4, 1) inputs are accepted.
    dxdt += np.ravel(omega)

    return dxdt

def solve_cart_control(t, X0, K, dxdt):
    """
    Integrate the closed-loop system from an initial condition.

    Parameters:
    -----------
    t (numpy.ndarray): time values with shape (n+1, )
    X0 (numpy.ndarray): Initial conditions on state variables (4,1)
    K (numpy.ndarray): Proportional control matrix (1, 4).
    dxdt (function): full state derivative function, called by the
        integrator as dxdt(x, t, K).

    Returns
    -------
    X (numpy.ndarray): the state vector at each time (n+1, 4).
    U (numpy.ndarray): K @ x for every row x of X, shape (n+1, 1).

    """
    # Import the submodule explicitly: a bare `import scipy` does not make
    # `scipy.integrate` available on every SciPy version.
    from scipy.integrate import odeint

    # Integrate the derivative function that was passed in rather than a
    # hard-coded one, so the `dxdt` argument actually selects the model.
    X = odeint(dxdt, np.resize(X0, 4), t, args=(K,))

    # NOTE(review): u_func applies u = -K x, while this reports +K x.
    # Confirm which sign convention the returned U is meant to follow.
    U = (K @ X.T).T

    return X, U

import matplotlib.pyplot as plt 
%matplotlib inline

from IPython.display import HTML
from matplotlib import animation

def plot_state_vector_vs_time(t, X):
    """
    Plot the four columns of X (x, x dot, theta, theta dot) against t
    on a single figure and show it.
    """
    series_labels = ("x", "x dot", "theta", "theta dot")
    for column, series_label in enumerate(series_labels):
        plt.plot(t, X[:, column], label=series_label)

    plt.title('State variables vs time')
    plt.xlabel("t [s]")
    plt.legend()

    plt.show()

def plot_input_vs_time(t, U):
    """
    Plot the first column of U against t, labelled as the applied force,
    and show the figure.
    """
    force_series = U[:, 0]
    plt.plot(t, force_series, label="Applied force")

    plt.title("Applied force vs time")
    plt.xlabel("t [s]")
    plt.legend()

    plt.show()


def animate_cart_and_pendulum(t, X):
    """
    Build an animation of the cart and the inverted pendulum.

    Frame i places the cart at horizontal position X[i, 0] and the pendulum
    bob at angle X[i, 2] from the cart, using the module-level pendulum
    length `l`. The frame's time t[i] and angle are shown as text.

    Returns the matplotlib FuncAnimation with one frame per entry of t.
    """
    fig = plt.figure(figsize=(8, 6.4))
    ax = fig.add_subplot(
        111, autoscale_on=False, xlim=(-1.5, 5), ylim=(-0.4, 5)
    )
    ax.set_xlabel('position')
    ax.get_yaxis().set_visible(False)

    # Dotted vertical guides marking the start (x = -1) and the
    # objective (x = 0) positions.
    ax.plot([-1, -1], [-1.5, 5], 'k:', lw=2)
    ax.plot([0, 0], [-0.5, 5], 'k:', lw=2)

    # Cart and bob share everything except marker shape and size.
    body_style = dict(
        linestyle='None', markeredgecolor='k', color='blue', markeredgewidth=2
    )
    cart, = ax.plot([], [], marker='s', markersize=40, **body_style)
    bob, = ax.plot([], [], marker='o', markersize=20, **body_style)
    rod, = ax.plot(
        [], [], 'o-', color='blue', lw=4,
        markersize=6, markeredgecolor='k', markerfacecolor='k',
    )

    time_label = ax.text(0.7, 0.9, '', transform=ax.transAxes)
    angle_label = ax.text(0.7, 0.86, '', transform=ax.transAxes)

    artists = (rod, cart, bob, time_label, angle_label)

    def init():
        # Start from an empty scene.
        for drawn in (cart, bob, rod):
            drawn.set_data([], [])
        for label in (time_label, angle_label):
            label.set_text('')
        return artists

    def animate(i):
        angle = X[i, 2]
        cart_x, cart_y = X[i, 0], -0.1
        bob_x = cart_x + l * np.sin(angle)
        bob_y = l * np.cos(angle)

        cart.set_data([cart_x], [cart_y])
        bob.set_data([bob_x], [bob_y])
        rod.set_data([cart_x, bob_x], [cart_y, bob_y])

        time_label.set_text('time = %.1f [s]' % t[i])
        angle_label.set_text('theta = %.4f [rad]' % angle)

        return artists

    # Avoid showing the plt twice.
    plt.close()

    frame_indices = np.arange(len(t))
    return animation.FuncAnimation(
        fig, animate, frame_indices, blit=False, init_func=init
    )

# Physical parameters of the model: cart mass M, pendulum mass m,
# pendulum length l and gravitational acceleration g. The functions
# above read several of these as module-level globals.
M, m = 17, 2
l = 4
g = 9.8

# Get the linearized system matrix A and input matrix B.
A, B = get_matrices_A_B(M, m, g, l)

# Sanity check without control: a zero gain matrix gives u = 0.
Kr = np.zeros((1, 4))

# Initial state: the columns are plotted elsewhere as x, x dot, theta, theta dot.
X0 = np.array([-1, 0, 0.1, 0])
# 75 samples over the interval [0, 15].
t = np.linspace(0, 15, 75)

X, U = solve_cart_control(t, X0, Kr, dxdt_wrapper)

# Render the animation as an HTML5 video for display in the notebook.
HTML(animate_cart_and_pendulum(t, X).to_html5_video())

import control 

# Rank of the controllability matrix of (A, B); the printed result below
# is 4, the number of state variables.
rank = np.linalg.matrix_rank(control.ctrb(A, B))
print(f"The rank of the controllability matrix is: {rank}")

The rank of the controllability matrix is: 4

# Set four arbitrary negative eigenvalues for the closed-loop matrix A - B K.
eigs = [-1, -1.1, -1.2, -1.3]

# Rebinds the global Kr (previously all zeros) to the pole-placement gain.
Kr = control.place(A, B, eigs)

# Check that the closed-loop eigenvalues match the requested ones.
print(f"Eigenvalues of A - BK: {np.linalg.eig(A - B @ Kr)[0]}")

Eigenvalues of A - BK: [-1.3 -1.  -1.2 -1.1]

# Simulate the closed loop with the pole-placement gain Kr.
X, U = solve_cart_control(t, X0, Kr, dxdt_wrapper)

HTML(animate_cart_and_pendulum(t, X).to_html5_video())

# Plot the state trajectories and the input computed by solve_cart_control.
plot_state_vector_vs_time(t, X)
plot_input_vs_time(t, U)

def get_matrices_Q_R(q1, q2, q3, q4, r):
    """
    Build the cost matrices Q and R: Q weights errors in the state vector
    (q1, q2, q3, q4 on its diagonal) and R weights a non-zero control
    input (r).

    Parameters:
    q1 (float): relative weight of position of cart.
    q2 (float): relative weight of velocity of cart.
    q3 (float): relative weight of displacement theta of pole.
    q4 (float): relative weight of angular velocity of pole.
    r (float): relative weight of control force.

    Returns:
    Q (numpy.ndarray): weights of system vars, diagonal (4, 4).
    R (numpy.ndarray): weights of control vars (1, ).
    """
    state_weights = (q1, q2, q3, q4)

    # Place each weight on the diagonal and zeros everywhere else.
    Q = np.array([
        [weight if col == row else 0 for col in range(4)]
        for row, weight in enumerate(state_weights)
    ])
    R = np.array([r])

    return Q, R

def J(t, Q, R, X, U):
    """
    Sum the quadratic cost over all samples.

    For every index i in range(len(t)) the term
    X[i].T @ Q @ X[i] + U[i] * R * U[i] is added to a total that starts at 0.

    Parameters:
    t: sequence whose length sets the number of samples.
    Q (numpy.ndarray): weights of system vars.
    R (numpy.ndarray): weights of control vars.
    X (numpy.ndarray): state at each sample, indexed by row.
    U (numpy.ndarray): control input at each sample, indexed by row.

    Returns:
    The accumulated cost (0 when t is empty).
    """
    def stage_cost(i):
        state, control_input = X[i], U[i]
        return state.T @ Q @ state + control_input * R * control_input

    return sum(stage_cost(i) for i in range(len(t)))


def find_K_lqr(A, B, Q, R):
    """
    Return the infinite-horizon LQR gain K = R^-1 B^T P, where P solves the
    continuous-time algebraic Riccati equation for (A, B, Q, R).

    Parameters:
    -----------
    A (numpy.ndarray): System matrix (n, n).
    B (numpy.ndarray): Input matrix (n, k).
    Q (numpy.ndarray): Weights of system vars (n, n).
    R (numpy.ndarray): Weights of control vars (k, k); a scalar or a
        one-element array is accepted for a single input.

    Returns
    -------
    K (numpy.ndarray): the feedback gain matrix (k, n).
    """
    # Import the submodule explicitly: a bare `import scipy` does not make
    # `scipy.linalg` available on every SciPy version.
    from scipy.linalg import solve_continuous_are

    # Promote scalar / (1, ) weights to a proper (k, k) matrix.
    R = np.atleast_2d(np.asarray(R, dtype=float))

    P = solve_continuous_are(A, B, Q, R)

    # Solve R K = B^T P instead of multiplying by the element-wise
    # reciprocal 1/R, which is only valid when R is a single number.
    return np.linalg.solve(R, B.T @ P)

# Cost weights: equal weight on every state variable and a much smaller
# weight on the control input.
q1, q2, q3, q4 = 1, 1, 1, 1
r = 0.001

Q, R = get_matrices_Q_R(q1, q2, q3, q4, r)

# Feedback gain obtained from the Riccati solution.
K = find_K_lqr(A, B, Q, R)

# Simulate the closed loop with the LQR gain K.
X, U = solve_cart_control(t, X0, K, dxdt_wrapper)

plot_state_vector_vs_time(t, X)
plot_input_vs_time(t, U)

HTML(animate_cart_and_pendulum(t, X).to_html5_video())

# Define the measurement matrix C so that only the first state variable is observed.
C = np.array([1, 0, 0, 0]).reshape(1,4)

# Rank of the observability matrix of (A, C); the printed result below is 4.
print(f"If we can observe only x1 at all time steps, then the system is fully observable as the rank of the observability matrix is {np.linalg.matrix_rank(control.obsv(A, C))}.")

If we can observe only x1 at all time steps, then the system is fully observable as the rank of the observability matrix is 4.

Vd = np.eye(4)  # disturbance covariance
Vn = 1  # measurement noise covariance

# Estimator gain: run the LQR solver on the transposed pair (A.T, C.T)
# with weights Vd and Vn, keep the gain (first return value) and
# transpose it; the shape printed below is (4, 1).
Kf = control.lqr(A.T, C.T, Vd, Vn)[0].T

print(f"A shape {A.shape}")
print(f"C shape {C.shape}")
print(f"Kf shape {Kf.shape}")

# Eigenvalues of the estimator error dynamics matrix A - Kf C.
print(f"The eigenvalues of A - Kf C : {np.linalg.eig(A - Kf @ C)[0]}")

A shape (4, 4)
C shape (1, 4)
Kf shape (4, 1)
The eigenvalues of A - Kf C : [-0.9125859 +0.54543615j -0.9125859 -0.54543615j -1.82557088+0.j
 -1.43983912+0.j        ]

def get_noise(Vd, Vn, seed=None):
    """
    Returns a 5 by 1 matrix of normally distributed random numbers:
    four zero-mean disturbance samples drawn with covariance matrix Vd,
    followed by one zero-mean measurement-noise sample with variance Vn.

    Parameters:
    Vd (numpy.ndarray): Covariance matrix (4, 4).
    Vn (float): Variance.
    seed (int, optional): when not None, numpy's global random generator
        is re-seeded with this value before drawing (0 is a valid seed).

    Returns:
    omega (numpy.ndarray): Gaussian noise (5, 1).

    """
    # Compare against None so that seed=0 is honoured instead of being
    # treated as "no seed".
    if seed is not None:
        np.random.seed(seed)

    omega = np.empty(5)
    omega[:4] = np.random.multivariate_normal(np.zeros(4), Vd)
    # np.random.normal takes the standard deviation, not the variance.
    omega[4] = np.random.normal(0.0, np.sqrt(Vn))

    return omega.reshape(5, 1)

def dxhatdt(_, x_hat, u, y):
    """
    Linear estimator dynamics: (A - Kf C) x_hat + B u + Kf y.

    Uses the module-level matrices A, B, C and Kf. The first positional
    argument (time) is accepted but not used.

    Parameters:
    x_hat (numpy.ndarray): estimate of x, 4 elements
    u (numpy.ndarray): control input (1, )
    y (numpy.ndarray): measurement (1, )

    Return
    x_hat_dot (numpy.ndarray): estimate of dx/dt (4, ).

    """
    estimator_matrix = A - Kf @ C
    x_hat_dot = estimator_matrix @ x_hat + B @ u + Kf @ y

    return x_hat_dot.reshape(4)

def dxdt_linear_wrapper(_, x, u, omega=np.zeros((4,1))):
    """
    Time-first adapter for dxdt_linear.

    Accepts (time, state, input, noise) and forwards the arguments to
    dxdt_linear, which expects the state before the time.
    """
    state, time = x, _
    return dxdt_linear(state, time, u, omega)

def dxdt_nonlinear_wrapper(_, x, u, omega=np.zeros(4)):
    """
    Time-first adapter for dxdt_nonlinear.

    Accepts (time, state, input, noise) and forwards the arguments to
    dxdt_nonlinear, which expects the state before the time.
    """
    state, time = x, _
    return dxdt_nonlinear(state, time, u, omega)

import copy

def run_cart_sim(seed=0):
    """
    Step the noisy plant and the linear estimator forward with explicit
    Euler steps over the module-level time grid `t`.

    Reads the module-level X0, t, C, Vd, Vn and Kr. At step i the noise is
    drawn with seed i + seed, the input is computed from the ESTIMATED
    state, and both the true state (non-linear model plus disturbance) and
    the estimate are advanced by dt = t[1] - t[0].

    Returns:
    X_true (numpy.ndarray): true state, shape (len(X0), len(t)).
    X_est (numpy.ndarray): estimated state, same shape as X_true.
    y (numpy.ndarray): measurements (len(t), 1).
    u (numpy.ndarray): control inputs (len(t), 1); the last row stays zero.
    omegas (numpy.ndarray): noise samples (len(t), 5); the last row stays zero.
    """
    n_samples = len(t)
    dt = t[1] - t[0]

    # Both trajectories start from the same initial state X0.
    X_true = np.zeros((len(X0), n_samples))
    X_true[:, 0] = X0
    X_est = X_true.copy()

    y = np.zeros((n_samples, 1))
    u = np.zeros((n_samples, 1))
    omegas = np.zeros((n_samples, 5))
    y[0] = C @ X0

    for i in range(n_samples - 1):
        x_est_now = X_est[:, i]
        x_true_now = X_true[:, i]

        omega = get_noise(Vd*0.1, Vn*0.1, seed=i+seed)

        # NOTE(review): the feedback uses the global Kr, not the LQR gain K
        # that the script also computes. Confirm this is intended.
        u_est = -Kr @ x_est_now

        dxdt_est = dxhatdt(t[i], x_est_now, u_est, y[i])

        # The truth model receives the first four noise entries as a
        # disturbance at each time step.
        dxdt_true = dxdt_nonlinear_wrapper(t[i], x_true_now, u_est, omega[0:4])

        X_est[:, i+1] = x_est_now + dxdt_est * dt
        X_true[:, i+1] = x_true_now + dxdt_true * dt

        # The fifth noise entry corrupts the next measurement.
        y[i+1] = C @ X_true[:, i+1] + omega[4]
        u[i] = u_est
        omegas[i] = omega.reshape(5)

    return X_true, X_est, y, u, omegas

def plot_true_and_estimated_data(X_true, X_est, y, t):
    """
    Show one figure per state variable comparing the true and the
    estimated trajectory; the measurement y is overlaid on the cart
    position figure only.

    X_true and X_est are indexed as [state variable, sample].
    """
    panels = (
        ("Cart position", "position [m]"),
        ("Cart velocity", "velocity [m/s]"),
        ("Pendulum angle", "angle [rad]"),
        ("Pendulum angular velocity", "angular velocity [rad/s]"),
    )

    for row, (heading, y_axis_label) in enumerate(panels):
        plt.title(heading)
        if row == 0:
            plt.plot(t, y, label="measured")
        plt.plot(t, X_true[row, :], label="True")
        plt.plot(t, X_est[row, :], label="Est")
        plt.xlabel("time [s]")
        plt.ylabel(y_axis_label)
        plt.legend()
        plt.show()

# --- First run: larger noise covariances ---
Vd = np.eye(4)  # disturbance covariance
Vn = 1  # measurement noise covariance

# Estimator gain for these covariances (same construction as before).
Kf = control.lqr(A.T, C.T, Vd, Vn)[0].T

# run_cart_sim reads the global t, so rebinding it changes the time grid
# of the simulation: 5000 samples over [0, 10].
t = np.linspace(0, 10, 5000)

X_true, X_est, y, u, omegas = run_cart_sim(seed=1)

plot_true_and_estimated_data(X_true, X_est, y, t)

# --- Second run: smaller noise covariances ---
Vd = np.eye(4) * 0.001  # disturbance covariance
Vn = 0.01  # measurement noise covariance

q1, q2, q3, q4 = 1, 1, 1, 1
r = 0.00001

Q, R = get_matrices_Q_R(q1, q2, q3, q4, r)
# NOTE(review): K is recomputed here, but run_cart_sim computes its
# feedback from the global Kr; confirm which gain is intended.
K = find_K_lqr(A, B, Q, R)

Kf = control.lqr(A.T, C.T, Vd, Vn)[0].T

# 100000 samples over [0, 100].
t = np.linspace(0, 100, 100000)

X_true, X_est, y, u, omegas = run_cart_sim()

plot_true_and_estimated_data(X_true, X_est, y, t)

Inverted Pendulum

1. Kinematics

1.1 Apply the principle of least action to calculate the differential equations

1.2 Linearization of the problem

1.3 Write in matrix form

1.4 Code

1.5 Sanity check

2. Control

2.1 Apply a Linear Quadratic Regulator with infinite time horizons to solve the controls problem

2.2 Code

3. State estimation with a Kalman Filter

3.1 Observability

3.3 Code

3.4 Example with large state uncertainty

3.5 Example with smaller state uncertainty

4. Conclusion