Gaussian mixture model estimation with the EM algorithm

Image credit: Bayesian Learning for Signal Processing, Antoine Deleforge, LVA/ICA 2015 Summer School.

On Tuesday, October 7, 1949, Thomas Bayes visits Oxford University. Upon his arrival, three prankster students throw dozens of small stones at him from the roof. Bayes wants to know which student threw which stone. Determined, he begins to note the 2D position of every single stone on the ground.

Theoretical work

Generative model

For his investigation, Thomas Bayes defines the generative process of the observed data as follows:

He observes a realization of a set of observed random variables denoted by $ \mathbf{x} = \{\mathbf{x}_n \in \mathbb{R}^2\}_{n=1}^N$, where $\mathbf{x}_n$ corresponds to the 2D position of the $n$-th stone.

These observations are generated from a set of latent unobserved random variables denoted by $ \mathbf{z} = \{z_n \in \{1,...,K\} \}_{n=1}^N$, where $z_n$ denotes the identity of the student (among $K=3$ students) who threw the $n$-th stone.

The relationships between the latent and observed variables are defined by their joint distribution, also called complete-data likelihood:

$$ \begin{aligned} p(\mathbf{x}, \mathbf{z}; \theta) &= \prod_{n=1}^N p(\mathbf{x}_n | {z}_n; \theta) p({z}_n; \theta) \\ &= \prod_{n=1}^N \prod_{k=1}^K \left( p(\mathbf{x}_n | {z}_n=k; \theta) p({z}_n=k; \theta) \right)^{\mathbb{1}\{z_n = k\}}, \end{aligned} $$

where $\mathbb{1}\{z_n = k\} = \begin{cases}1 & \text{if } z_n = k \\ 0 & \text{otherwise}\end{cases}$.

The prior over the latent variables follows a categorical distribution: $$ p({z}_n=k; \theta) = \pi_k, \qquad k \in \{1,...,K\}, \qquad \text{with }\, \pi_k > 0\, \text{ and }\, \sum_{k=1}^K \pi_k = 1. $$

The likelihood is Gaussian:

$$ p(\mathbf{x}_n | z_n=k; \theta) = \mathcal{N}(\mathbf{x}_n; \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k),$$

with $\mathcal{N}(\mathbf{x}; \boldsymbol{\mu}, \boldsymbol{\Sigma}) = \displaystyle \frac{1}{\sqrt{\det(2\pi \boldsymbol\Sigma)}} \exp\left(-\frac 1 2 ({\mathbf x}-{\boldsymbol\mu})^\mathrm{T}{\boldsymbol\Sigma}^{-1}({\mathbf x}-{\boldsymbol\mu})\right).$
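
As a quick sanity check, this density can be evaluated directly in NumPy. Below is a minimal sketch (names and values are illustrative, not part of the notebook), compared against scipy.stats.multivariate_normal:

```python
import numpy as np
from scipy.stats import multivariate_normal

def gaussian_log_pdf(x, mu, Sigma):
    """ln N(x; mu, Sigma) for a single point, following the formula above."""
    diff = x - mu
    _, logdet = np.linalg.slogdet(2 * np.pi * Sigma)   # ln det(2*pi*Sigma)
    return -0.5 * (logdet + diff @ np.linalg.solve(Sigma, diff))

x = np.array([1.0, -0.5])
mu = np.zeros(2)
Sigma = np.array([[2.0, 0.3], [0.3, 1.0]])
print(gaussian_log_pdf(x, mu, Sigma))                     # manual formula
print(multivariate_normal.logpdf(x, mean=mu, cov=Sigma))  # should match
```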

The set of unknown deterministic model parameters is defined by:

$$ \theta = \{\pi_k, \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k\}_{k=1}^K. $$

The complete-data log-likelihood is therefore given by: $$ \ln p(\mathbf{x}, \mathbf{z}; \theta) = \sum_{n=1}^N \sum_{k=1}^K \mathbb{1}\{z_n = k\} \left(\ln \pi_k + \ln \mathcal{N}(\mathbf{x}_n; \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k) \right). $$
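
To build intuition about this generative process, here is a small sketch (all parameter values are made up) that simulates "stone positions": each $z_n$ is drawn from the categorical prior, then each $\mathbf{x}_n$ from the corresponding Gaussian component.

```python
import numpy as np

rng = np.random.default_rng(0)
N, K = 200, 3
pi = np.array([0.5, 0.3, 0.2])                           # categorical prior over the K students
mus = np.array([[0.0, 0.0], [4.0, 4.0], [-4.0, 3.0]])    # one 2D mean per component
Sigmas = np.stack([np.eye(2), 0.5 * np.eye(2), np.diag([2.0, 0.5])])

z = rng.choice(K, size=N, p=pi)                                         # latent identities z_n
x = np.stack([rng.multivariate_normal(mus[k], Sigmas[k]) for k in z])   # observed positions x_n
print(x.shape)  # (N, 2)
```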

Posterior inference

Exercise 1

Question 1.1

Give the expression of the responsibilities $ r_{n,k} \triangleq p(z_n = k | \mathbf{x}_n; \theta)$.

Question 1.2

How can you interpret the responsibilities?

Question 1.3

In order to compute the responsibilities, it is necessary to estimate the unknown model parameters $\theta$. To do so, we would like to maximize the log-marginal likelihood $\ln p(\mathbf{x}; \theta)$. Give its expression and explain why it cannot be directly optimized.

Expectation-Maximization algorithm

As direct maximum log-marginal likelihood estimation is intractable, we will derive an expectation-maximization (EM) algorithm.

Exercise 2

Question 2.1

Let $\tilde{\theta}$ denote the current estimate of the model parameters. Using the above definition of the complete-data log-likelihood, solve the E-step, that is, compute the so-called $Q$-function, defined by:

$$\begin{aligned} Q(\theta, \tilde{\theta}) &= \mathbb{E}_{p(\mathbf{z} | \mathbf{x}; \tilde{\theta})}[\ln p(\mathbf{x}, \mathbf{z}; \theta)] \end{aligned}$$

Make the dependency on the model parameters $\theta = \{\pi_k, \boldsymbol{\mu}_k, \boldsymbol{\Sigma}_k\}_{k=1}^K$ explicit (any constant with respect to these parameters can be omitted).

Hints:

Question 2.2

You now have to solve the M-step, that is, update the model parameters by maximizing $Q(\theta, \tilde{\theta})$ with respect to (w.r.t.) $\theta$. To do so, you will simply set the partial derivatives of $Q(\theta, \tilde{\theta})$ w.r.t. $\boldsymbol{\mu}_k$, $\boldsymbol{\Sigma}_k$ and $\pi_k$ to zero.

Useful matrix derivation formulas can be found in the appendix at the end of this notebook, or in the Matrix Cookbook.

Question 2.2a

Compute the partial derivative of $Q(\theta, \tilde{\theta})$ w.r.t $\boldsymbol{\mu}_k$ and set it to zero to get the update of $\boldsymbol{\mu}_k$.

$$\begin{aligned} \nabla_{\boldsymbol{\mu}_k} Q(\theta, \tilde{\theta}) &= \\ \end{aligned}$$

You will express the update as a function of $N_k = \sum_{n=1}^N \tilde{r}_{n,k}$. If we interpret $\tilde{r}_{n,k}$ as being equal to 1 if $\mathbf{x}_n$ belongs to component $k$ and 0 otherwise, $N_k$ corresponds to the number of points assigned to cluster $k$.

Hint: $\boldsymbol{\Sigma}_k$ is a covariance matrix so it is symmetric.

Question 2.2b

Compute the partial derivative of $Q(\theta, \tilde{\theta})$ w.r.t $\boldsymbol{\Sigma}_k$ and set it to zero to get the update of $\boldsymbol{\Sigma}_k$.

$$\begin{aligned} \nabla_{\boldsymbol{\Sigma}_k} Q(\theta, \tilde{\theta}) &= \\ \end{aligned}$$

You will express the update as a function of $N_k = \sum_{n=1}^N \tilde{r}_{n,k}$.

Hint: Use the trace trick: $\mathbf{x}^\top\boldsymbol{\Sigma}^{-1}\mathbf{x} = tr(\mathbf{x}^\top\boldsymbol{\Sigma}^{-1}\mathbf{x}) = tr(\boldsymbol{\Sigma}^{-1}\mathbf{x}\mathbf{x}^\top) $ and then refer to the matrix derivation formulas in the appendix.
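
If in doubt, the trace trick itself is easy to check numerically (a throwaway sketch with random values):

```python
import numpy as np

rng = np.random.default_rng(1)
x = rng.normal(size=2)
A = rng.normal(size=(2, 2))
Sigma = A @ A.T + np.eye(2)                      # a random SPD "covariance"
Sigma_inv = np.linalg.inv(Sigma)

lhs = x @ Sigma_inv @ x                          # x^T Sigma^{-1} x
rhs = np.trace(Sigma_inv @ np.outer(x, x))       # tr(Sigma^{-1} x x^T)
print(np.isclose(lhs, rhs))                      # True
```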

Question 2.2c

The update for $\pi_k$ is obtained by maximizing $Q(\theta, \tilde{\theta})$ under the constraint that $\sum_{k=1}^K \pi_k = 1$. We obtain:

$$ \pi_k = N_k / N, $$

where $N_k = \sum_{n=1}^N \tilde{r}_{n,k}$. The optimal prior probability $p(z_n = k) = \pi_k$ is thus given by the (soft) number of points $N_k$ assigned to cluster $k$ divided by the total number of points $N$.

To obtain this expression, you have to use the method of Lagrange multipliers.
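
In outline (one way to carry out this derivation), keep only the terms of $Q(\theta, \tilde{\theta})$ that depend on the $\pi_k$ and handle the constraint with a multiplier $\lambda$:

$$ \mathcal{L}(\{\pi_k\}, \lambda) = \sum_{n=1}^N \sum_{k=1}^K \tilde{r}_{n,k} \ln \pi_k + \lambda \left( \sum_{k=1}^K \pi_k - 1 \right), \qquad \frac{\partial \mathcal{L}}{\partial \pi_k} = \frac{N_k}{\pi_k} + \lambda = 0. $$

Solving gives $\pi_k = -N_k / \lambda$; summing over $k$ and using $\sum_{k=1}^K \pi_k = 1$ yields $\lambda = -N$, hence $\pi_k = N_k / N$.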

Practical work

The GMM class defined in the previous cell implements a Gaussian mixture model. It has two important methods:

In the following cell, we instantiate this class for our problem.

Exercise 3

Exercise 3.1

Complete the method that computes the log-marginal likelihood (LML) and run the following cell.

The LML is defined as a sum over the data points. You will divide this sum by the number of data points, so that the value of the objective function does not depend on the size of the dataset. In other words, compute the mean instead of the sum.
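
As a reference point, here is a standalone sketch of that computation (not the notebook's GMM method; the function and variable names are illustrative), using scipy.special.logsumexp to sum the per-component terms in the log domain:

```python
import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def mean_log_marginal_likelihood(x, pi, mus, Sigmas):
    """Mean over n of ln p(x_n) = ln sum_k pi_k N(x_n; mu_k, Sigma_k)."""
    N, K = x.shape[0], pi.shape[0]
    log_joint = np.empty((N, K))                 # ln pi_k + ln N(x_n; mu_k, Sigma_k)
    for k in range(K):
        log_joint[:, k] = np.log(pi[k]) + multivariate_normal.logpdf(x, mean=mus[k], cov=Sigmas[k])
    return logsumexp(log_joint, axis=1).mean()   # mean instead of sum, as required above
```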

Exercise 3.2

Complete the method that computes the E-step and run the following cell.
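
For comparison only, the E-step can also be written as a standalone function (independent of the notebook's GMM class, with illustrative names), normalizing the log-joint terms in the log domain for numerical stability:

```python
import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def responsibilities(x, pi, mus, Sigmas):
    """r_{n,k} proportional to pi_k N(x_n; mu_k, Sigma_k), normalized over k."""
    N, K = x.shape[0], pi.shape[0]
    log_joint = np.empty((N, K))
    for k in range(K):
        log_joint[:, k] = np.log(pi[k]) + multivariate_normal.logpdf(x, mean=mus[k], cov=Sigmas[k])
    return np.exp(log_joint - logsumexp(log_joint, axis=1, keepdims=True))
```

Hard cluster assignments, used just below, then follow from `responsibilities(x, pi, mus, Sigmas).argmax(axis=1)`.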

To assign each point to a cluster, we simply look at the argmax of the responsibilities. Run the following cell.

Can you explain what you observe?

Exercise 3.3

Complete the method that computes the M-step and run the following cell.

Hint: Updating the covariance matrix requires computing the outer product of vectors. Look at the numpy newaxis trick notebook to help you (a small demo is also sketched below).
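
Here is a tiny illustration of that trick (the shapes are made up): broadcasting two views of a centered data matrix produces one outer product per data point.

```python
import numpy as np

diff = np.arange(6.0).reshape(3, 2)                        # 3 centered points in 2D, i.e. x_n - mu_k
outer = diff[:, :, np.newaxis] * diff[:, np.newaxis, :]    # shape (3, 2, 2): one outer product per point
print(outer.shape)                                         # (3, 2, 2)
print(np.allclose(outer[0], np.outer(diff[0], diff[0])))   # True
```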

If you got all my encouraging messages, then you are ready to fit the GMM on the data!

In the following cell, we plot the log-marginal likelihood across iterations. It should be monotonically increasing, a nice property of the EM algorithm that is very useful for debugging: if the log-marginal likelihood decreases, there is a bug.
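
A cheap way to exploit this property is to assert it over consecutive iterations; a sketch, assuming `lml` holds the per-iteration values:

```python
import numpy as np

lml = [-3.20, -2.80, -2.70, -2.65]   # made-up per-iteration values, for illustration only
assert (np.diff(lml) >= -1e-10).all(), "log-marginal likelihood decreased: likely a bug in the E- or M-step"
```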

Let's have a look at the results.

We used synthetic data, so we actually also know the true model parameters.

This is not perfect, but not that bad either...

Exercise 3.4

Question 3.4.1 - Re-run the complete pipeline several times after changing the random seed that is used to instantiate the GMM. Explain what you observe and propose a method to choose the best model among the ones you obtained from several runs.

Question 3.4.2 - Briefly explain the commonalities and differences between the K-means algorithm and the EM algorithm for the GMM model. You can use different sources of information, e.g.

It is your responsibility to assess the reliability of the information you can find on the Internet.

Question 3.4.3 - Ask ChatGPT to complete the code below to perform image segmentation with the EM algorithm for the GMM model, using 3 clusters. You may need to install scikit-learn; if you do not want to, ask ChatGPT not to use this package, but the code will be much longer and harder to read. Explain in one sentence how this image segmentation method works and explain what you observe in the result.
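
For reference, the kind of answer one might expect looks roughly like the following sketch (the image loading step is an assumption, since the actual picture used in the notebook is not reproduced here); sklearn.mixture.GaussianMixture fits the GMM with EM and labels each pixel with a component index.

```python
import numpy as np
from sklearn.mixture import GaussianMixture

# Stand-in image: in the notebook this would be the provided picture as an (H, W, 3) array.
rng = np.random.default_rng(0)
image = rng.random((64, 64, 3))

pixels = image.reshape(-1, 3)                                 # one RGB vector per pixel
gmm = GaussianMixture(n_components=3, random_state=0).fit(pixels)
segmentation = gmm.predict(pixels).reshape(image.shape[:2])   # component index per pixel
```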

Appendix

For $f: \mathbb{R}^{I \times J} \rightarrow \mathbb{R}$, the gradient is defined by $\frac{d}{d \mathbf{X}} f(\mathbf{X}) = \nabla_{\mathbf{X}}f(\mathbf{X}) = \left[\frac{\partial}{\partial X_{ij}} f(\mathbf{X}) \right]_{ij}$.

Below are some useful derivatives:

$$ \frac{\partial \mathbf{x}^T \mathbf{a}}{\partial \mathbf{x}} = \frac{\partial \mathbf{a}^T \mathbf{x}}{\partial \mathbf{x}} = \mathbf{a} $$

$$ \frac{\partial \mathbf{x}^T \mathbf{A} \mathbf{x}}{\partial \mathbf{x}} = 2 \mathbf{A} \mathbf{x}, \qquad \text{if } \mathbf{A} \text{ is symmetric} $$

$$ \frac{\partial}{\partial \mathbf{X}} tr(\mathbf{A}\mathbf{X}^T) = \mathbf{A} $$

$$ \frac{\partial}{\partial \mathbf{X}} tr(\mathbf{A}\mathbf{X}) = \mathbf{A}^T $$

$$ \frac{\partial}{\partial \mathbf{X}} tr(\mathbf{X}^{-1}\mathbf{A}) = -(\mathbf{X}^{-1}\mathbf{A}\mathbf{X}^{-1})^T $$

$$ \frac{\partial}{\partial \mathbf{X}} \ln \det(\mathbf{X}) = (\mathbf{X}^{-1})^T = (\mathbf{X}^T)^{-1} $$
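
These identities can also be spot-checked numerically with central finite differences; for instance, for the log-determinant derivative (a throwaway sketch):

```python
import numpy as np

rng = np.random.default_rng(2)
A = rng.normal(size=(3, 3))
X = A @ A.T + 3 * np.eye(3)           # a well-conditioned positive-definite matrix

analytic = np.linalg.inv(X).T         # (X^{-1})^T
eps = 1e-6
numeric = np.zeros_like(X)
for i in range(3):
    for j in range(3):
        E = np.zeros_like(X)
        E[i, j] = eps                 # perturb a single entry X_{ij}
        numeric[i, j] = (np.log(np.linalg.det(X + E)) - np.log(np.linalg.det(X - E))) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-5))  # True
```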