Sampling correlation matrices
In this notebook, we implement four methods of sampling correlation matrices: the permutation method (proprietary), the hemisphere method (proprietary), the C vine method (Lewandowski et al. 2009), and the hyperspherical parameterization (Pourahmadi and Wang 2015). We benchmark these methods and conclude that the permutation method is best suited for synthesizing data for deep-learning pricing. This notebook was written by Botao Li under the supervision of Lokman Abbas Turki, and it is part of the MLP database.
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from scipy.stats import beta
1. Introduction
MLP aims to provide prices of products with correlated underlying assets. With a large number of assets, the correlation matrix suffers from the curse of dimensionality and has to be sampled from the manifold of all correlation matrices. Several recent works propose dedicated algorithms that sample from the uniform distribution supported on this manifold, so that the population of correlation matrices sampled this way carries no a priori information. This motivates us to look into the C vine method (Lewandowski et al. 2009) and the hyperspherical parameterization (Pourahmadi and Wang 2015). However, the a priori information is not the only concern. As we build the database for benchmarking machine learning algorithms, we have to make sure the inputs are well represented. For correlations, this boils down to the distribution of the parameters characterizing them. Specifically, we are interested in the correlation of the Brownian motions driving the assets and the correlation of the log returns.
The correlation of the Brownian motions is characterized by the correlation matrix and its Cholesky decomposition. To be more rigorous, we denote the correlation matrix by $\rho \in \mathbb{R}^{d\times d}$ and its Cholesky factor by $L \in \mathbb{R}^{d\times d}$ such that $\rho = LL^T$, with $\rho_{ii} = 1$ and $-1 < \rho_{ij} < 1$ for $i \ne j$. The diagonal elements of $L$ are set to be positive to guarantee that the Cholesky decomposition is unique (Lemma 1, Pourahmadi and Wang 2015). Defining $\bar{L}:= \mathbb{E}[L]$, the distribution of the correlation matrix is characterized by the distribution of
- the quantity $\text{trace}((L - \bar{L})(L - \bar{L})^T)$;
- the distribution of each element $\rho_{ij}$, $i > j$.
In our benchmarks, $\bar{L}$ is replaced by the sample mean over the generated population, and the distributions are represented by histograms (and empirical CDFs when necessary).
The correlation of the log returns $\ln (S_t/S_0)$ in the Black-Scholes model is characterized by the matrix $C$ and its Cholesky factor $D$ such that $C=DD^T$. It is readily verified that $D = \sigma L$, where $\sigma$ is a diagonal matrix whose diagonal elements are sampled from $\mathcal{U}(0.02, 0.9)$. We perform the same analysis for $D$. Namely, we examine the distribution of $C$ through the distributions of
- the quantity $\text{trace}((D - \bar{D})(D - \bar{D})^T)$, where $\bar{D}:= \mathbb{E}[D]$;
- the quantity $\text{trace}((\sigma - \bar{D})(\sigma - \bar{D})^T)$;
- the distribution of each element $C_{ij}$, $i \ge j$.
# Dimension of the correlation matrix
d = 8
# Number of sampled matrices
n = 10000
np.random.seed(1)
def metric(samples):
    # trace((L - \bar{L})(L - \bar{L}).T)
    samples_mean = np.mean(samples, axis=0, keepdims=True)
    return np.linalg.norm(samples - samples_mean, axis=(1, 2))**2

def metric_D(sigma, D):
    # trace((\sigma - \bar{D})(\sigma - \bar{D}).T)
    D_mean = np.mean(D, axis=0, keepdims=True)
    return np.linalg.norm(sigma - D_mean, axis=(1, 2))**2

def n_matrices_mul(matrices1, matrices2):
    # Batched multiplication of matrices stored in two (n, d, d) arrays
    return np.einsum('ijk,ikl->ijl', matrices1, matrices2)
# Sampling \sigma
rand_sigma = np.random.uniform(0.02, 0.9, size=(n, d))
sigma = np.zeros((n, d, d))
sigma[:, np.eye(d, dtype=bool)] = rand_sigma
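The metric functions above use the identity $\text{trace}(AA^T) = \|A\|_F^2$, so the trace is computed as a squared Frobenius norm with np.linalg.norm. Below is a minimal sketch of a numerical check; the shapes and the private generator are illustrative choices and not part of the benchmark.
# Sanity check (illustrative): trace(A A^T) equals the squared Frobenius norm of A,
# which is what metric() computes. A private rng keeps the global stream untouched.
rng_check = np.random.default_rng(0)
A = rng_check.standard_normal((5, d, d))
A_centered = A - np.mean(A, axis=0, keepdims=True)
traces = np.array([np.trace(a @ a.T) for a in A_centered])
assert np.allclose(traces, metric(A))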
2. Implementation
In this section, we implement the sampling methods as functions and draw samples of $L$ from them. The matrices $\rho$, $D$, and $C$ are then computed from the sampled $L$.
2.1 Uniform sampling with permutation
Here, we sample the off-diagonal elements of $L$ from uniform distributions. Initialize $L$ as a zero matrix. In each row, a lower-triangular off-diagonal element is chosen at random and sampled from $\mathcal{U}(-1, 1)$. Then another element is chosen at random and sampled from $\mathcal{U}(-\sqrt{1 - \rho_{\cdot\cdot}^2}, \sqrt{1 - \rho_{\cdot\cdot}^2})$, where $\rho_{\cdot \cdot}$ denotes the element already sampled in the same row. This process is repeated with uniform distributions rescaled so that the norm of each row always remains bounded by 1. Finally, the diagonal elements are filled so that every row has norm 1. We design this algorithm with two objectives in mind:
- Symmetry: all non-zero elements in a row of $L$ should be equivalent;
- Representativeness: each of these parameters has a chance of being sampled from $\mathcal{U}[-1, 1]$.
In the implementation, the elements are sampled in a fixed order, and a permutation is then applied to restore the symmetry within each row. We implemented two variants of this algorithm: one permutes all non-zero elements in a row, while the other leaves the diagonal element out of the permutation. We will demonstrate that the distribution produced by the permutation involving the diagonal element is very similar to that of the hemisphere method introduced later. In the meantime, we use the variant without diagonal permutation for the further analysis; it is the default method for generating correlation matrices in the database.
def sample_perm(d=2, n=100, include_diag=True):
    # Uniform sampling with permutation for L
    # Get all needed random numbers
    rands = np.random.uniform(-1, 1, size=(n, (d - 1) * (d + 2) // 2 - d + 1))
    # Get off-diagonal lower triangular indices
    lower_indices = np.tril_indices(d, k=-1)
    # Init with zero matrix
    matrices = np.zeros((n, d, d))
    # Fill off-diagonal lower triangular elements with uniform random numbers
    matrices[:, lower_indices[0], lower_indices[1]] = rands
    # Set random signs for the diagonal elements, which may be shuffled elsewhere in the row later
    matrices[:, np.eye(d, dtype=bool)] = np.random.choice([-1, 1], size=[n, d])
    # Rescaling in sequential order
    for i in range(d):
        if i == 0:
            matrices[:, 0, 0] = 1
        else:
            matrices[:, i:, i] *= np.sqrt(1 - np.linalg.norm(matrices[:, i:, :i], axis=2) ** 2)
    # Shuffle the sampled elements within each row
    if include_diag:
        for i in range(d):
            for j in range(n):
                matrices[j, i, :i+1] = np.random.permutation(matrices[j, i, :i+1])
    else:
        for i in range(d):
            for j in range(n):
                matrices[j, i, :i] = np.random.permutation(matrices[j, i, :i])
    # Make the diagonal elements positive
    mask = np.eye(d, dtype=bool)
    matrices[:, mask] = np.abs(matrices[:, mask])
    return matrices
samples_perm = sample_perm(d, n, include_diag=False)
rho_perm = n_matrices_mul(samples_perm, np.swapaxes(samples_perm, 1, 2))
D_perm = n_matrices_mul(sigma, samples_perm)
C_perm = n_matrices_mul(D_perm, np.swapaxes(D_perm, 1, 2))
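As a quick sanity check (a minimal sketch, not part of the benchmark), each sampled $L$ should be lower triangular with unit-norm rows and a non-negative diagonal, so that the diagonal of $\rho = LL^T$ equals 1:
# Sanity check (illustrative): L is lower triangular, rows have unit norm,
# the diagonal is non-negative, and the diagonal of rho is 1
assert np.allclose(np.triu(samples_perm, k=1), 0)
assert np.allclose(np.linalg.norm(samples_perm, axis=2), 1)
assert np.all(samples_perm[:, np.eye(d, dtype=bool)] >= 0)
assert np.allclose(rho_perm[:, np.eye(d, dtype=bool)], 1)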
2.2 Uniform sampling on hemispheres
Each row of the matrix $L$ is subject to the constraint $\|L_i\| = 1$ for all $i$. Thus, each row of $L$ can be seen as a point on a hemisphere. This motivates us to sample each row from a hemisphere by implementing Algorithm 1.22 of Statistical Mechanics: Algorithms and Computations. Each row is sampled as a normalized Gaussian vector, and the diagonal elements are then set to be positive.
def sample_unitri(d=2, n=100):
    # Uniform sampling on hemispheres
    # Number of lower triangular elements (including the diagonal), except the fixed (0, 0) entry
    dd = (d - 1)*(d + 2) // 2
    # Sample random numbers for all n matrices
    rands = np.random.normal(0, 1, (n, dd))
    # Get lower triangular indices (including diagonal) for a single matrix
    lower_indices = np.tril_indices(d)
    # Initialize a 3D array to store the n matrices
    matrices = np.zeros((n, d, d))
    # Create a vector that includes the fixed 1 followed by the random values
    ones_and_rands = np.hstack([np.ones((n, 1)), rands])
    # Fill all matrices at the lower triangular positions
    matrices[:, lower_indices[0], lower_indices[1]] = ones_and_rands
    # Normalize each matrix row-wise using broadcasting
    row_norms = np.linalg.norm(matrices, axis=2, keepdims=True)
    normalized_matrices = matrices / row_norms
    # Make the diagonal elements positive
    mask = np.eye(d, dtype=bool)
    normalized_matrices[:, mask] = np.abs(normalized_matrices[:, mask])
    return normalized_matrices
samples_hemi = sample_unitri(d, n)
rho_hemi = n_matrices_mul(samples_hemi, np.swapaxes(samples_hemi, 1, 2))
D_hemi = n_matrices_mul(sigma, samples_hemi)
C_hemi = n_matrices_mul(D_hemi, np.swapaxes(D_hemi, 1, 2))
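The hemisphere construction relies on the rotational invariance of the Gaussian distribution: a normalized isotropic Gaussian vector is uniformly distributed on the unit sphere, and forcing the diagonal coordinate to be positive restricts it to a hemisphere. Below is a minimal illustrative sketch in three dimensions, where the coordinate of a uniform point on the sphere is known to be $\mathcal{U}(-1, 1)$ distributed (Archimedes' hat-box theorem); the sample size and the private generator are arbitrary choices.
# Illustrative check: normalized 3D Gaussian vectors are uniform on the sphere,
# so their third coordinate is uniform on [-1, 1]
rng_check = np.random.default_rng(1)
g = rng_check.standard_normal((100000, 3))
z = (g / np.linalg.norm(g, axis=1, keepdims=True))[:, 2]
plt.hist(z, bins=50, density=True)
plt.title('Third coordinate of normalized Gaussian vectors')
plt.show()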
Remark: comparing the permutation and hemisphere methods
Both the permutation and hemisphere methods feature the equivalence of the elements within each row, and their performance is virtually identical, as shown in the further analysis. However, the two algorithms are not equivalent. To clarify the difference, we compare the marginal distribution of the entry $\rho_{21}$ of the correlation matrices generated by the two methods. A high-precision numerical experiment and an analytical derivation both indicate that the permutation and hemisphere methods are extremely similar but not identical. A brief derivation of both CDFs is sketched below.
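Sketch of the derivation (for $d = 2$, using the include_diag variant of the permutation method, which is the default used in the experiment below). For the hemisphere method, the second row of $L$ is a normalized Gaussian vector with positive second coordinate, i.e. $(\cos\theta, \sin\theta)$ with $\theta \sim \mathcal{U}(0, \pi)$, so $\rho_{21} = \cos\theta$ and
$$\mathbb{P}(\rho_{21} \le x) = \mathbb{P}(\theta \ge \arccos x) = 1 - \frac{\arccos x}{\pi}.$$
For the permutation method, the second row is built from $a \sim \mathcal{U}(-1, 1)$ and $\pm\sqrt{1 - a^2}$ with a random sign; after the permutation, $\rho_{21} = a$ with probability $1/2$ and $\rho_{21} = \pm\sqrt{1 - a^2}$ with probability $1/4$ each, so
$$\mathbb{P}(\rho_{21} \le x) = \frac{1}{2}\,\frac{x + 1}{2} + \frac{1}{4}\,\mathbb{P}\!\left(\sqrt{1 - a^2} \le x\right) + \frac{1}{4}\,\mathbb{P}\!\left(-\sqrt{1 - a^2} \le x\right) = \frac{x + 2 + \operatorname{sign}(x)\,\bigl(1 - \sqrt{1 - x^2}\bigr)}{4},$$
which is the expression implemented in theory_21_perm below.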
# sample a large number of L using both algorithms
test_perm = sample_perm(2, n*100)
test_hemi = sample_unitri(2, n*100)
def theory_21_hemi(x):
    # Analytical CDF of rho_{21} for the hemisphere method
    return 1 - np.arccos(x)/np.pi

def theory_21_perm(x):
    # Analytical CDF of rho_{21} for the permutation method (diagonal included)
    return (x + 2 + (2*(x > 0)-1)*(1 - np.sqrt(1 - x**2))) / 4
x = np.linspace(-1, 1, 200)
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
axs[0].ecdf(test_perm[:, 1, 0])
axs[0].plot(x, theory_21_perm(x), label='theoretical cdf perm rho21')
axs[0].set_title('empirical vs theory permutation')
axs[0].set_xlim(-1, 1)
axs[0].set_ylim(0, 1)
axs[1].ecdf(test_hemi[:, 1, 0])
axs[1].plot(x, theory_21_hemi(x), label='theoretical cdf hemi rho21')
axs[1].set_title('empirical vs theory hemisphere')
axs[1].set_xlim(-1, 1)
axs[1].set_ylim(0, 1)
axs[2].plot(x, theory_21_perm(x), label='theoretical cdf perm rho21')
axs[2].plot(x, theory_21_hemi(x), label='theoretical cdf hemi rho21')
axs[2].set_title('theory permutation vs hemisphere')
axs[2].set_xlim(-1, 1)
axs[2].set_ylim(0, 1)
plt.legend()
2.3 C vine
The C vine method is introduced in Lewandowski et al. 2009. It samples partial correlations from beta distributions, then recovers the raw correlations iteratively. The implemented algorithm is the one described in the middle of page 1997. This algorithm samples $\rho$ directly instead of $L$.
def cvine_uniform(d):
    # Generates a random positive-definite correlation matrix using the C-vine method.
    # Translated from an R implementation
    # Initialize partial correlation matrix P and correlation matrix R
    P = np.zeros((d, d))
    R = np.eye(d)
    # Generate random correlation matrix
    beta_param = 1 + (d - 1) / 2
    for k in range(d - 1):
        beta_param -= 0.5
        for i in range(k + 1, d):
            # Sample partial correlation from the Beta distribution
            P[k, i] = beta.rvs(beta_param, beta_param)
            P[k, i] = (P[k, i] - 0.5) * 2  # Shift to [-1, 1]
            p = P[k, i]
            # Convert partial correlation to raw correlation
            if k > 0:
                for l in range(k - 1, -1, -1):
                    p = p * np.sqrt((1 - P[l, i]**2) * (1 - P[l, k]**2)) + P[l, i] * P[l, k]
            # Update correlation matrix R
            R[k, i] = p
            R[i, k] = p
    return R
samples_cvine = []
for i in range(n):
    samples_cvine.append(cvine_uniform(d))
samples_cvine = np.linalg.cholesky(np.array(samples_cvine))
rho_cvine = n_matrices_mul(samples_cvine, np.swapaxes(samples_cvine, 1, 2))
D_cvine = n_matrices_mul(sigma, samples_cvine)
C_cvine = n_matrices_mul(D_cvine, np.swapaxes(D_cvine, 1, 2))
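A minimal sanity check (illustrative only, not part of the original benchmark) that the C vine samples are valid correlation matrices, i.e. symmetric with unit diagonal and positive definite:
# Sanity check (illustrative): symmetry, unit diagonal, positive definiteness
assert np.allclose(rho_cvine, np.swapaxes(rho_cvine, 1, 2))
assert np.allclose(rho_cvine[:, np.eye(d, dtype=bool)], 1)
assert np.all(np.linalg.eigvalsh(rho_cvine).min(axis=1) > 0)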
2.4 Hyperspherical Parameterization
Method proposed in Pourahmadi and Wang 2015, described on page 4, lines 14-16. The matrix $L$ is sampled via its hyperspherical parameterization.
def hypersphere(d):
    # Hyperspherical parameterization
    # Translated from an R implementation
    # Generate angles theta from the PDF (sin(theta))^k, k>=1, 0<theta<pi
    e = np.ones(d)
    theta = np.zeros((d, d))
    for j in range(d - 1):
        theta[(j+1):d, j] = randcorr_sample_sink((d - j) * e[(j+1):d])
    # Construct lower triangular Cholesky factor
    L = np.ones((d, d))
    for i in range(1, d):
        L[i, 1:i+1] = np.cumprod(np.sin(theta[i, :i]))
    R = np.cos(theta)
    R[np.triu_indices(d, k=1)] = 0
    L = L * R
    return L

def randcorr_sample_sink(k):
    # Rejection sampler for the density proportional to (sin(x))^k on (0, pi),
    # using a Beta(k+1, k+1) proposal rescaled to (0, pi)
    N = len(k)
    logconst = 2 * np.log(np.pi / 2)
    # Sampling loop
    x = np.zeros(N)
    accept = np.zeros(N, dtype=bool)
    while not np.all(accept):
        ix = ~accept
        T = np.sum(ix)
        # Beta(k+1, k+1) proposal rescaled to (0, pi)
        x[ix] = np.pi * np.random.beta(k[ix] + 1, k[ix] + 1, size=T)
        # Check acceptance
        accept[ix] = np.log(np.random.rand(T)) / k[ix] < logconst + np.log(np.sin(x[ix])) - np.log(x[ix]) - np.log(np.pi - x[ix])
    return x
samples_hp = []
for i in range(n):
    samples_hp.append(hypersphere(d))
samples_hp = np.array(samples_hp)
rho_hp = n_matrices_mul(samples_hp, np.swapaxes(samples_hp, 1, 2))
D_hp = n_matrices_mul(sigma, samples_hp)
C_hp = n_matrices_mul(D_hp, np.swapaxes(D_hp, 1, 2))
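To illustrate that the rejection sampler targets the intended density, here is a minimal sketch (illustrative; the value $k = 3$ and the sample size are arbitrary choices) comparing the empirical distribution of randcorr_sample_sink with the density proportional to $\sin(x)^k$ on $(0, \pi)$, normalized numerically:
# Illustrative check of the sink sampler: compare the histogram with the
# target density proportional to sin(x)^k (normalization computed numerically)
k_sink = 3.0
samples_sink = randcorr_sample_sink(k_sink * np.ones(100000))
x_grid = np.linspace(1e-6, np.pi - 1e-6, 400)
target = np.sin(x_grid)**k_sink
target /= np.trapz(target, x_grid)
plt.hist(samples_sink, bins=60, density=True, histtype='step', label='samples')
plt.plot(x_grid, target, label=r'$\propto \sin(x)^k$, $k=3$')
plt.legend()
plt.show()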
3. Benchmark
In this section, we compare the performance of these algorithms through the "spread" of the histograms: we prefer an algorithm that covers more cases.
3.1 $L$ and $\rho$
Before presenting the benchmark results, we build some intuition about the matrices $L$ and $\rho$. First, notice that the off-diagonal elements of $\bar{L}$ are $0$ due to symmetry.
fig, axs = plt.subplots(2, 2, figsize=(11, 10))
cax = axs[0, 0].imshow(np.mean(samples_perm, axis=0), vmin=0, vmax=1)
axs[0, 0].set_title('Perm')
cax = axs[0, 1].imshow(np.mean(samples_hemi, axis=0), vmin=0, vmax=1)
axs[0, 1].set_title('Hemi sphere')
cax = axs[1, 0].imshow(np.mean(samples_cvine, axis=0), vmin=0, vmax=1)
axs[1, 0].set_title('C vine')
cax = axs[1, 1].imshow(np.mean(samples_hp, axis=0), vmin=0, vmax=1)
axs[1, 1].set_title('Hyperspherical param')
fig.colorbar(cax, ax=axs)
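A minimal numeric sketch (illustrative) of the same observation, printing the largest off-diagonal entry of the sample mean of $L$ for each method:
# Illustrative: off-diagonal entries of the sample mean of L should be close to 0
off_diag = ~np.eye(d, dtype=bool)
for name, s in [('perm', samples_perm), ('hemi', samples_hemi), ('cvine', samples_cvine), ('hp', samples_hp)]:
    print(name, np.max(np.abs(np.mean(s, axis=0)[off_diag])))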
Plotting the diagonal elements.
plt.plot(np.diag(np.mean(samples_perm, axis=0)), label='Perm')
plt.plot(np.diag(np.mean(samples_hemi, axis=0)), label='Hemi sphere')
plt.plot(np.diag(np.mean(samples_cvine, axis=0)), label='C vine')
plt.plot(np.diag(np.mean(samples_hp, axis=0)), label='Hyperspherical param')
plt.xlim(0, d-1)
plt.xlabel('i')
plt.ylabel('Diagonal element')
plt.legend()
plt.title('Diagonal elements')
Histogram of $\text{trace}((L - \bar{L})(L - \bar{L})^T)$. The vertical lines indicate the positions of $\text{trace}((I - \bar{L})(I - \bar{L})^T)$ for each method.
idty = np.eye(d)
plt.hist(metric(samples_perm), bins=50, label='perm', histtype='step', density=True, color='C0')
plt.plot(np.linalg.norm(idty - np.mean(samples_perm, axis=0))**2 * np.ones(2), [0, 1], color='C0')
plt.hist(metric(samples_hemi), bins=50, label='hemi sphere', histtype='step', density=True, color='C1')
plt.plot(np.linalg.norm(idty - np.mean(samples_hemi, axis=0))**2 * np.ones(2), [0, 1], color='C1')
plt.hist(metric(samples_cvine), bins=50, label='C vine', histtype='step', density=True, color='C2')
plt.plot(np.linalg.norm(idty - np.mean(samples_cvine, axis=0))**2 * np.ones(2), [0, 1], color='C2')
plt.hist(metric(samples_hp), bins=50, label='hypersphere param', histtype='step', density=True, color='C3')
plt.plot(np.linalg.norm(idty - np.mean(samples_hp, axis=0))**2 * np.ones(2), [0, 1], color='C3')
plt.legend()
plt.ylim(0, 1)
plt.xlim(0)
plt.title(r'Histogram of $\text{trace}((L - \bar{L})(L - \bar{L})^T)$')
Element-wise standard deviation of the sampled $\rho$.
fig, axs = plt.subplots(2, 2, figsize=(11, 10))
cax = axs[0, 0].imshow(np.std(rho_perm, axis=0), vmin=0, vmax=1)
axs[0, 0].set_title('Perm')
axs[0, 1].imshow(np.std(rho_hemi, axis=0), vmin=0, vmax=1)
axs[0, 1].set_title('Hemi sphere')
axs[1, 0].imshow(np.std(rho_cvine, axis=0), vmin=0, vmax=1)
axs[1, 0].set_title('C vine')
axs[1, 1].imshow(np.std(rho_hp, axis=0), vmin=0, vmax=1)
axs[1, 1].set_title('Hyperspherical param')
fig.colorbar(cax, ax=axs)
Distribution of each element $\rho_{ij}$. The colors of the histograms are the same as in the previous histogram. Notice that permutation and hemisphere have almost identical performance. The only noticeable difference appears in $\rho_{21}$, where the hemisphere method concentrates mass near $-1$ and $1$ while the permutation method is distributed uniformly.
bins = np.arange(-1, 1 + 0.05, 0.05)
for i in range(d):
    for j in range(i):
        plt.hist(rho_perm[:, i, j], bins=bins, histtype='step', density=True)
        plt.hist(rho_hemi[:, i, j], bins=bins, histtype='step', density=True)
        plt.hist(rho_cvine[:, i, j], bins=bins, histtype='step', density=True)
        plt.hist(rho_hp[:, i, j], bins=bins, histtype='step', density=True)
        plt.title('rho {} {}'.format(i+1, j+1))
        plt.xlim(-1, 1)
        plt.show()
        plt.close()
3.2 Numerical $D$ and $C$
Again, only the diagonal elements of $\bar{D}$ are non-zero.
fig, axs = plt.subplots(2, 2, figsize=(11, 10))
cax = axs[0, 0].imshow(np.mean(D_perm, axis=0), vmin=0, vmax=1)
axs[0, 0].set_title('Perm')
cax = axs[0, 1].imshow(np.mean(D_hemi, axis=0), vmin=0, vmax=1)
axs[0, 1].set_title('Hemi sphere')
cax = axs[1, 0].imshow(np.mean(D_cvine, axis=0), vmin=0, vmax=1)
axs[1, 0].set_title('C vine')
cax = axs[1, 1].imshow(np.mean(D_hp, axis=0), vmin=0, vmax=1)
axs[1, 1].set_title('Hyperspherical param')
fig.colorbar(cax, ax=axs)
plt.plot(np.diag(np.mean(D_perm, axis=0)), label='Perm')
plt.plot(np.diag(np.mean(D_hemi, axis=0)), label='Hemi sphere')
plt.plot(np.diag(np.mean(D_cvine, axis=0)), label='C vine')
plt.plot(np.diag(np.mean(D_hp, axis=0)), label='Hyperspherical param')
plt.xlim(0, d-1)
plt.xlabel('i')
plt.ylabel('Diagonal element')
plt.legend()
plt.title('Diagonal elements of D')
Histograms of $\text{trace}((D - \bar{D})(D - \bar{D})^T)$ and $\text{trace}((\sigma - \bar{D})(\sigma - \bar{D})^T)$. From these histograms, we can see that the permutation and hemisphere methods cover a larger region than the other two. More importantly, permutation and hemisphere are able to generate more possibilities than the other two methods, as their support contains the support of the other two as a subset. A quantitative comparison of the ranges is sketched after the plot.
idty = np.eye(d)
plt.hist(metric(D_perm), bins=50, label='perm D', histtype='step', density=True, color='C0')
plt.hist(metric(D_hemi), bins=50, label='hemi sphere D', histtype='step', density=True, color='C1')
plt.hist(metric(D_cvine), bins=50, label='C vine D', histtype='step', density=True, color='C2')
plt.hist(metric(D_hp), bins=50, label='hypersphere param D', histtype='step', density=True, color='C3')
plt.hist(metric_D(sigma, D_perm), bins=50, label=r'perm $\sigma$', histtype='step', density=True, color='C0', linestyle='--', alpha=0.5)
plt.hist(metric_D(sigma, D_hemi), bins=50, label=r'hemi sphere $\sigma$', histtype='step', density=True, color='C1', linestyle='--', alpha=0.5)
plt.hist(metric_D(sigma, D_cvine), bins=50, label=r'C vine $\sigma$', histtype='step', density=True, color='C2', linestyle='--', alpha=0.5)
plt.hist(metric_D(sigma, D_hp), bins=50, label=r'hypersphere param $\sigma$', histtype='step', density=True, color='C3', linestyle='--', alpha=0.5)
plt.legend()
plt.xlim(0)
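To complement the visual comparison, a minimal sketch (illustrative only) printing the empirical range of $\text{trace}((D - \bar{D})(D - \bar{D})^T)$ for each method:
# Illustrative: empirical range of trace((D - D_bar)(D - D_bar)^T) per method
for name, D_s in [('perm', D_perm), ('hemi', D_hemi), ('cvine', D_cvine), ('hp', D_hp)]:
    m = metric(D_s)
    print('{:6s} min={:.4f} max={:.4f}'.format(name, m.min(), m.max()))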
fig, axs = plt.subplots(2, 2, figsize=(11, 10))
cax = axs[0, 0].imshow(np.std(C_perm, axis=0), vmin=0, vmax=1)
axs[0, 0].set_title('Perm')
axs[0, 1].imshow(np.std(C_hemi, axis=0), vmin=0, vmax=1)
axs[0, 1].set_title('Hemi sphere')
axs[1, 0].imshow(np.std(C_cvine, axis=0), vmin=0, vmax=1)
axs[1, 0].set_title('C vine')
axs[1, 1].imshow(np.std(C_hp, axis=0), vmin=0, vmax=1)
axs[1, 1].set_title('Hyperspherical param')
fig.colorbar(cax, ax=axs)
Here we plot the histograms on a log scale for every $C_{ij}$. Notice that, in general, the permutation method has the fattest tails. This is the major reason why we chose the permutation method as the default. A rough quantification of the tails is sketched after the plots.
for i in range(d):
    for j in range(i+1):
        plt.hist(C_perm[:, i, j], bins=bins, histtype='step', density=True, log=True)
        plt.hist(C_hemi[:, i, j], bins=bins, histtype='step', density=True, log=True)
        plt.hist(C_cvine[:, i, j], bins=bins, histtype='step', density=True, log=True)
        plt.hist(C_hp[:, i, j], bins=bins, histtype='step', density=True, log=True)
        plt.title('C {} {}'.format(i+1, j+1))
        plt.xlim(-0.81, 0.81)
        plt.show()
        plt.close()
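As a rough quantification of the tails (a sketch; the threshold 0.5 is an arbitrary choice), we can compare the fraction of off-diagonal entries $C_{ij}$, $i > j$, exceeding 0.5 in absolute value for each method:
# Illustrative tail comparison: fraction of off-diagonal C_ij with |C_ij| > 0.5
off_diag_low = np.tril_indices(d, k=-1)
for name, C_s in [('perm', C_perm), ('hemi', C_hemi), ('cvine', C_cvine), ('hp', C_hp)]:
    vals = C_s[:, off_diag_low[0], off_diag_low[1]]
    print('{:6s} P(|C_ij| > 0.5) = {:.4f}'.format(name, np.mean(np.abs(vals) > 0.5)))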
4. Conclusion
In this notebook, we benchmark four methods of sampling correlation matrices. Two proprietary methods, namely permutation and hemisphere, aim at sampling uniformly in the space of $L$, while the other two, namely the C vine method and the hyperspherical parameterization, focus on sampling uniformly in the space of $\rho$. Because permutation and hemisphere produce a wider range of $\text{trace}((D - \bar{D})(D - \bar{D})^T)$ and $\text{trace}((\sigma - \bar{D})(\sigma - \bar{D})^T)$, we discard the C vine method and the hyperspherical parameterization. Of the remaining two methods, permutation has a flat distribution for $\rho_{21}$ and fatter tails in general. We therefore conclude that the permutation method generates samples in which all the input parameters are best represented, and we make it the default sampling method for correlation matrices in MLP.