import numpy as np
import matplotlib.pyplot as plt

def plot_images_and_matrix(final_img_arr, img_title_map, vision_text_matrix, output_path):
    """
    Plots images alongside their corresponding vision-text similarity matrix.

    One subplot is created per image, followed by a final subplot that shows
    the matrix as a grayscale heat map (color scale fixed to [0, 1]) with every
    cell annotated by its value. The figure is saved to ``output_path`` and
    then displayed with ``plt.show()``.

    Args:
        final_img_arr (list of torch.Tensor): List of image tensors to plot.
            Each tensor is permuted with ``(1, 2, 0)`` before display, so it
            is presumably channel-first (C, H, W) -- confirm against callers.
        img_title_map (dict): Dictionary mapping indices to image titles. It
            must contain a key for every position in ``final_img_arr``.
        vision_text_matrix (np.ndarray): Vision-text similarity matrix to plot.
            Must be 2-D; it does not have to be square.
        output_path (str): Path where to save the plot.

    Raises:
        ValueError: If ``vision_text_matrix`` is not 2-D.
        KeyError: If ``img_title_map`` has no title for one of the images.

    Note:
        This function sets the global matplotlib ``font.size`` rcParam to 12.
    """
    matrix = np.asarray(vision_text_matrix)
    if matrix.ndim != 2:
        raise ValueError(
            f"vision_text_matrix must be 2-D, got shape {matrix.shape}"
        )
    # Track rows and columns separately so rectangular matrices are handled.
    n_rows, n_cols = matrix.shape

    # The layout is driven by the number of images (plus one axis for the
    # matrix), so every image gets its own axis regardless of the matrix width.
    images = list(final_img_arr)

    plt.rcParams.update({'font.size': 12})
    fig, axs = plt.subplots(nrows=1, ncols=len(images) + 1, squeeze=False, figsize=(24, 5))

    for i, img_tensor in enumerate(images):
        img_ax = axs[0, i]
        img_ax.set_title(img_title_map[i])
        # detach() lets tensors that still track gradients be converted to
        # NumPy; permute moves the first axis last, as imshow expects.
        img_ax.imshow(img_tensor.detach().cpu().permute(1, 2, 0).numpy())
        img_ax.axis('off')

    # Plotting the vision-text matrix on the last subplot
    ax = axs[0, -1]
    # NOTE(review): with the axis switched off, matplotlib does not draw ticks
    # or grid lines, so the grid/tick setup below likely has no visible effect.
    # It is kept as-is to preserve the current rendering -- confirm intent.
    ax.axis('off')
    # NOTE(review): y=0.5 places the title at mid-height of the axes, i.e. on
    # top of the matrix -- confirm this placement is intended.
    ax.set_title('Vision x Text', x=0.5, y=0.5)
    cax = ax.matshow(matrix, cmap='gray_r', vmin=0, vmax=1)
    fig.colorbar(cax, ax=ax, fraction=0.046, pad=0.04)
    ax.grid(True, which='both', linestyle='-', linewidth=2, color='k')
    # Minor ticks sit on the cell boundaries (half-integer positions).
    ax.set_xticks(np.arange(-.5, n_cols, 1), minor=True)
    ax.set_yticks(np.arange(-.5, n_rows, 1), minor=True)

    # Adding text annotations for matrix values
    for (row, col), val in np.ndenumerate(matrix):
        # White text on dark cells, black text on light cells ('gray_r' maps
        # high values to dark).
        text_color = 'white' if val > 0.5 else 'black'
        ax.text(col, row, f'{val:.2f}', va='center', ha='center', color=text_color, fontsize=7)

    fig.tight_layout()
    fig.savefig(output_path)
    plt.show()
    # Release the figure once it has been saved and shown so repeated calls do
    # not accumulate open figures. In interactive mode show() does not block,
    # so the figure is left open for the user there.
    if not plt.isinteractive():
        plt.close(fig)

# Usage example:
# Assuming `final_img_arr2` is a list of PyTorch tensors for the images,
# `img_title_map` maps indices to titles, for example
#     img_title_map = {0: 'lily -> popcorn ', 1: 'grape -> popcorn', 2: 'high heels -> popcorn', 3: 'pumpkin -> popcorn', 4: 'alpaca -> popcorn'}
# or
#     img_title_map = {0: 'cat -> dog', 1: 'cat -> tiger', 2: 'cat -> racing car', 3: 'cat -> panda', 4: 'cat -> zebra'},
# `mat2` is the vision-text similarity matrix (numpy array),
# and './output_figure.pdf' is the desired output path:
# plot_images_and_matrix(final_img_arr2, img_title_map, mat2, './output_figure.pdf')






