"""
This is a tool to compute a modulated modularity.

The inputs include a sigma tuning parameter value,
the data csv input file that gives observations for the variables
to be clustered (one variable per row), and a csv input file that defines
the clustering by giving a 'Module' label for each row name.

This script should be relatively fast because it does not include a search
and it is not trying to optimize anything.

"""
from __future__ import print_function, division, absolute_import

import argparse

import numpy as np
import pandas as pd

from libmmc import modularity_matrix, modularity, modulated_affinity_matrix


def main(args):
    """
    Compute and print the modulated modularity of a given clustering.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments.  The attributes read here are
        `data_in` (path to the data csv file), `clustering_in` (path to
        the clustering csv file), `correlation` (the method name passed to
        the pandas `DataFrame.corr` function), and `sigma` (the tuning
        parameter passed to `modulated_affinity_matrix`).

    Raises
    ------
    KeyError
        If the clustering csv file does not have a row for every variable
        (row label) of the data csv file.

    """
    # Read the data csv file as a 'pandas' data frame.
    # The first column of the file provides the row labels.
    data_df = pd.read_csv(args.data_in, index_col=0)

    # Compute the matrix of correlation coefficients among the rows.
    # The transpose is used because pandas correlates columns.
    C = data_df.T.corr(method=args.correlation).values

    # For now, ignore the possibility that a variable
    # will have negligible variation.
    mask = np.ones(data_df.shape[0], dtype=bool)

    # Compute the modulated modularity matrix.
    A = modulated_affinity_matrix(C, args.sigma)
    d = A.sum()
    B = modularity_matrix(A)

    # If some variables are uninformative for clustering,
    # the correlation matrix and the cluster vector will have smaller
    # dimensions than the number of rows in the original data frame.
    # Index the row labels directly instead of copying the data frame.
    remaining_row_names = data_df.index[mask]

    # Read the clustering data frame.
    clustering_df = pd.read_csv(args.clustering_in, index_col=0)

    # Report the variables that have not been assigned to a module by name,
    # instead of relying on the less specific error raised by the lookup.
    is_missing = ~remaining_row_names.isin(clustering_df.index)
    if is_missing.any():
        missing_names = remaining_row_names[is_missing].tolist()
        raise KeyError(
                'The clustering csv file has no row for '
                'the following variables: {0}'.format(missing_names))

    # Extract the clustering vector from the clustering data frame,
    # ordered like the rows of the correlation matrix.
    clustering = clustering_df.loc[remaining_row_names.values, 'Module'].values

    # Compute the modulated modularity.
    m = modularity(B, d, clustering)

    print('modulated modularity:', m)


if __name__ == '__main__':

    # Describe the command line interface of this script.
    # The options are registered in the order they appear in the usage text.
    cli = argparse.ArgumentParser()

    cli.add_argument(
            '--verbose', '-v',
            action='store_true',
            help='Show more information on the screen.')

    # Choice of the correlation coefficient; pearson unless told otherwise.
    correlation_help = (
            "Compute correlation coefficients using either "
            "'pearson' (standard correlation coefficient), "
            "'kendall' (Kendall Tau correlation coefficient), or "
            "'spearman' (Spearman rank correlation).")
    cli.add_argument(
            '--correlation',
            default='pearson',
            choices=('pearson', 'kendall', 'spearman'),
            help=correlation_help)

    # The tuning parameter is mandatory and is parsed as a float.
    sigma_help = (
            'The value of the modulated modularity '
            'tuning parameter sigma.')
    cli.add_argument(
            '--sigma',
            required=True,
            type=float,
            help=sigma_help)

    # Both input files are mandatory.
    data_in_help = (
            'Path to the data csv file, with row and column labels, '
            'and for which the rows are to be clustered.')
    cli.add_argument(
            '--data-in',
            required=True,
            help=data_in_help)
    cli.add_argument(
            '--clustering-in',
            required=True,
            help='Path to the clustering csv file.')

    # Parse the command line and hand the options over to the script body.
    cli_args = cli.parse_args()
    main(cli_args)
