/* classical.c
 *
 * Copyright (C) 2005, 2006, 2007 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Classical test theory.
   \author Stephane Germain <germste@gmail.com>
*/

#include "libirt.h"
#if HAVE_CONFIG_H
#  include <config.h>
#endif
#include <math.h>

/**
   \brief Compute the classical test theory statistics.

   @param[in] patterns A matrix(subjects x items) with the binary patterns.
   @param[out] items_mean A vector(items) with the means of each items.
   @param[out] items_sd A vector(items) with the sd of each items.
   @param[out] items_corr A vector(items) with the correlations 
   between each items and the score of each subject without the item.
   @param[out] items_bis_corr A vector(items) with the biserial correlations 
   between each items and the score of each subject without the item.
   @param[out] items_nbr A vector(items) with the number of non missing response
   for each items.
   @param[out] subjects_score A vector(subjects) with the scores of each subjects.
   @param[out] subjects_nbr A vector(subjects) with the number of non missing
   response of each subjects.
   @param[out] nbr The total number of non missing response.
   @param[out] mean The mean score.
   @param[out] sd The score sd.
   @param[out] alpha The fiability coefficient alpha.
   @param[out] pairs_corr A matrix(items x items) of correlation for each pair of items

   \warning The memory for the outputs should be allocated before.
*/
void 
classical_statistics(gsl_matrix_int * patterns,
					 gsl_vector * items_mean, gsl_vector * items_sd, 
					 gsl_vector * items_corr, gsl_vector * items_bis_corr, 
					 gsl_vector_int * items_nbr,
					 gsl_vector * subjects_score, gsl_vector_int * subjects_nbr,
					 int * nbr, double * mean, double * sd, double * alpha,
					 gsl_matrix * pairs_corr, gsl_matrix_int * freq_table)
{
	int i, i2, j, resp, resp2, subject_nbr, item_nbr, pair_nbr, freq,
    nbr_subject = patterns->size1, nbr_item = patterns->size2, nbr_subject_complete=0;
  double subject_score, item_mean, item_mean2, item_sd, item_corr, pair_corr, tot_var, bis_fact;
  gsl_vector_int_view column;
  *sd = 0;
  *alpha = 0;
  *nbr = 0;
  *mean = 0;
  gsl_vector_set_all(subjects_score, 0);
  gsl_vector_int_set_all(subjects_nbr, 0);
  gsl_vector_set_all(items_mean, 0);
  gsl_vector_set_all(items_corr, 0);
  gsl_vector_set_all(items_sd, 0);
  gsl_vector_int_set_all(items_nbr, 0);

  /* Compute the frequecies table */
  if(freq_table)
  {
	  gsl_matrix_int_set_all(freq_table, 0);
  }
  
  /* compute the means and counts */
  for( j = 0; j < nbr_subject; j++)
    {
      subject_score = 0;
      subject_nbr = 0;
      for( i = 0; i < nbr_item; i++)
	{
	  resp = gsl_matrix_int_get(patterns, j, i);

	  item_mean = gsl_vector_get(items_mean, i);
	  item_nbr = gsl_vector_int_get(items_nbr, i);

	  if(freq_table)
	  {
		  freq = gsl_matrix_int_get(freq_table, i, (resp==BLANK)?0:(resp+1));
		  freq++;
		  gsl_matrix_int_set(freq_table, i, (resp==BLANK)?0:(resp+1), freq);
	  }

	  if(resp != BLANK) 
	    {
	      subject_nbr++;
	      item_nbr++;
	      subject_score += resp;
	      item_mean += resp;
	    }

	  gsl_vector_set(items_mean, i, item_mean);
	  gsl_vector_int_set(items_nbr, i, item_nbr);
	}

      if(subject_nbr==nbr_item) nbr_subject_complete++;

      gsl_vector_set(subjects_score, j, subject_score);
      gsl_vector_int_set(subjects_nbr, j, subject_nbr);
      *nbr += subject_nbr;
    }

  /* adjust the means */
  for( i = 0; i < nbr_item; i++)
    {
      item_mean = gsl_vector_get(items_mean, i);
      item_nbr = gsl_vector_int_get(items_nbr, i);
      if(item_nbr > 0) item_mean /= item_nbr;
      *mean += item_mean;
      gsl_vector_set(items_mean, i, item_mean);
    }

  /* compute the covariance matrix and alpha */
  tot_var = 0;
  for( i = 0; i < nbr_item; i++)
    {
      item_mean = gsl_vector_get(items_mean, i);
      for( i2 = 0; i2 < nbr_item; i2++)
	{
	  item_mean2 = gsl_vector_get(items_mean, i2);
	  pair_corr = 0;
	  pair_nbr = 0;
	  for( j = 0; j < nbr_subject; j++)
	    {
	      resp = gsl_matrix_int_get(patterns, j, i);
	      resp2 = gsl_matrix_int_get(patterns, j, i2);
	      if(resp != BLANK && resp2 != BLANK) 
		{
		  pair_nbr++;
		  pair_corr += (resp - item_mean) * (resp2 - item_mean2);
		}
	    }
	  if(pair_nbr > 0) pair_corr /= pair_nbr;
          tot_var += pair_corr;
	  gsl_matrix_set(pairs_corr, i, i2, pair_corr);
	}
      item_sd = gsl_matrix_get(pairs_corr, i, i);
      gsl_vector_set(items_sd, i, item_sd);
      *alpha += item_sd;
    }
  *alpha = (nbr_item/(nbr_item-1.0))*(1.0-*alpha/tot_var);
  *sd = sqrt(tot_var);

  /* adjust the item-tot correlations and item_sd */
  for( i = 0; i < nbr_item; i++)
    {
      item_corr = 0;
      item_sd = gsl_vector_get(items_sd, i);
      for( i2 = 0; i2 < nbr_item; i2++)
	{
	  pair_corr = gsl_matrix_get(pairs_corr, i, i2);
	  item_corr += pair_corr;
	}
      item_corr = (item_corr-item_sd)/sqrt(item_sd*(tot_var-2*item_corr+item_sd));
      gsl_vector_set(items_corr, i, item_corr);
      gsl_vector_set(items_sd, i, sqrt(item_sd));
      column = gsl_matrix_int_column (patterns, i);
      bis_fact = polyserial_factor(&column.vector, 2, 0, NULL);
      item_corr *= bis_fact;
      item_corr = (item_corr>1)?1:item_corr;
      item_corr = (item_corr<-1)?-1:item_corr;
      gsl_vector_set(items_bis_corr, i, item_corr);
    }

  /* adjust the correlation matrix */
  for( i = 0; i < nbr_item; i++)
    {
      for( i2 = 0; i2 < nbr_item; i2++)
	{
	  pair_corr = gsl_matrix_get(pairs_corr, i, i2);
	  pair_corr /= gsl_vector_get(items_sd, i) * gsl_vector_get(items_sd, i2);
	  gsl_matrix_set(pairs_corr, i, i2, pair_corr);
	}
    }

}

/**
   \brief Compute the classical test theory statistics.

   @param[in] patterns A matrix(subjects x items) with the multiple choice patterns.
   @param[in] options_weights A vector(options) of weights to compute the score.
   @param[in] items_pos A vector(items) with the position of the first option of each item
   in patterns (and probs).
   @param[in] nbr_options A vector(items) with the number of option of each item
   in patterns (and probs).
   @param[out] items_mean A vector(items) with the means of each items.
   @param[out] items_sd A vector(items) with the sd of each items.
   @param[out] items_corr A vector(items) with the correlations 
   between each items and the score of each subject without the item.
   @param[out] items_poly_corr A vector(items) with the polyserial correlations 
   between each items and the score of each subject without the item.
   @param[out] items_nbr A vector(items) with the number of non missing response
   for each items.
   @param[out] subjects_score A vector(subjects) with the scores of each subjects.
   @param[out] subjects_nbr A vector(subjects) with the number of non missing
   response of each subjects.
   @param[out] nbr The total number of non missing response.
   @param[out] mean The mean score.
   @param[out] sd The score sd.
   @param[out] alpha The fiability coefficient alpha.
   @param[out] pairs_corr A matrix(items x items) of correlation for each pair of items

   \warning The memory for the outputs should be allocated before.
*/
void 
classical_statistics_mc(gsl_matrix_int * patterns, gsl_vector * options_weights,
						gsl_vector_int * items_pos, gsl_vector_int * nbr_options,
						gsl_vector * items_mean, gsl_vector * items_sd, 
						gsl_vector * items_corr, gsl_vector * items_poly_corr, 
						gsl_vector_int * items_nbr,
						gsl_vector * subjects_score, gsl_vector_int * subjects_nbr,
						int * nbr, double * mean, double * sd, double * alpha,
						gsl_matrix * pairs_corr, gsl_vector_int * freq_table)
{
	int i, i2, j, resp, resp2, subject_nbr, item_nbr, pair_nbr, pos, pos2, nbr_opt, freq,
    nbr_subject = patterns->size1, nbr_item = nbr_options->size, nbr_subject_complete=0;
  double subject_score, item_mean, item_mean2, item_sd, item_corr, resp_w, resp2_w,
    pair_corr, tot_var, poly_fact;
  gsl_vector_int_view column;
  gsl_vector_view weights;

  *sd = 0;
  *alpha = 0;
  *nbr = 0;
  *mean = 0;
  gsl_vector_set_all(subjects_score, 0);
  gsl_vector_int_set_all(subjects_nbr, 0);
  gsl_vector_set_all(items_mean, 0);
  gsl_vector_set_all(items_corr, 0);
  gsl_vector_set_all(items_sd, 0);
  gsl_vector_int_set_all(items_nbr, 0);

  /* Compute the frequecies table */
  if(freq_table)
  {
	  gsl_vector_int_set_all(freq_table, 0);
  }
  
  /* compute the means and counts */
  for( j = 0; j < nbr_subject; j++)
    {
      subject_score = 0;
      subject_nbr = 0;
      for( i = 0; i < nbr_item; i++)
	{
	  pos = gsl_vector_int_get(items_pos, i);
	  resp = gsl_matrix_int_get(patterns, j, i);

	  item_mean = gsl_vector_get(items_mean, i);
	  item_nbr = gsl_vector_int_get(items_nbr, i);

	  if(freq_table)
	  {
		  freq = gsl_vector_int_get(freq_table, pos + i + ((resp==BLANK)?0:resp));
		  freq++;
		  gsl_vector_int_set(freq_table, pos + i + ((resp==BLANK)?0:resp), freq);
	  }
	  
	  if(resp != BLANK) 
	    {
	      resp_w = gsl_vector_get(options_weights, pos+resp-1);
	      subject_nbr++;
	      item_nbr++;
	      subject_score += resp_w;
	      item_mean += resp_w;
	    }

	  gsl_vector_set(items_mean, i, item_mean);
	  gsl_vector_int_set(items_nbr, i, item_nbr);
	}

      if(subject_nbr==nbr_item) nbr_subject_complete++;

      gsl_vector_set(subjects_score, j, subject_score);
      gsl_vector_int_set(subjects_nbr, j, subject_nbr);
      *nbr += subject_nbr;
    }

  /* adjust the means */
  for( i = 0; i < nbr_item; i++)
    {
      item_mean = gsl_vector_get(items_mean, i);
      item_nbr = gsl_vector_int_get(items_nbr, i);
      if(item_nbr > 0) item_mean /= item_nbr;
      *mean += item_mean;
      gsl_vector_set(items_mean, i, item_mean);
    }

  /* compute the covariance matrix and alpha */
  tot_var = 0;
  for( i = 0; i < nbr_item; i++)
    {
      item_mean = gsl_vector_get(items_mean, i);
      pos = gsl_vector_int_get(items_pos, i);
      for( i2 = 0; i2 < nbr_item; i2++)
	{
	  item_mean2 = gsl_vector_get(items_mean, i2);
	  pos2 = gsl_vector_int_get(items_pos, i2);
	  pair_corr = 0;
	  pair_nbr = 0;
	  for( j = 0; j < nbr_subject; j++)
	    {
	      resp = gsl_matrix_int_get(patterns, j, i);
	      resp2 = gsl_matrix_int_get(patterns, j, i2);
	      if(resp != BLANK && resp2 != BLANK) 
		{
		  pair_nbr++;
		  resp_w = gsl_vector_get(options_weights, pos+resp-1);
		  resp2_w = gsl_vector_get(options_weights, pos2+resp2-1);
		  pair_corr += (resp_w - item_mean) * (resp2_w - item_mean2);
		}
	    }
	  if(pair_nbr > 0) pair_corr /= pair_nbr;
          tot_var += pair_corr;
	  gsl_matrix_set(pairs_corr, i, i2, pair_corr);
	}
      item_sd = gsl_matrix_get(pairs_corr, i, i);
      gsl_vector_set(items_sd, i, item_sd);
      *alpha += item_sd;
    }
  *alpha = (nbr_item/(nbr_item-1.0))*(1.0-*alpha/tot_var);
  *sd = sqrt(tot_var);

  /* adjust the item-tot correlations and item_sd */
  for( i = 0; i < nbr_item; i++)
    {
      nbr_opt = gsl_vector_int_get(nbr_options, i);
      pos = gsl_vector_int_get(items_pos, i);
      item_corr = 0;
      item_sd = gsl_vector_get(items_sd, i);
      for( i2 = 0; i2 < nbr_item; i2++)
	{
	  pair_corr = gsl_matrix_get(pairs_corr, i, i2);
	  item_corr += pair_corr;
	}
      item_corr = (item_corr-item_sd)/sqrt(item_sd*(tot_var-2*item_corr+item_sd));
      gsl_vector_set(items_corr, i, item_corr);
      gsl_vector_set(items_sd, i, sqrt(item_sd));
      column = gsl_matrix_int_column(patterns, i);
      weights = gsl_vector_subvector(options_weights, pos, nbr_opt);
      poly_fact = polyserial_factor(&column.vector, nbr_opt, 1, &weights.vector);
      item_corr *= poly_fact;
      item_corr = (item_corr>1)?1:item_corr;
      item_corr = (item_corr<-1)?-1:item_corr;
      gsl_vector_set(items_poly_corr, i, item_corr);
    }

  /* adjust the correlation matrix */
  for( i = 0; i < nbr_item; i++)
    {
      for( i2 = 0; i2 < nbr_item; i2++)
	{
	  pair_corr = gsl_matrix_get(pairs_corr, i, i2);
	  pair_corr /= gsl_vector_get(items_sd, i) * gsl_vector_get(items_sd, i2);
	  gsl_matrix_set(pairs_corr, i, i2, pair_corr);
	}
    }
}

