/* ld_test.c
 *
 * Copyright (C) 2005, 2006, 2007 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Log likelihood ratio test for local independance.
   \author Stephane Germain <germste@gmail.com>
*/

#include "libirt.h"
#if HAVE_CONFIG_H
#  include <config.h>
#endif
#include <math.h>
#include <gsl/gsl_cdf.h>

/**
   \brief Compute the log likelihood ratio statistics of each items pairs
   for local dependance.

   @param[in] patterns A matrix(subjects x items) with binary response.
   @param[in] probs A matrix(items x classes) with response functions.
   @param[in] quad_weights A vector(classes) with the normal weights of each class.
   @param[out] chi2 A matrix(items x items) with the statistics of each items pairs.
   @param[out] df A vector(items x items) with the degrees of freedom.
   @param[out] p_value A matrix(items x items) with the p-values of each items pairs.

   \warning The memory for the outputs should be allocated before.
*/
void
llk_ratio_ld_test (gsl_matrix_int * patterns, gsl_matrix *probs,
		   gsl_vector *quad_weights, gsl_matrix *chi2, 
		   gsl_matrix_int *df, gsl_matrix *p_value)
{
  int nbr_quad, nbr_item, i1, i2, j, k, nbr_subject, o1, o2, nbr, obs;
  double weight, prob1, prob2, tmp;
  gsl_matrix * expected_table;
  gsl_matrix_int * observed_table;

  nbr_subject = patterns->size1;
  nbr_quad = probs->size2;
  nbr_item = probs->size1;

  expected_table = gsl_matrix_alloc(2*nbr_item, 2*nbr_item);
  observed_table = gsl_matrix_int_alloc(2*nbr_item, 2*nbr_item);

  gsl_matrix_set_all(chi2, 0);
  gsl_matrix_set_all(p_value, 1);
  gsl_matrix_set_all(expected_table, 0);
  gsl_matrix_int_set_all(observed_table, 0);

  /* compute the observed contingency tables */
  for (j = 0; j < nbr_subject; j++)
    {
      for (i1 = 0; i1 < nbr_item - 1; i1++) 
	{
	  o1 = gsl_matrix_int_get(patterns, j, i1);
	  if (o1 != BLANK)
	    for (i2 = i1+1; i2 < nbr_item; i2++) 
	      {
		o2 = gsl_matrix_int_get(patterns, j, i2);
		if (o2 != BLANK)
		  gsl_matrix_int_set(observed_table, 2*i1+o1, 2*i2+o2,
				     1+gsl_matrix_int_get(observed_table, 
							  2*i1+o1, 2*i2+o2));
	      }
	}
    }

  /* compute the expected contingency tables */
  for (k = 0; k < nbr_quad; k++)
    {
      weight = gsl_vector_get(quad_weights, k);
      for (i1 = 0; i1 < nbr_item - 1; i1++) 
	{
	  prob1 = gsl_matrix_get(probs, i1, k);
	  for (o1 = 0; o1 <= 1; o1++)
	    {
	      for (i2 = i1+1; i2 < nbr_item; i2++) 
		{
		  prob2 = gsl_matrix_get(probs, i2, k);
		  for (o2 = 0; o2 <= 1; o2++)
		    {
		      tmp = (o1?prob1:(1-prob1))
			* (o2?prob2:(1-prob2))
			* weight;
		      gsl_matrix_set(expected_table, 2*i1+o1, 2*i2+o2,
				     tmp+gsl_matrix_get(expected_table, 
							2*i1+o1, 2*i2+o2));
		    }
		}
	    }
	}
    }

  /* compute the llk ratio statistics */
  for (i1 = 0; i1 < nbr_item - 1; i1++) 
    {
      for (i2 = i1+1; i2 < nbr_item; i2++) 
	{
	  tmp = 0;
	  nbr = 0;
	  for (o1 = 0; o1 <= 1; o1++)
	    {
	      for (o2 = 0; o2 <= 1; o2++)
		{
		  obs = gsl_matrix_int_get(observed_table, 2*i1+o1, 2*i2+o2);
		  nbr += obs;
		  if (obs)
		    tmp += 2 * obs
		      * log(obs/gsl_matrix_get(expected_table, 2*i1+o1, 2*i2+o2));
		}
	    }
	  tmp -= 2 * nbr * log(nbr);
	  gsl_matrix_set(chi2, i1, i2, tmp);
	  gsl_matrix_set(chi2, i2, i1, tmp);
	  gsl_matrix_int_set(df, i1, i2, 1);
	  gsl_matrix_int_set(df, i2, i1, 1);
	  tmp = 1-gsl_cdf_chisq_P(tmp, 1);
	  gsl_matrix_set(p_value, i1, i2, tmp);
	  gsl_matrix_set(p_value, i2, i1, tmp);
	}
    }

  gsl_matrix_free(expected_table);
  gsl_matrix_int_free(observed_table);
}

/**
   \brief Compute the log likelihood ratio statistics of each multiple choice
   items pairs for local dependance.

   @param[in] patterns A matrix(subjects x items) with the responses.
   @param[in] probs A matrix(options x classes) with response functions.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[in] items_pos A vector(items) with the position of the first option of each item.
   @param[in] quad_weights A vector(classes) with the normal weights of each class.
   @param[out] chi2 A matrix(items x items) with the statistics of each items pairs.
   @param[out] df A vector(items x items) with the degrees of freedom.
   @param[out] p_value A matrix(items x items) with the p-values of each items pairs.

   \warning The memory for the outputs should be allocated before.
*/
void
llk_ratio_ld_test_mc (gsl_matrix_int * patterns, gsl_matrix *probs,
		      gsl_vector *quad_weights,
		      gsl_vector_int *nbr_options, gsl_vector_int *items_pos,
		      gsl_matrix *chi2, gsl_matrix_int *df, gsl_matrix *p_value)
{
  int nbr_quad, nbr_item, i1, i2, j, k, nbr_subject, nbr_option_tot,
    o1, o2, nbr, obs, pos1, pos2, nbr1, nbr2;
  double weight, prob1, prob2, tmp;
  gsl_matrix * expected_table;
  gsl_matrix_int * observed_table;

  nbr_subject = patterns->size1;
  nbr_item = patterns->size2;
  nbr_option_tot = probs->size1;
  nbr_quad = probs->size2;

  expected_table = gsl_matrix_alloc(nbr_option_tot, nbr_option_tot);
  observed_table = gsl_matrix_int_alloc(nbr_option_tot, nbr_option_tot);

  gsl_matrix_set_all(chi2, 0);
  gsl_matrix_set_all(p_value, 1);
  gsl_matrix_set_all(expected_table, 0);
  gsl_matrix_int_set_all(observed_table, 0);

  /* compute the observed contingency tables */
  for (j = 0; j < nbr_subject; j++)
    {
      for (i1 = 0; i1 < nbr_item - 1; i1++) 
	{
	  pos1 = gsl_vector_int_get(items_pos, i1);
	  o1 = gsl_matrix_int_get(patterns, j, i1);
	  if (o1 != BLANK)
	    {
	      o1--;
	      for (i2 = i1+1; i2 < nbr_item; i2++) 
		{
		  pos2 = gsl_vector_int_get(items_pos, i2);
		  o2 = gsl_matrix_int_get(patterns, j, i2);
		  if (o2 != BLANK)
		    {
		      o2--;
		      gsl_matrix_int_set(observed_table, pos1+o1, pos2+o2,
					 1+gsl_matrix_int_get(observed_table, 
							      pos1+o1, pos2+o2));
		    }
		}
	    }
	}
    }

  /* compute the expected contingency tables */
  for (k = 0; k < nbr_quad; k++)
    {
      weight = gsl_vector_get(quad_weights, k);
      for (i1 = 0; i1 < nbr_item - 1; i1++) 
	{
	  pos1 = gsl_vector_int_get(items_pos, i1);
	  nbr1 = gsl_vector_int_get(nbr_options, i1);
	  for (o1 = 0; o1 < nbr1; o1++)
	    {
	      prob1 = gsl_matrix_get(probs, pos1+o1, k);
	      for (i2 = i1+1; i2 < nbr_item; i2++) 
		{
		  pos2 = gsl_vector_int_get(items_pos, i2);
		  nbr2 = gsl_vector_int_get(nbr_options, i2);
		  for (o2 = 0; o2 < nbr2; o2++)
		    {
		      prob2 = gsl_matrix_get(probs, pos2+o2, k);
		      tmp = prob1 * prob2 * weight;
		      gsl_matrix_set(expected_table, pos1+o1, pos2+o2,
				     tmp+gsl_matrix_get(expected_table, 
							pos1+o1, pos2+o2));
		    }
		}
	    }
	}
    }

  /* compute the llk ratio statistics */
  for (i1 = 0; i1 < nbr_item - 1; i1++) 
    {
      pos1 = gsl_vector_int_get(items_pos, i1);
      nbr1 = gsl_vector_int_get(nbr_options, i1);
      for (i2 = i1+1; i2 < nbr_item; i2++) 
	{
	  pos2 = gsl_vector_int_get(items_pos, i2);
	  nbr2 = gsl_vector_int_get(nbr_options, i2);
	  tmp = 0;
	  nbr = 0;
	  for (o1 = 0; o1 < nbr1; o1++)
	    {
	      for (o2 = 0; o2 < nbr2; o2++)
		{
		  obs = gsl_matrix_int_get(observed_table, pos1+o1, pos2+o2);
		  nbr += obs;
		  if (obs)
		    tmp += 2 * obs
		      * log(obs/gsl_matrix_get(expected_table, pos1+o1, pos2+o2));
		}
	    }
	  tmp -= 2 * nbr * log(nbr);
	  gsl_matrix_set(chi2, i1, i2, tmp);
	  gsl_matrix_set(chi2, i2, i1, tmp);
	  gsl_matrix_int_set(df, i1, i2, (nbr1-1)*(nbr2-1));
	  gsl_matrix_int_set(df, i2, i1, gsl_matrix_int_get(df, i1, i2));
	  tmp = 1-gsl_cdf_chisq_P(tmp, gsl_matrix_int_get(df, i1, i2));
	  gsl_matrix_set(p_value, i1, i2, tmp);
	  gsl_matrix_set(p_value, i2, i1, tmp);
	}
    }

  gsl_matrix_free(expected_table);
  gsl_matrix_int_free(observed_table);
}

