/* mmle_mixed.c
 *
 * Copyright (C) 2007 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Functions to estimate the multiple choice item
   parameters by MMLE (Maximal Marginal Likelihood) for mixed models.

   The overall objectif is to find the OCC (option characteristic curves)
   maximizing the ML (marginal likelihood). An EM (expectation-maximization)
   iterative algorithm is used.

   1. A grid of ability levels has to be fixed. Something like 32 values from
   -4 to 4 will do. Those are called the quadrature classes in the code and
   can be generated with the function "quadrature".

   2. A first approximation of the OCC has to be available.

   3. For each pattern (a response vector from one subject) the a posteriori
   probabilities of being in each quadrature classes is computed by the
   function "posteriors".

   4. The expected number of subject in each quadrature classes
   (quad_sizes), and for each option the expected number of subject in
   each quadrature classes having choosen this option (quad_freqs) are
   computed by the function "frequencies".

   5. Once these quantities are assumed to be known, the log-likelihood can be
   maximized independantly for each item. The maximization is done by a root
   finding algorithm in the function "mle_2plm_mc". A variant of Newton-Raphson
   from the gsl library is used. For that we must have a function giving the
   firsts (gradient) and seconds (hessian) derivatives of the log-likelihood by
   the item parameters, those are computed in "like_2plm_mc_fdfdf2".

   6. Steps 3-5 are repeated until convergence is achieved.

   \author Stephane Germain <germste@gmail.com>
*/

#include "libirt.h"

#include <stdio.h>
#include <math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_multiroots.h>
#include <gsl/gsl_linalg.h>

/**
   \brief Compute the response functions for a mixed model on all the items.
   
   @param[in] slopes A vector(options) with the slope parameters of each option.
   @param[in] thresholds A vector(options) with the threshold parameters of each option.
   @param[in] nbr_options A vector(items) with the number of option of each items.
   @param[in] items_pos A vector(items) with the position of the first option of each item
   in patterns.
   @param[in] quad_points A vector(classes) with the middle points of each quadrature class.
   @param[out] probs A matrix(options x classes) with the response functions.
   @param[out] bounds A matrix(options x classes) with the boundary functions of the graded items (optional).
   
   \todo Stddev of the probs

   \warning The memory for \em probs and \em bounds should be allocated before.
*/
void
probs_2plm_mixed (gsl_vector * slopes, gsl_vector * thresholds,
		  gsl_vector_int * items_models, gsl_vector_int * nbr_options, gsl_vector_int * items_pos,
		  gsl_vector * quad_points, gsl_matrix * probs, gsl_matrix * bounds)
{
  int nbr_quad, nbr_item, nbr_option, pos, i, model;
  gsl_vector_view slopes_i, thresholds_i;
  gsl_matrix_view probs_i, bounds_i;

  nbr_quad = quad_points->size;
  nbr_item = nbr_options->size;

  /* for each item */
  for (i = 0; i < nbr_item; i++)
    {
      model = gsl_vector_int_get(items_models, i);
      pos = gsl_vector_int_get(items_pos, i);
      nbr_option = gsl_vector_int_get(nbr_options, i);
      slopes_i = gsl_vector_subvector(slopes, pos, nbr_option);
      thresholds_i = gsl_vector_subvector(thresholds, pos, nbr_option);
      probs_i = gsl_matrix_submatrix(probs, pos, 0, nbr_option, nbr_quad);
      if(bounds) bounds_i = gsl_matrix_submatrix(bounds, pos+i, 0, nbr_option+1, nbr_quad);
      switch(model) {
      case CODE_NOMINAL:
	prob_2plm_mc(&slopes_i.vector, &thresholds_i.vector, quad_points, &probs_i.matrix);
	break;
      case CODE_GRADED:
	prob_2plm_grad(&slopes_i.vector, &thresholds_i.vector, quad_points, &probs_i.matrix, bounds?&bounds_i.matrix:NULL);
	break;
      }
    }
}

/**
   \brief Estimate the options response functions by MMLE (Maximum Marginal Likelihood).

   @param[in] max_em_iter The maximum number of EM iterations. At least 20 iteration are made.
   @param[in] max_nr_iter The maximum number of Newton iterations performed
   for each item at each EM iteration.
   @param[in] prec The relative change in the likelihood to stop the EM algorithm.
   This value divided by 10 is also the desired precision of each parameter estimate.
   @param[in] patterns A matrix(patterns x options) of binary responses.
   @param[in] counts A vector(patterns) with the count of each pattern.
   If NULL the counts are assumed to be all 1.
   @param[in] quad_points A vector(classes) with the middle points of each quadrature class.
   @param[in] quad_weights A vector(classes) with the prior weights of each quadrature class.
   @param[in] items_models A vector(items) with the model number of each item (1=nominal, 2=graded).
   @param[in] items_pos A vector(items) with the position of the first option of each item
   in patterns (and probs).
   @param[in] nbr_options A vector(items) with the number of option of each item
   in patterns (and probs).
   @param[in,out] thresholds A vector(options) with the estimated thresholds.
   They should be initialize first.
   @param[out] thresh_stddev A vector(options) with the estimated thresholds standard deviation.
   @param[in,out] slopes A vector(options) with the estimated slopes.
   They should be initialize first.
   @param[out] slopes_stddev A vector(options) with the estimated slopes standard deviation.
   @param[in] ignore A vector(items) of ignore flag.
   @param[out] nbr_notconverge The number of items that didn't converged.
   @param[out] notconverge A vector(items) of flag set for the items that didn't converged.

   \return 1 if the relative change in the maximum log likelihood was less than prec
   else 0.
   
   \warning The memory for the outputs should be allocated before.
*/
int
mmle_mixed (int max_em_iter, int max_nr_iter, double prec,
	    gsl_matrix_int * patterns, gsl_vector * counts,
	    gsl_vector * quad_points, gsl_vector * quad_weights, 
	    gsl_vector_int * items_models, gsl_vector_int * items_pos, gsl_vector_int * nbr_options,
	    gsl_vector * thresholds, gsl_vector * thresh_stddev,
	    gsl_vector * slopes, gsl_vector * slopes_stddev,
	    gsl_vector_int * ignore,
	    int * nbr_notconverge, gsl_vector_int * notconverge)
{
  int em_iter, nbr_quad, nbr_pattern, nbr_item, nbr_option_tot, nbr_option,
    ret_val, k, j, i, o, pos, model;
  double nbr_subject, mllk, mllk_old=0, mllk_i;
  gsl_matrix *quad_freqs, *post;
  gsl_vector *quad_sizes;
  gsl_matrix_view quad_freqs_i, probs_i, bounds_i;
  gsl_vector_view thresh_i, thresh_stddev_i, slopes_i, slopes_stddev_i;
  like_2plm_mc_struct params;
  gsl_matrix *probs, *bounds;

  nbr_quad = quad_points->size;
  nbr_pattern = patterns->size1;
  nbr_option_tot = patterns->size2;
  nbr_item = items_pos->size;

  nbr_subject = 0;
  /* count the number of subject */
  for(j = 0; j < nbr_pattern; j++)
    nbr_subject += counts ? gsl_vector_get(counts, j) : 1;

  /* allocate the memory */
  quad_freqs = gsl_matrix_alloc (nbr_option_tot, nbr_quad);
  quad_sizes = gsl_vector_alloc (nbr_quad);
  post = gsl_matrix_alloc (nbr_pattern, nbr_quad);
  probs = gsl_matrix_alloc (nbr_option_tot, nbr_quad);
  bounds = gsl_matrix_alloc (nbr_option_tot+nbr_item, nbr_quad);

  /* EM iterations */

  for (em_iter = 1; em_iter <= max_em_iter; em_iter++)
    {
      /* E (estimation) step */

      if (libirt_verbose > 2)
	printf ("\nEM iteration %d\n", em_iter);

      /* compute the response functions */
      probs_2plm_mixed (slopes, thresholds, items_models, nbr_options, items_pos, quad_points, probs, bounds);

      /* compute the posterior prob */
      posteriors_mc (patterns, probs, nbr_options, items_pos, quad_weights, post);

      /* compute the expected sizes and frequencies */
      frequencies (patterns, counts, post, probs, quad_sizes, quad_freqs);

      /* print debugging information */
      if (libirt_verbose > 5)
	{
	  for (i = 0; i < nbr_item; i++)
	    for (j = 0; j < gsl_vector_int_get(nbr_options, i); j++)
	      {
		pos = gsl_vector_int_get(items_pos,i);
		printf("Probabilities for option %d of item %d :\n", j+1, i+1);
		for (k = 0; k < nbr_quad; k++)
		  printf(" %8.2e", gsl_matrix_get(probs, pos+j, k));
		printf("\n");
	      }
	  for (j = 0; j < nbr_pattern; j++)
	    {
	      printf("Posterior for pattern %d :\n", j+1);
	      for (k = 0; k < nbr_quad; k++)
		printf(" %8.2e", gsl_matrix_get(post,j,k));
	      printf("\n");
	    }
	  printf("Sizes :\n");
	  for (k = 0; k < nbr_quad; k++)
	    printf(" %8.2e", gsl_vector_get(quad_sizes,k));
	  printf("\n");
	  for (i = 0; i < nbr_item; i++)
	    for (j = 0; j < gsl_vector_int_get(nbr_options, i); j++)
	      {
		pos = gsl_vector_int_get(items_pos,i);
		printf("Frequencies for option %d of item %d :\n", j+1, i+1);
		for (k = 0; k < nbr_quad; k++)
		  printf(" %8.2e", gsl_matrix_get(quad_freqs,pos+j,k));
		printf("\n");
	      }
	}

      /* the number of item that do not converge */
      *nbr_notconverge = 0;

      mllk = 0;

      /* M (maximisation) step */
      for (i = 0; i < nbr_item; i++)
	{
	  /* ignore the degenerate items */
	  if (ignore && gsl_vector_int_get(ignore, i)) continue;

	  /* get the corresponding rows of freqs, sizes, probs and probs_stddev */
	  model = gsl_vector_int_get(items_models, i);
	  nbr_option = gsl_vector_int_get(nbr_options, i);
	  pos = gsl_vector_int_get(items_pos, i);
	  quad_freqs_i = gsl_matrix_submatrix(quad_freqs, pos, 0, nbr_option, nbr_quad);
	  thresh_i = gsl_vector_subvector(thresholds, pos, nbr_option);
	  thresh_stddev_i = gsl_vector_subvector(thresh_stddev, pos, nbr_option);
	  slopes_i = gsl_vector_subvector(slopes, pos, nbr_option);
	  slopes_stddev_i = gsl_vector_subvector(slopes_stddev, pos, nbr_option);
	  probs_i = gsl_matrix_submatrix(probs, pos, 0, nbr_option, nbr_quad);
	  bounds_i = gsl_matrix_submatrix(bounds, pos+i, 0, nbr_option+1, nbr_quad);
	  params.quad_freqs = &quad_freqs_i.matrix;
	  params.quad_sizes = quad_sizes;
	  params.quad_points = quad_points;
	  params.probs = &probs_i.matrix;
	  params.bounds = &bounds_i.matrix;

	  /* use a root finding algorithm */
	  if (libirt_verbose > 3)
	    printf ("item %d", i + 1);

	  switch(model) {
	  case CODE_NOMINAL:
	    ret_val = mle_2plm_mc
	      (max_nr_iter, prec/10, &params, &thresh_i.vector, &thresh_stddev_i.vector,
	       &slopes_i.vector, &slopes_stddev_i.vector, &mllk_i);
	    break;
	  case CODE_GRADED:
	    ret_val = mle_2plm_grad
	      (max_nr_iter, prec/10, &params, &thresh_i.vector, &thresh_stddev_i.vector,
	       &slopes_i.vector, &slopes_stddev_i.vector, &mllk_i);
	    /* copy the slope to each option */
	    for(o = 1; o < nbr_option; o++)
	      {
		gsl_vector_set(&slopes_i.vector, o, gsl_vector_get(&slopes_i.vector, 0));
		gsl_vector_set(&slopes_stddev_i.vector, o, gsl_vector_get(&slopes_stddev_i.vector, 0));
	      }
	    break;
	  }

	  *nbr_notconverge += ret_val;

	  mllk += mllk_i;
	  
	  gsl_vector_int_set(notconverge, i, ret_val);
	}

      if(gsl_isnan(mllk)) {
	if (libirt_verbose > 1) printf("NAN error ! Stopping.\n");
	break;
      }

      if (libirt_verbose > 2)
	printf("MLLK = %10.3e %%CHANGE = %9.3e\n", mllk, fabs((mllk-mllk_old)/mllk));

      /* if the change in the maximum log likelihood is small then exit */
      if (fabs((mllk-mllk_old)/mllk) < prec && em_iter >= 20) break;

      mllk_old = mllk;
    }

  /* check if the EM algo converged */
  if (em_iter <= max_em_iter && !gsl_isnan(mllk)) ret_val = 1;
  else ret_val = 0;

  if (libirt_verbose > 0 && ret_val == 0)
    printf("The EM algorithm didn't converged after %d iterations.\n", em_iter-1);

  if (libirt_verbose > 0 && ret_val == 1)
    printf("The EM algorithm converged after %d iterations.\n", em_iter);

  /* free the memory */
  gsl_matrix_free (quad_freqs);
  gsl_vector_free (quad_sizes);
  gsl_matrix_free (post);
  gsl_matrix_free (probs);
  gsl_matrix_free (bounds);

  return ret_val;
}
