/* em_bme_3plm.c
 *
 * Copyright (C) 2005, 2006 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Functions to estimate the item parameters by MMLE/EM-BME.
   \author Stephane Germain <germste@gmail.com>

   The overall objectif is to find the ICC (item characteristic curves)
   maximizing the ML (marginal likelihood). An EM (expectation-maximization)
   iterative algorithm is used.

   1. A grid of ability levels has to be fixed. Something like 32 values from
   -4 to 4 will do. Those are called the quadrature classes in the code and
   can be generated with the function "quadrature".

   2. A first approximation of the ICC has to be available.  Almost anything
   will do. For example the slopes can be initialized to 1, the thresholds to
   0, and the asymptotes (if used) to 0.1.

   3. For each pattern (a response vector from one subject) the a posteriori
   probabilities of being in each quadrature classes is computed by the
   function "posteriors".

   4. The expected number of subject in each quadrature classes (quad_sizes),
   and for each item the expected number of subject in each quadrature classes
   having a success at this item (quad_freqs) are computed by the function
   "frequencies".

   5. Once these quantities are assumed to be known, the log-likelihood can be
   maximized independantly for each item. The maximization is done by a root
   finding algorithm in the function "bme_3plm". A variant of Newton-Raphson
   from the gsl library is used. For that we must have a function giving the
   firsts (gradient) and seconds (hessian) derivatives of the log-likelihood by
   the item parameters, those are computed in "bme_3plm_fdfdf2".

   6. Steps 3-5 are repeated until convergence is achieved.

*/

#include "libirt.h"

#include <stdio.h>
#include <math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_multiroots.h>
#include <gsl/gsl_linalg.h>

/**
   \brief Compute the 3 parameter logistic function.

   @param[in] ability
   @param[in] slope
   @param[in] threshold
   @param[in] asymptote

   \return The 3PLM evaluated at \em ability.
*/
double
prob_3plm (double ability, double slope, double threshold, double asymptote)
{
  double prob;
  prob =  -slope * (ability - threshold);
  if(prob>0.5/VERY_SMALL_FREQ) prob =  asymptote;
  else prob =  asymptote + (1 - asymptote) / (1 + exp (prob));
  if(prob<VERY_SMALL_PROB) prob=VERY_SMALL_PROB;
  if(prob>1-VERY_SMALL_PROB) prob=1-VERY_SMALL_PROB;
  return prob;					
}

/**
   \brief Compute the response functions for a 3PLM.
   
   @param[in] slopes A vector(items) with the slope parameters of each item.
   @param[in] thresholds A vector(items) with the threshold parameters of each item.
   @param[in] asymptotes A vector(items) with the asymptote parameters of each item.
   Can be NULL for a 2PLM.
   @param[in] quad_points A vector(classes) with the middle points of each quadrature class.
   @param[out] probs A matrix(items x classes) with the response functions.
   
   \warning The memory for \em probs should be allocated before.

   \todo Stddev of the probs
*/
void
probs_3plm (gsl_vector * slopes, gsl_vector * thresholds,
	    gsl_vector * asymptotes, gsl_vector * quad_points,
	    gsl_matrix * probs)
{
  int nbr_quad, nbr_item, i, k;
  double prob;

  nbr_quad = quad_points->size;
  nbr_item = slopes->size;

  /* for each item */
  for (i = 0; i < nbr_item; i++)
    {
      /* for each class */
      for (k = 0; k < nbr_quad; k++)
	{
	  prob = prob_3plm (gsl_vector_get (quad_points, k),
			    gsl_vector_get (slopes, i),
			    gsl_vector_get (thresholds, i),
			    asymptotes ? gsl_vector_get (asymptotes, i) : 0);
	  gsl_matrix_set (probs, i, k, prob);
	}
    }
}

/**
   \brief Used to passed extra parameter to \em bme_3plm_fdfdf2.

   This is used to comply with the root finding functions in
   the gsl (GNU scientific library).
*/
typedef struct
{
  /** \brief The middle point of the quadrature classes. */
  gsl_vector *quad_points;

  /** \brief The expected number of subject in each quadrature classes. */
  gsl_vector *quad_sizes;

  /** \brief The expected number of subject in each quadrature classes having a success. */
  gsl_vector *quad_freqs;

  /** \brief Enable the use of a prior on the slope. */
  int slope_prior;

  /** \brief Enable the use of a prior on the threshold. */
  int thresh_prior;

  /** \brief Enable the use of a prior on the asymptote. */
  int asymp_prior;

  /** \brief The mean of the prior on the log-slope. */
  double log_slope_mean;

  /** \brief The variance of the prior on the log-slope. */
  double log_slope_var;

  /** \brief The mean of the prior on the threshold. */
  double thresh_mean;

  /** \brief The variance of the prior on the threshold. */
  double thresh_var;

  /** \brief The alpha of the prior on the asymptote. */
  double asymp_alpha;

  /** \brief The beta of the prior on the asymptote. */
  double asymp_beta;

  /** \brief The fixed asymptote if using the 2PLM */
  double asymptote;

  /** \brief The fixed slope if using the Rash model */
  double slope;

  /** \brief Disable the transformations of the slope and asymptote.
   This is used to get the estimated standard error of the parameter. */
  int no_trans;

} bme_3plm_struct;

/**
   \brief Compute the log likelihood, gradient and Hessian of the item parameter.

   @param[in] par_3plm The 3, 2 or 1 parameters to the 3PLM.
   @param[in] params The extra parameter to passes to the function.
   @param[out] f The log likelihood.
   @param[out] df The gradient of the log likelihood.
   @param[out] df2 The Hessian of the log likelihood.

   This function is not used directly by the root finding functions,
   but by others functions that comply with the gsl.

   \return GSL_SUCCESS for success.
*/
int
bme_3plm_fdfdf2 (const gsl_vector * par_3plm, void *params,
		 double *f, gsl_vector * df, gsl_matrix * df2)
{
  gsl_vector *quad_points = ((bme_3plm_struct *) params)->quad_points;
  gsl_vector *quad_sizes = ((bme_3plm_struct *) params)->quad_sizes;
  gsl_vector *quad_freqs = ((bme_3plm_struct *) params)->quad_freqs;
  double log_slope, slope, threshold, logit_asymp, asymptote;
  double L = 0, L1 = 0, L2 = 0, L3 = 0, L11 = 0, L12 = 0, L13 = 0, L22 =
    0, L23 = 0, L33 = 0;
  double size, freq, ability, prob, prob2, tmp, deriv_asymp;
  int j, nbr_quad, nbr_par;

  /* get the item parameter */
  nbr_par = par_3plm->size;
  threshold = gsl_vector_get (par_3plm, 0);
  if (nbr_par > 1)
    {
      log_slope = gsl_vector_get (par_3plm, 1);
      slope = exp (log_slope);
    }
  else
    {
      slope = ((bme_3plm_struct *) params)->slope;
    }
  if (nbr_par > 2)
    {
      logit_asymp = gsl_vector_get (par_3plm, 2);
      asymptote = logistic (logit_asymp);
    }
  else
    {
      asymptote = ((bme_3plm_struct *) params)->asymptote;
    }

  nbr_quad = quad_points->size;

  /* sum over the classes */
  for (j = 0; j < nbr_quad; j++)
    {
      ability = gsl_vector_get (quad_points, j);
      prob = prob_3plm (ability, slope, threshold, asymptote);
      prob2 = (prob - asymptote) / (1 - asymptote);
      size = gsl_vector_get (quad_sizes, j);
      freq = gsl_vector_get (quad_freqs, j);

      /* update the log likelihood */
      L += freq * log (prob);
      L += (size - freq) * log (1 - prob);

      /* update the gradient */
      tmp = (freq - size * prob) * prob2 / prob;
      L1 -= slope * tmp;
      L2 += tmp * (ability - threshold);
      L3 += tmp / (prob2 * (1 - asymptote));

      /* update the Hessian */
      tmp = size * prob2 * (1 - prob) / prob;
      L22 -= tmp * (ability - threshold) * (ability - threshold) * prob2;
      L11 -= slope * slope * tmp * prob2;
      L12 += slope * tmp * (ability - threshold) * prob2;
      L33 -= tmp / (prob2 * (1 - asymptote) * (prob - asymptote));
      L23 -= tmp * (ability - threshold) / (1 - asymptote);
      L13 += slope * tmp / (1 - asymptote);
    }

  if (nbr_par > 1 && !(((bme_3plm_struct *) params)->no_trans))
    {
      /* correction for the slope transformation */
      L22 *= slope * slope;
      /* This is not in Baker ! Why ? */
      L22 += L2 * slope;
      L12 *= slope;
      L23 *= slope;
      L2 *= slope;
    }

  /* priors corrections */
  if (nbr_par > 1 && ((bme_3plm_struct *) params)->slope_prior)
    {
      L2 -= (log_slope - ((bme_3plm_struct *) params)->log_slope_mean)
	/ ((bme_3plm_struct *) params)->log_slope_var;
      L22 -= 1 / ((bme_3plm_struct *) params)->log_slope_var;
    }
  if (((bme_3plm_struct *) params)->thresh_prior)
    {
      L1 -= (threshold - ((bme_3plm_struct *) params)->thresh_mean)
	/ ((bme_3plm_struct *) params)->thresh_var;
      L11 -= 1 / ((bme_3plm_struct *) params)->thresh_var;
    }
  if (nbr_par > 2 && ((bme_3plm_struct *) params)->asymp_prior)
    {
      L3 += (((bme_3plm_struct *) params)->asymp_alpha - 2) / asymptote
	- (((bme_3plm_struct *) params)->asymp_beta - 2) / (1 - asymptote);
      L33 -=
	(((bme_3plm_struct *) params)->asymp_alpha -
	 2) / (asymptote * asymptote) +
	(((bme_3plm_struct *) params)->asymp_beta -
	 2) / ((1 - asymptote) * (1 - asymptote));
    }

  /* correction for the asymptote transformation */
  if (nbr_par > 2 && !(((bme_3plm_struct *) params)->no_trans))
    {
      deriv_asymp = asymptote * asymptote * exp (-logit_asymp);
      L33 *= deriv_asymp * deriv_asymp;
      /* The equivalent for the slope is not in Baker ! Why ? */
      L33 += L3 * deriv_asymp * (2 * deriv_asymp / asymptote - 1);
      L23 *= deriv_asymp;
      L13 *= deriv_asymp;
      L3 *= deriv_asymp;
    }

  if (f){
    *f = L;
    if(gsl_isnan((*f))) return GSL_FAILURE;
  }

  if (df)
    {
      gsl_vector_set (df, 0, L1);
      if(gsl_isnan(L1)) return GSL_FAILURE;
      if (nbr_par > 1){
	gsl_vector_set (df, 1, L2);
	if(gsl_isnan(L2)) return GSL_FAILURE;
      }
      if (nbr_par > 2){
	gsl_vector_set (df, 2, L3);
	if(gsl_isnan(L3)) return GSL_FAILURE;
      }
    }

  if (df2)
    {
      gsl_matrix_set (df2, 0, 0, L11);
      if(gsl_isnan(L11)) return GSL_FAILURE;
      if (nbr_par > 1)
	{
	  gsl_matrix_set (df2, 0, 1, L12);
	  gsl_matrix_set (df2, 1, 0, L12);
	  if(gsl_isnan(L12)) return GSL_FAILURE;
	  gsl_matrix_set (df2, 1, 1, L22);
	  if(gsl_isnan(L22)) return GSL_FAILURE;
	}
      if (nbr_par > 2)
	{
	  gsl_matrix_set (df2, 0, 2, L13);
	  gsl_matrix_set (df2, 2, 0, L13);
	  if(gsl_isnan(L13)) return GSL_FAILURE;
	  gsl_matrix_set (df2, 1, 2, L23);
	  gsl_matrix_set (df2, 2, 1, L23);
	  if(gsl_isnan(L23)) return GSL_FAILURE;
	  gsl_matrix_set (df2, 2, 2, L33);
	  if(gsl_isnan(L33)) return GSL_FAILURE;
	}
    }

  return GSL_SUCCESS;
}

/**
   \brief Compute the log likelihood and gradient of the item parameter.

   @param[in] par_3plm The 3, 2 or 1 parameters to the 3PLM.
   @param[in] params The extra parameter to passes to the function.
   @param[out] f The log likelihood.
   @param[out] df The gradient of the log likelihood.

   This function is just a wrapper around \em bme_3plmfdfdf2
   to be used by the root finding functions in the gsl.
*/
int
bme_3plm_fdf (const gsl_vector * par_3plm, void *params,
	      double *f, gsl_vector * df)
{
  return bme_3plm_fdfdf2 (par_3plm, params, f, df, NULL);
}

/**
   \brief Compute the log likelihood gradient and Hessian of the item parameter.

   @param[in] par_3plm The 3, 2 or 1 parameters to the 3PLM.
   @param[in] params The extra parameter to passes to the function.
   @param[out] df The gradient of the log likelihood.
   @param[out] df2 The Hessian of the log likelihood.

   This function is just a wrapper around \em bme_3plmfdfdf2
   to be used by the root finding functions in the gsl.

   \return GSL_SUCCESS for success.
*/
int
bme_3plm_dfdf2 (const gsl_vector * par_3plm, void *params,
		gsl_vector * df, gsl_matrix * df2)
{
  return bme_3plm_fdfdf2 (par_3plm, params, NULL, df, df2);
}

/**
   \brief Compute the log likelihood of the item parameter.

   @param[in] par_3plm The 3, 2 or 1 parameters to the 3PLM.
   @param[in] params The extra parameter to passes to the function.

   This function is just a wrapper around \em bme_3plmfdfdf2
   to be used by the root finding functions in the gsl.

   \return The log likelihood.
*/
double
bme_3plm_f (const gsl_vector * par_3plm, void *params)
{
  double f;
  bme_3plm_fdfdf2 (par_3plm, params, &f, NULL, NULL);
  return f;
}

/**
   \brief Compute the log likelihood gradient of the item parameter.

   @param[in] par_3plm The 3, 2 or 1 parameters to the 3PLM.
   @param[in] params The extra parameter to passes to the function.
   @param[out] df The gradient of the log likelihood.

   This function is just a wrapper around \em bme_3plmfdfdf2
   to be used by the root finding functions in the gsl.

   \return GSL_SUCCESS for success.
*/
int
bme_3plm_df (const gsl_vector * par_3plm, void *params, gsl_vector * df)
{
  return bme_3plm_fdfdf2 (par_3plm, params, NULL, df, NULL);
}

/**
   \brief Compute the log likelihood Hessian of the item parameter.

   @param[in] par_3plm The 3, 2 or 1 parameters to the 3PLM.
   @param[in] params The extra parameter to passes to the function.
   @param[out] df2 The Hessian of the log likelihood.

   This function is just a wrapper around \em bme_3plmfdfdf2
   to be used by the root finding functions in the gsl.

   \return GSL_SUCCESS for success.
*/
int
bme_3plm_df2 (const gsl_vector * par_3plm, void *params, gsl_matrix * df2)
{
  return bme_3plm_fdfdf2 (par_3plm, params, NULL, NULL, df2);
}

/**
   \brief Does the maximization step of the EM algorithm to
   estimate the parameters of a 2PLM or 3PLM by MMLE
   or more generally (if a prior is used) by BME.

   @param[in] nbr_par The number of parameter (1, 2 or 3).
   @param[in] max_iter The maximum number of Newton iterations performed for each item.
   @param[in] prec The desired precision of each parameter estimate.
   @param[in] quad_freqs A matrix(items x classes) with the expected number of
   subjects in the class having a success at the item.
   @param[in] quad_points A vector(classes) with the middle points of each quadrature class.
   @param[in] quad_sizes A vector (classes) with the expected number of
   subjects in the class.
   @param[in] slope_prior Controls whether to use a lognormal(\em slope_mean, \em slope_dev)
   prior on the slope.
   @param[in] slope_mean The mean of the lognormal prior on the slope.
   @param[in] slope_dev The standard deviation of the lognormal prior on the slope.
   @param[in] thresh_prior Controls whether to use a normal(\em thresh_mean, \em thresh_dev)
   prior on the threshold
   @param[in] thresh_mean The mean of the normal prior on the threshold.
   @param[in] thresh_dev The standard deviation of the normal prior on the threshold.
   @param[in] asymp_prior Controls whether to use a beta(\em asymp_mean, \em asymp_weight)
   prior on the asymptote
   @param[in] asymp_mean The mean of the beta prior on the asymptote.
   The alpha and beta parameter are alpha = mean * weight + 1 and beta = (1 - mean) * weight + 1).
   @param[in] asymp_weight The weight of the beta prior on the asymptote.
   The alpha and beta parameter are alpha = mean * weight + 1 and beta = (1 - mean) * weight + 1).
   @param[in,out] slopes A vector(items) with the estimated slope parameters.
   They should be initialize first.
   @param[out] slopes_stddev A vector(items) with the standard errors of the estimated slopes.
   @param[in,out] thresholds A vector(items) with the estimated threshold parameters.
   They should be initialize first.
   @param[out] thresh_stddev A vector(items) with the standard errors of the estimated thresholds.
   @param[in,out] asymptotes A vector(items) with the estimated asymptote parameters.
   They should be initialize first.
   @param[out] asymp_stddev A vector(items) with the standard errors of the estimated asymptotes.
   @param[in] ignore A vector(items) of ignore flag.
   @param[out] notconverge A vector(items) of flag set for the items that didn't converged.
   @param[out] mllk The maximum log likelihood.

   \return The number of item that did not converged.
   
   \warning The memory for the output parameters should be allocated before.
*/
int
bme_3plm (int nbr_par, int max_iter, double prec,
	  gsl_matrix * quad_freqs, gsl_vector * quad_points,
	  gsl_vector * quad_sizes, int slope_prior, double slope_mean,
	  double slope_dev, int thresh_prior, double thresh_mean,
	  double thresh_dev, int asymp_prior, double asymp_mean,
	  double asymp_weight, gsl_vector * slopes, gsl_vector * thresholds,
	  gsl_vector * asymptotes, gsl_vector * slopes_stddev,
	  gsl_vector * thresh_stddev, gsl_vector * asymp_stddev,
	  gsl_vector_int * ignore, gsl_vector_int * notconverge,
	  double *mllk)
{
  const gsl_multiroot_fdfsolver_type *algo;
  gsl_multiroot_fdfsolver *solver;
  int status_iter, status_test, iter, nbr_item, i, ret_val, s, t, count_stalled;
  bme_3plm_struct params;
  gsl_multiroot_function_fdf FDF;
  gsl_vector *par_3plm = gsl_vector_alloc (nbr_par);
  gsl_vector_view quad_freqs_i;
  double ml, sum_grad;

  /* used to compute the standard errors */
  gsl_vector *df = gsl_vector_alloc (nbr_par);
  gsl_matrix *df2 = gsl_matrix_alloc (nbr_par, nbr_par);
  gsl_permutation *lu_perm = gsl_permutation_alloc (nbr_par);
  gsl_matrix *inv_df2 = gsl_matrix_alloc (nbr_par, nbr_par);
  int lu_sign;

  /* initalize the parameter and function to solve */
  params.no_trans = 0;
  nbr_item = quad_freqs->size1;
  params.quad_points = quad_points;
  params.slope_prior = slope_prior;
  params.log_slope_var = log(slope_dev * slope_dev / (slope_mean * slope_mean) + 1);
  params.log_slope_mean = log (slope_mean) - params.log_slope_var / 2;
  params.thresh_prior = thresh_prior;
  params.thresh_mean = thresh_mean;
  params.thresh_var = thresh_dev * thresh_dev;
  params.asymp_prior = asymp_prior;
  params.asymp_alpha = asymp_weight * asymp_mean + 1;
  params.asymp_beta = asymp_weight * (1 - asymp_mean) + 1;
  FDF.f = &bme_3plm_df;
  FDF.df = &bme_3plm_df2;
  FDF.fdf = &bme_3plm_dfdf2;
  FDF.n = nbr_par;
  FDF.params = &params;

  /* select the algorithm to used */
  algo = gsl_multiroot_fdfsolver_hybridsj;
  /* allocate the solver */
  solver = gsl_multiroot_fdfsolver_alloc (algo, nbr_par);

  ret_val = 0;

  /* for each item */
  for (i = 0; i < nbr_item; i++)
    {
      gsl_vector_int_set(notconverge, i, 0);

      /* ignore the degenerate items */
      if (ignore && gsl_vector_int_get(ignore, i)) continue;

      if (libirt_verbose > 3)
	printf ("item %d ", i + 1);

      /* set the starting values */
      gsl_vector_set (par_3plm, 0, gsl_vector_get (thresholds, i));

      if (nbr_par > 1)
	gsl_vector_set (par_3plm, 1, log (gsl_vector_get (slopes, i)));
      else
	params.slope = slopes ? gsl_vector_get (slopes, i) : 1.0;

      if (nbr_par > 2)
	gsl_vector_set (par_3plm, 2, logit (gsl_vector_get (asymptotes, i)));
      else
	params.asymptote = asymptotes ? gsl_vector_get (asymptotes, i) : 0.0;

      /* get the corresponding rows of freqs */
      quad_freqs_i = gsl_matrix_row (quad_freqs, i);
      params.quad_freqs = &quad_freqs_i.vector;
      params.quad_sizes = quad_sizes;

      /* set the solver */
      gsl_multiroot_fdfsolver_set (solver, &FDF, par_3plm);

      /* iterate the solver */
      iter = 0;
      count_stalled = 0;
      do
	{
	  iter++;

	  /* check if the hessian is singular */
	  status_iter = 0;
	  for (s = 0; s < nbr_par; s++)
	    {
	      if(0 == gsl_matrix_get (solver->J, s, s))
		{
		  status_iter = GSL_EBADFUNC;
		  break;
		}
	    }
	  if (status_iter) break;

	  status_iter = gsl_multiroot_fdfsolver_iterate (solver);

	  /* test for convergence */
	  status_test = gsl_multiroot_test_delta (solver->dx, solver->x, prec, 0);

	  if (libirt_verbose > 19)
	    {
	      bme_3plm_fdfdf2 (solver->x, &params, &ml, df, df2);
	      sum_grad = 0;
	      for (s = 0; s < nbr_par; s++) sum_grad += fabs(gsl_vector_get(df, s));
	      printf (" At N-R iteration %d ML is %9.2e and sum|grad(ML)| is %8.2e.\n",
		      iter, ml, sum_grad);
	    }

	  /* shake the solution if we are stalled or after half the iteration */
	  if (status_test != GSL_SUCCESS && 
	      (status_iter != GSL_SUCCESS || iter == max_iter / 2))
	    {
	      count_stalled++;
	      if (count_stalled % 4 == 0)
		{
		  gsl_vector_set (par_3plm, 0, -gsl_vector_get (par_3plm, 0));
		  if (libirt_verbose > 4)
		    printf(" Inversing the threshold after %d iterations.\n", iter);
		}
	      else if (count_stalled % 4 == 1 && nbr_par>1)
		{
		  gsl_vector_set (par_3plm, 1, -gsl_vector_get (par_3plm, 1));
		  if (libirt_verbose > 4)
		    printf(" Inversing the slope after %d iterations.\n", iter);
		}
	      else if (count_stalled % 4 == 2 && nbr_par>2)
		{
		  gsl_vector_set (par_3plm, 2, -gsl_vector_get (par_3plm, 2));
		  if (libirt_verbose > 4)
		    printf(" Inversing the asymptote after %d iterations.\n", iter);
		}
	      else
		{
		  for ( s = 0 ; s < nbr_par ; s++ ) gsl_vector_set (par_3plm, s, 0.0);
		  if (libirt_verbose > 4)
		    printf(" Reseting the parameters after %d iterations.\n", iter);
		}
	      gsl_multiroot_fdfsolver_set (solver, &FDF, par_3plm);
	    }
	}
      while (status_test == GSL_CONTINUE && iter < max_iter);

      /* compute the maximum log likelihood to return */
      bme_3plm_fdfdf2 (solver->x, &params, mllk, NULL, NULL);

      /* check if this item converged */
      if (status_test != GSL_SUCCESS)
	{
	  ret_val++;
	  gsl_vector_int_set(notconverge, i, 1);
	}

      if (libirt_verbose > 3)
	{
	  if (status_test == GSL_SUCCESS)
	    printf ("converged (success)");
	  else if (status_test == GSL_CONTINUE)
	    {
	      if (status_iter == GSL_SUCCESS)
		printf ("did not converged (max iter)");
	      else if (status_iter == GSL_EBADFUNC)
		printf ("did not converged (singular point)");
	      else if (status_iter == GSL_ENOPROG)
		printf ("did not converged (no progress)");
	      else
		printf ("unknow status");
	    }
	  printf (" after %d iterations.\n", iter);
	  fflush (stdout);
	}

      if (libirt_verbose > 9)
	{
	  bme_3plm_fdfdf2 (solver->x, &params, &ml, df, df2);
	  printf (" solution = (");
	  for (s=0; s<nbr_par; s++) printf("%7.1e,",gsl_vector_get(solver->x,s));
	  printf (").\n ML = %9.3e.\n Gradient = (", ml);
	  for (s=0; s<nbr_par; s++) printf("%7.1e,",gsl_vector_get(df,s));
	  printf(").\n Hessian = (");
	  for (s=0; s<nbr_par; s++)
	    {
	      printf(" (");
	      for (t=0; t<nbr_par; t++) printf("%7.1e,",gsl_matrix_get(df2,s,t));
	      printf("), ");
	    }
	  printf(").\n");
	  fflush (stdout);
	}

      /* if converge then transform and copy the solution found */
      if (status_test == GSL_SUCCESS)
	{
	  gsl_vector_set (thresholds, i, gsl_vector_get (solver->x, 0));
	  if (nbr_par > 1)
	    gsl_vector_set (slopes, i, exp (gsl_vector_get (solver->x, 1)));
	  if (nbr_par > 2)
	    gsl_vector_set (asymptotes, i,
			    logistic (gsl_vector_get (solver->x, 2)));
	  
	  /* compute the standard error */

	  /* get the Hessian with no transformation */
	  params.no_trans = 1;
	  bme_3plm_df2 (solver->x, &params, df2);
	  params.no_trans = 0;
	  
	  /* inverse it */
	  gsl_linalg_LU_decomp (df2, lu_perm, &lu_sign);
	  gsl_linalg_LU_invert (df2, lu_perm, inv_df2);
	  
	  if (thresh_stddev)
	    gsl_vector_set (thresh_stddev, i,
			    sqrt (-gsl_matrix_get (inv_df2, 0, 0)));
	  if (nbr_par > 1 && slopes_stddev)
	    gsl_vector_set (slopes_stddev, i,
			    sqrt (-gsl_matrix_get (inv_df2, 1, 1)));
	  if (nbr_par > 2 && asymp_stddev)
	    gsl_vector_set (asymp_stddev, i,
			    sqrt (-gsl_matrix_get (inv_df2, 2, 2)));
	}
    }

  /* free the memory */
  gsl_multiroot_fdfsolver_free (solver);
  gsl_vector_free (par_3plm);
  gsl_vector_free (df);
  gsl_matrix_free (df2);
  gsl_permutation_free (lu_perm);
  gsl_matrix_free (inv_df2);

  return ret_val;
}

/**
   \brief Estimate the parameters of a 2PLM or 3PLM by MMLE
   or more generally (if a prior is used) by BME.

   @param[in] nbr_par The number of parameter (1, 2 or 3).
   @param[in] max_em_iter The maximum number of EM iterations. At least 20 iteration are made.
   @param[in] max_nr_iter The maximum number of Newton iterations performed
   for each item at each EM iteration.
   @param[in] prec The relative change in the likelihood to stop the EM algorithm.
   This value divided by 10 is also the desired precision of each parameter estimate.
   @param[in] patterns A matrix(patterns x items) of binary responses.
   @param[in] counts A vector(patterns) with the count of each pattern.
   If NULL the counts are assumed to be all 1.
   @param[in] quad_points A vector(classes) with the middle points of each quadrature class.
   @param[in] quad_weights A vector(classes) with the prior weights of each quadrature class.
   @param[in] slope_prior Controls whether to use a lognormal(\em slope_mean, \em slope_dev)
   prior on the slope.
   @param[in] slope_mean The mean of the lognormal prior on the slope.
   @param[in] slope_dev The standard deviation of the lognormal prior on the slope.
   @param[in] thresh_prior Controls whether to use a normal(\em thresh_mean, \em thresh_dev)
   prior on the threshold
   @param[in] thresh_mean The mean of the normal prior on the threshold.
   @param[in] thresh_dev The standard deviation of the normal prior on the threshold.
   @param[in] asymp_prior Controls whether to use a beta(\em asymp_mean, \em asymp_weight)
   prior on the asymptote
   @param[in] asymp_mean The mean of the beta prior on the asymptote.
   The alpha and beta parameter are alpha = mean * weight + 1 and beta = (1 - mean) * weight + 1).
   @param[in] asymp_weight The weight of the beta prior on the asymptote.
   The alpha and beta parameter are alpha = mean * weight + 1 and beta = (1 - mean) * weight + 1).
   @param[in,out] slopes A vector(items) with the estimated slope parameters.
   They should be initialize first.
   @param[out] slopes_stddev A vector(items) with the standard errors of the estimated slopes.
   @param[in,out] thresholds A vector(items) with the estimated threshold parameters.
   They should be initialize first.
   @param[out] thresh_stddev A vector(items) with the standard errors of the estimated thresholds.
   @param[in,out] asymptotes A vector(items) with the estimated asymptote parameters.
   They should be initialize first.
   @param[out] asymp_stddev A vector(items) with the standard errors of the estimated asymptotes.
   @param[in] ignore A vector(items) of ignore flag.
   @param[out] nbr_notconverge The number of items that didn't converged.
   @param[out] notconverge A vector(items) of flag set for the items that didn't converged.
   @param[in] adjust_weights Controls whether adjust the quadrature weights after each iteration.

   \return 1 if the relative change in the maximum log likelihood was less than prec
   else 0.
   
   \warning The memory for the output parameters should be allocated before.
*/
int
em_bme_3plm (int nbr_par, int max_em_iter, int max_nr_iter, double prec,
	     gsl_matrix_int * patterns, gsl_vector * counts,
	     gsl_vector * quad_points, gsl_vector * quad_weights,
	     int slope_prior, double slope_mean, double slope_dev,
	     int thresh_prior, double thresh_mean, double thresh_dev,
	     int asymp_prior, double asymp_mean, double asymp_weight,
	     gsl_vector * slopes, gsl_vector * thresholds,
	     gsl_vector * asymptotes, gsl_vector * slopes_stddev,
	     gsl_vector * thresh_stddev, gsl_vector * asymp_stddev,
	     gsl_vector_int * ignore, int * nbr_notconverge,
	     gsl_vector_int * notconverge, int adjust_weights)
{
  int em_iter, nbr_quad, nbr_pattern, nbr_item, ret_val, i, j, k;
  double nbr_subject, mllk, mllk_old=0;
  gsl_matrix *quad_freqs;
  gsl_vector *quad_sizes;
  gsl_matrix *post;
  gsl_matrix *probs;

  nbr_quad = quad_points->size;
  nbr_pattern = patterns->size1;
  nbr_item = patterns->size2;

  nbr_subject = 0;
  /* count the number of subject */
  for(j = 0; j < nbr_pattern; j++)
    nbr_subject += counts ? gsl_vector_get(counts, j) : 1;

  /* allocate the memory */
  quad_freqs = gsl_matrix_alloc (nbr_item, nbr_quad);
  quad_sizes = gsl_vector_alloc (nbr_quad);
  post = gsl_matrix_alloc (nbr_pattern, nbr_quad);
  probs = gsl_matrix_alloc (nbr_item, nbr_quad);

  /* EM iterations */
  /** \todo Check for the biggest change in the EM iterations
      and add a stop criterion. */
  for (em_iter = 1; em_iter <= max_em_iter; em_iter++)
    {
      if (libirt_verbose > 2)
	printf ("\nEM iteration %d\n", em_iter);

      /* E (estimation) step */

      /* compute the response functions */
      probs_3plm (slopes, thresholds, asymptotes, quad_points, probs);

      /* compute the posterior prob */
      posteriors (patterns, probs, quad_weights, post);

      /* compute the expected sizes and frequencies */
      frequencies (patterns, counts, post, probs, quad_sizes, quad_freqs);

      /* print debugging information */
      if (libirt_verbose > 5)
	{
	  for (i = 0; i < nbr_item; i++)
	    {
		printf("Probabilities for item %d :\n", i+1);
		for (k = 0; k < nbr_quad; k++)
		  printf(" %8.2e", gsl_matrix_get(probs, i, k));
		printf("\n");
	      }
	  for (j = 0; j < nbr_pattern; j++)
	    {
	      printf("Posterior for pattern %d :\n", j+1);
	      for (k = 0; k < nbr_quad; k++)
		printf(" %8.2e", gsl_matrix_get(post,j,k));
	      printf("\n");
	    }
	  printf("Sizes :\n");
	  for (k = 0; k < nbr_quad; k++)
	    printf(" %8.2e", gsl_vector_get(quad_sizes,k));
	  printf("\n");
	  for (i = 0; i < nbr_item; i++)
	    {
		printf("Frequencies for item %d :\n", i+1);
		for (k = 0; k < nbr_quad; k++)
		  printf(" %8.2e", gsl_matrix_get(quad_freqs,i,k));
		printf("\n");
	    }
	}

      /* M (maximisation) step */
      *nbr_notconverge = bme_3plm (nbr_par, max_nr_iter, prec/10,
				   quad_freqs, quad_points, quad_sizes,
				   slope_prior, slope_mean, slope_dev,
				   thresh_prior, thresh_mean, thresh_dev,
				   asymp_prior, asymp_mean, asymp_weight,
				   slopes, thresholds, asymptotes,
				   slopes_stddev, thresh_stddev, asymp_stddev,
				   ignore, notconverge, &mllk);

      if(gsl_isnan(mllk)) {
	if (libirt_verbose > 1) printf("NAN error ! Stopping.\n");
	break;
      }

      if(adjust_weights)
	adjust_quad_weights (nbr_subject, quad_sizes, quad_points, quad_weights);
      
      if (libirt_verbose > 2)
	printf("MLLK = %10.3e %%CHANGE = %9.3e\n", mllk, fabs((mllk-mllk_old)/mllk));

      /* if the change in the maximum log likelihood is small then exit */
      if (fabs((mllk-mllk_old)/mllk) < prec && em_iter >= 20) break;

      mllk_old = mllk;
    }

  /* check if the EM algo converged */
  if (em_iter <= max_em_iter && !gsl_isnan(mllk)) ret_val = 1;
  else ret_val = 0;

  if (libirt_verbose > 0 && ret_val == 0)
    printf("The EM algorithm didn't converged after %d iterations.\n", em_iter-1);

  if (libirt_verbose > 0 && ret_val == 1)
    printf("The EM algorithm converged after %d iterations.\n", em_iter);

  /* free the memory */
  gsl_matrix_free (quad_freqs);
  gsl_vector_free (quad_sizes);
  gsl_matrix_free (post);
  gsl_matrix_free (probs);

  return ret_val;
}
