/*! @file
    @author Shane Grant
    @copyright GNU Public License (GPL v3)
    @section License
    @verbatim
    // ////////////////////////////////////////////////////////////////////////
    //              The iLab Neuromorphic Robotics Toolkit (NRT)             //
    // Copyright 2010-2012 by the University of Southern California (USC)    //
    //                          and the iLab at USC.                         //
    //                                                                       //
    //                iLab - University of Southern California               //
    //                Hedco Neurociences Building, Room HNB-10               //
    //                    Los Angeles, Ca 90089-2520 - USA                   //
    //                                                                       //
    //      See http://ilab.usc.edu for information about this project.      //
    // ////////////////////////////////////////////////////////////////////////
    // This file is part of The iLab Neuromorphic Robotics Toolkit.          //
    //                                                                       //
    // The iLab Neuromorphic Robotics Toolkit is free software: you can      //
    // redistribute it and/or modify it under the terms of the GNU General   //
    // Public License as published by the Free Software Foundation, either   //
    // version 3 of the License, or (at your option) any later version.      //
    //                                                                       //
    // The iLab Neuromorphic Robotics Toolkit is distributed in the hope     //
    // that it will be useful, but WITHOUT ANY WARRANTY; without even the    //
    // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR       //
    // PURPOSE.  See the GNU General Public License for more details.        //
    //                                                                       //
    // You should have received a copy of the GNU General Public License     //
    // along with The iLab Neuromorphic Robotics Toolkit.  If not, see       //
    // <http://www.gnu.org/licenses/>.                                       //
    // ////////////////////////////////////////////////////////////////////////
    @endverbatim */

#include <nrt/config.h>
#ifdef NRT_HAVE_CLOUD

#include <PointCloud/Features/VelodyneNormals.H>
#include <nrt/PointCloud2/Common/Transforms.H>
#include <nrt/PointCloud2/Common/Covariance.H>
#include <nrt/PointCloud2/Features/FeatureTypes/PointNormal.H> // for rendering point normals
#include <nrt/Eigen/EigenDecomposition.H>
#include "Transformation/BFGSFunctor.H"
#include "Transformation/CeresFunctor.H"
#include "RegistrationGICP.H"
//#include <nrt/PointCloud2/Search/NanoSearch.H>
#include "NanoFlann.H"
#include <nrt/PointCloud2/Search/Search.H>
#include <PointCloud/Features/Planes/details/FrameTiming.H>
#include <tbb/parallel_for.h>
#include <algorithm>
#include <random>
#include <valgrind/callgrind.h>

#include <Config/Config.H>

#include "PlaneConstraint.H"

RegistrationGICP::RegistrationGICP( nrt::CorrespondenceEstimationBase::SharedPtr const correspondenceMethod,
    std::vector<nrt::CorrespondenceRejectionBase::SharedPtr> const & correspondenceRejectionCriteria,
    nrt::ConvergenceCriteriaBase::SharedPtr const convergeCriteria ) :
  itsCorrespondenceEstimation( correspondenceMethod ),
  itsCorrespondenceRejection( correspondenceRejectionCriteria ),
  itsConvergenceCriteria( convergeCriteria ),
  itsConverged( false )
{
}

// Returns the covariance of the neighborhood (1.0 is planar, 0.0 is isotropic).
Eigen::Matrix3d getCovariance( nrt::PointCloud2 const & cloud, nrt::Indices const & indices,
    double curvThresh = 1.0 )
{
  const double gicp_epsilon = 0.001; // taken from paper, default value

  Eigen::Matrix3d covariance = Eigen::Matrix3d::Zero();
  Eigen::Vector3d mean = Eigen::Vector3d::Zero();

  for( const auto & p : cloud.subset_range( indices ) )
  {
    auto const & pt = p.geometry();
    mean[0] += pt.x();
    mean[1] += pt.y();
    mean[2] += pt.z();

    covariance(0,0) += pt.x()*pt.x();

    covariance(1,0) += pt.y()*pt.x();
    covariance(1,1) += pt.y()*pt.y();

    covariance(2,0) += pt.z()*pt.x();
    covariance(2,1) += pt.z()*pt.y();
    covariance(2,2) += pt.z()*pt.z();
  }

  mean /= static_cast<double> (indices.size());

  // Get the actual covarianceariance
  for (int k = 0; k < 3; k++)
    for (int l = 0; l <= k; l++)
    {
      covariance(k,l) /= static_cast<double> (indices.size());
      covariance(k,l) -= mean[k]*mean[l];
      covariance(l,k) = covariance(k,l);
    }


  // PCL implementation refines this further using SVD
  Eigen::JacobiSVD<Eigen::Matrix3d> svd( covariance, Eigen::ComputeFullU );
  auto U = svd.matrixU();
  //auto S = svd.singularValues();

  // remove points that are not from plane like clusters
  //assert(S[2] <= S[1] && S[1] <= S[0]);
  //double const planarity = 1.0 - S[2] / ((S[0] + S[1])/2.0);
  //double const planarity =  1.0 - (S[2] / S[0]);
  //double const planarity = S[2] / S[0];

  covariance.setZero();

  // reconstitute the covariance matrix with modified singular values using
  // the column vectors in V
  for( int k = 0; k < 3; ++k )
  {
    auto col = U.col( k );
    double v = 1.0; // biggest 2 singular values replaced by 1
    if( k == 2 ) // smallest singular value replaced by epsilon
      v = gicp_epsilon;

    covariance += v * col * col.transpose();
  }

  //return std::make_tuple(covariance, planarity);
  return covariance;
}

// ######################################################################
nrt::Indices getNeighborhoodKNN( nrt::PointCloud2 const & cloud, size_t index, std::shared_ptr<NanoFlann const> kdtree )
{
  if(!cloud[index].isValid()) return {};

  nrt::Indices neighborhood;
  for( auto neighbor : kdtree->knn( cloud[index], 20 ) )
    if( cloud[neighbor.first].isValid() )
      neighborhood.push_back( neighbor.first );

  neighborhood.push_back(index);

  return neighborhood;
}

// ######################################################################
nrt::Indices getNeighborhoodVelodyne( nrt::PointCloud2 const & cloud, size_t index )
{
  static int rows = config::lookup<double>("picp.covariance.velodyneRows");
  static int cols = config::lookup<double>("picp.covariance.velodyneCols");

  nrt::Indices neighborhood = VelodyneNormals::velodynePatch(cloud, index, rows, cols);
  neighborhood.push_back(index);

  return neighborhood;
}

// ######################################################################
nrt::Indices computeCovarianceGICP2( nrt::PointCloud2 & cloud, std::vector<std::pair<DetectedPlane, DetectedPlane>> zippedPlanes,
    double constraintThreshold, nrt::Optional<PlaneConstraint &> constraint_)
{
  assert(cloud.hasField<nrt::PointNormal>());
  cloud.addField<Covariance>();

  bool const useKNN = config::lookup<bool>("picp.covariance.useKNN");

  if(constraintThreshold >= 70000)
  {
    frame_timing::start("Dense Covariance Calc");
    auto search = std::make_shared<NanoFlann const>(cloud);
    nrt::Indices indices;
    for(size_t i=0; i<cloud.size(); ++i)
    {
      Eigen::Matrix3d covariance = getCovariance(cloud, getNeighborhoodKNN(cloud, i, search));

      indices.push_back(i);
      auto & c = cloud.get<Covariance>(i).get<Covariance>();
      c.value = covariance;
      c.valid = true;
    }
    frame_timing::stop("Dense Covariance Calc");
    return indices;
  }

  PlaneConstraint constraint(std::sqrt(constraintThreshold));

  std::vector<uint8_t> used( cloud.size(), false );

  frame_timing::start("Plane Constraints");
  // Add all of the planes into the constraints matrix
  for( auto & z : zippedPlanes )
  {
    Eigen::Vector3f averageNormal = (z.first.plane.normal() + z.second.plane.normal()).normalized();
    //constraint.addConstraint( averageNormal, 50 );
    constraint.addConstraint( averageNormal, (z.first.indices.size() + z.second.indices.size()) / 2.0 );
    for( size_t i : z.first.indices ) used[i] = true;
  }
  frame_timing::stop("Plane Constraints");

  if(constraint_) *constraint_ = constraint;

  if(constraint.radii().x() > constraintThreshold &&
     constraint.radii().y() > constraintThreshold &&
     constraint.radii().z() > constraintThreshold)
    return {};


  // Compute the scores for all points that are not on planes
  // using the constraints matrix
  frame_timing::start("Initial Point Scores");

  // Index, score, normal, planarity
  std::vector<std::tuple<size_t, double, Eigen::Vector3f, double>> scores;

  for( size_t i = 0; i < used.size(); ++i )
  {
    if( used[i] ) continue;

    auto const & point = cloud.get<nrt::PointNormal>(i);

    if( !point.geometry().isValid() ) continue;

    auto const & n = point.get<nrt::PointNormal>();
    Eigen::Vector3f normal = n.getVector3Map();

    double const & planarity = n[3];

    scores.emplace_back(i, constraint.getScore(normal) * planarity, normal, planarity);
  }

  // Sort the scores, so the highest score is first
  std::sort(scores.begin(), scores.end(),
      [](decltype(scores)::value_type const & a, decltype(scores)::value_type const & b)
      { return std::get<1>(a) > std::get<1>(b); });
  frame_timing::stop("Initial Point Scores");

  if(std::get<1>(scores[0]) == 0) return {};

  std::shared_ptr<NanoFlann> search;
  if(useKNN)
  {
    assert(false);
    frame_timing::start("Build KD Tree");
    search = std::make_shared<NanoFlann>(cloud);
    frame_timing::stop("Build KD Tree");
  }

  // Randomly choose points to add into the system using a bernoulli distribution
  // of the point constraint scores.
  frame_timing::start("Point Constraint Filling");
  nrt::Indices indices;
  std::random_device rd;
  std::mt19937 gen(rd());
  for(auto s : scores)
  {
    auto const & normal = std::get<2>(s);
    double const & planarity = std::get<3>(s);
    double const & score = constraint.getScore(normal) * planarity;

    //if(score > 0)
    //  std::cout << "score: " << score << std::endl;
    //std::bernoulli_distribution dis(std::max(score, 0.05));
    std::bernoulli_distribution dis(score);
    //if(score > 0) //dis(gen))
    if(dis(gen))
    {
      size_t const index = std::get<0>(s);

      // Compute the covariance
      nrt::Indices const neighborhood = useKNN ? getNeighborhoodKNN(cloud, index, search) : getNeighborhoodVelodyne(cloud, index);

      Eigen::Matrix3d covariance = getCovariance( cloud, neighborhood );

      auto & c = cloud.get<Covariance>(index).get<Covariance>();
      c.value = covariance;
      c.valid = true;
      constraint.addConstraint(normal, planarity);
      indices.push_back(index);

      if(constraint.radii()[2] > constraintThreshold) break;
    }
  }
  frame_timing::stop("Point Constraint Filling");

  if(constraint_) *constraint_ = constraint;


  //std::cout << "   Added " << indices.size() << " / " << scores.size() << " points ... radii: " <<
  //  constraint.radii().cwiseProduct(constraint.radii()).transpose() << std::endl;

  return indices;
}

//// ######################################################################
//nrt::Indices computeCovarianceGICP( nrt::PointCloud2 & cloud, std::vector<DetectedPlane> & planes )
//{
//  assert(cloud.size() < 80000);
//
//  PlaneConstraint constraint(150);
//
//  cloud.addField<Covariance>(); // holds covariance
//  cloud.addField<nrt::PointNormal>();
//
//  std::vector<uint8_t> used( cloud.size(), 0 );
//
//  auto calculateNormal = []( Eigen::Matrix3d const & cov )
//  {
//    Eigen::Matrix3d eigenVectors;
//    nrt::eigenDecomposition<double>( cov, nrt::OptionalEmpty, eigenVectors );
//    return Eigen::Vector3f( eigenVectors(0,0), eigenVectors(1,0), eigenVectors(2,0) );
//
//    //return nrt::PointNormal( eigenVectors(0,0), eigenVectors(1,0), eigenVectors(2,0), 0 );
//  };
//
//  frame_timing::start("plane covariance");
//  for( auto & p : planes )
//  {
//    auto covariance = *getCovariance( cloud, p.indices );
//    p.pointCovariance = covariance.cast<double>();
//    auto normal = calculateNormal( covariance );
//    nrt::PointNormal pointNormal(normal[0], normal[1], normal[2], 0);
//
//    for( auto iter = cloud.subset_begin<Covariance, nrt::PointNormal>( p.indices ),
//              end = cloud.subset_end<Covariance, nrt::PointNormal>( p.indices );
//              iter != end; ++iter )
//    {
//      iter->get<Covariance>().value = covariance;
//      //iter->get<nrt::PointNormal>() = pointNormal;
//      used[iter.index()] = true;
//    }
//
//    constraint.addConstraint(normal, p.indices.size());
//  }
//  frame_timing::stop("plane covariance");
//
//  nrt::Indices indices; indices.reserve( used.size() );
//  nrt::NanoSearch search(cloud);
//
//  std::vector<std::tuple<size_t, double, Eigen::Vector3f>> scores;
//
//  frame_timing::start("point covariance");
//  for( size_t i = 0; i < used.size(); ++i )
//  {
//    if( used[i] ) continue;
//
//    if(!cloud[i].isValid()) continue;
//
//    bool const use_knn = false;
//
//    // Find 20 nearest neighbors
//    nrt::Indices indicesLocal;
//    if(use_knn)
//    {
//      auto neighbors = search.knn( cloud[i], 20 );
//      indicesLocal.reserve( neighbors.size() );
//      for( size_t j = 0; j < neighbors.size(); ++j )
//        if(cloud[neighbors[j].first].isValid())
//          indicesLocal.push_back( neighbors[j].first );
//    }
//    else
//    {
//      indicesLocal = VelodyneNormals::velodynePatch(cloud, i, 1, 3);
//    }
//    indicesLocal.push_back(i);
//
//    // Compute covariance using these
//
//    //auto covariance = getCovariance( cloud, indicesLocal, 0.1 );
//    auto covariance = getCovariance( cloud, indicesLocal, 0.01 );
//
//    if( covariance )
//    {
//      //indices.push_back(i);
//      auto normal = calculateNormal(*covariance);
//      if(normal == normal)
//      {
//        cloud.get<Covariance>(i).get<Covariance>().value = *covariance;
//        scores.emplace_back(i, constraint.getScore(normal), normal);
//      }
//    }
//
//  }
//
//  frame_timing::start("constraints");
//
//  // Sort the scores, so the highest score is first
//  std::sort(scores.begin(), scores.end(),
//      [](decltype(scores)::value_type const & a, decltype(scores)::value_type const & b)
//      { return std::get<1>(a) > std::get<1>(b); });
//
//  size_t num_added = 0;
//  std::random_device rd;
//  std::mt19937 gen(rd());
//  for(auto s : scores)
//  {
//    auto const & normal = std::get<2>(s);
//    double const score = constraint.getScore(normal);
//    std::bernoulli_distribution dis(score);
//    if(dis(gen))
//    {
//      size_t index = std::get<0>(s);
//      constraint.addConstraint(normal, 1.0);
//      indices.push_back(index);
//      cloud.get<nrt::PointNormal>(index).get<nrt::PointNormal>() =
//        nrt::PointNormal(normal[0], normal[1], normal[2], 0);
//      ++num_added;
//    }
//  }
//
//  //std::cout << "Added " << num_added << " / " << scores.size() << " points ... radii: " << constraint.radii().transpose() << std::endl;
//
//  frame_timing::stop("constraints");
//
//  frame_timing::stop("point covariance");
//
//  return indices;
//}

void RegistrationGICP::bfgs( nrt::PointCloud2::ConstIterator<> && sourceIter,
                             nrt::PointCloud2::ConstIterator<> && targetIter,
                             std::vector<Eigen::Matrix3d> const & mahalanobis,
                             size_t numInputs,
                             AffineTransform const & guess,
                             AffineTransform & transform )
{
  int const max_inner_iterations_ = 20;
  auto & transformation_matrix = transform.matrix();

  // Set initial solution
  Vector6 x = Vector6::Zero();
  x[0] = transformation_matrix (0,3);
  x[1] = transformation_matrix (1,3);
  x[2] = transformation_matrix (2,3);
  x[3] = std::atan2 (transformation_matrix (2,1), transformation_matrix (2,2));
  x[4] = std::asin (-transformation_matrix (2,0));
  x[5] = std::atan2 (transformation_matrix (1,0), transformation_matrix (0,0));

  // Optimize using forward-difference approximation LM
  const double gradient_tol = 1e-2;

  BFGSFunctor functor( std::move( sourceIter ),
                       std::move( targetIter ),
                       mahalanobis,
                       guess,
                       numInputs );

  BFGS<BFGSFunctor> bfgs (functor);
  bfgs.parameters.sigma = 0.01;
  bfgs.parameters.rho = 0.01;
  bfgs.parameters.tau1 = 9;
  bfgs.parameters.tau2 = 0.05;
  bfgs.parameters.tau3 = 0.5;
  bfgs.parameters.order = 3;

  int inner_iterations_ = 0;
  int result = bfgs.minimizeInit (x);
  result = BFGSSpace::Running;
  do
  {
    inner_iterations_++;
    result = bfgs.minimizeOneStep (x);
    if(result)
    {
      break;
    }
    result = bfgs.testGradient(gradient_tol);
  } while(result == BFGSSpace::Running && inner_iterations_ < max_inner_iterations_);
  if(result == BFGSSpace::NoProgress || result == BFGSSpace::Success || inner_iterations_ == max_inner_iterations_)
  {
    transformation_matrix.setIdentity();
    functor.updateTransform( transformation_matrix, x );
  }
  else
    throw nrt::exception::Exception("BFGS did not converge");
}

auto RegistrationGICP::align(
    nrt::PointCloud2 const & source, nrt::Indices const & sourceIndices,
    nrt::PointCloud2 const & target, nrt::Indices const & targetIndices,
    std::vector<std::pair<DetectedPlane, DetectedPlane>> sourceTargetPlanes,
    AffineTransform const & guess ) -> AffineTransform
{
  itsConverged = false;
  itsConvergenceCriteria->reset();

  AffineTransform alignment = AffineTransform::Identity();

  // ==========================================================================
  // Set up local (filtered) copies of source and target with only the indices
  auto filterIndices = [](nrt::PointCloud2 const & cloud, nrt::Indices const & indices)
  {
    nrt::PointCloud2 result( indices.size() );
    result.addField<Covariance>();
    std::copy( cloud.subset_begin<Covariance>( indices ), cloud.subset_end<Covariance>( indices ), result.begin<Covariance>() );

    return result;
  };

  auto filterBadPoints = [](nrt::PointCloud2 const & cloud)
  {
    nrt::PointCloud2 result;
    result.addField<Covariance>();
    for(size_t i=0; i<cloud.size(); ++i)
    {
      auto const & p = cloud.at<Covariance>(i);
      if(p.geometry().isValid()) result.insert(cloud[i]);
    }
    return result;
  };


  nrt::PointCloud2 sourceSubset = nrt::transformPointCloud( filterIndices(source, sourceIndices), guess.cast<float>() );
  nrt::PointCloud2 targetSubset = filterBadPoints(target);

  //nrt::PointCloud2 targetSubset = filterIndices( target, targetIndices );


  if(sourceSubset.size() == 0 || targetSubset.size() == 0)
  {
    std::cerr << "------------------------------" << std::endl;
    std::cerr << "No points in alignment(). Just using planes." << std::endl;
    std::cerr << "------------------------------" << std::endl;
    CeresFunctor ceresSolver;
    return ceresSolver(sourceTargetPlanes, {}, {}, {}, {}, alignment, guess);
  }

  const auto originalSourceSubset = sourceSubset;

  const size_t maxIterations = itsConvergenceCriteria->maxIterations();

  //nrt::Correspondences correspondences( sourceSubset.size() );

  frame_timing::start("Build Target KD-Tree");
  auto targetSearch = std::make_shared<NanoFlann>(targetSubset, 0, 5);
  frame_timing::stop("Build Target KD-Tree");

  // Main ICP loop
  size_t iteration = 1;
  for( ; iteration <= maxIterations; ++iteration )
  {
    Eigen::Matrix4d transform_R = Eigen::Matrix4d::Zero ();
    for(size_t i = 0; i < 4; i++)
      for(size_t j = 0; j < 4; j++)
        for(size_t k = 0; k < 4; k++)
          transform_R(i,j)+= double(alignment(i,k)) * double(guess(k,j));

    Eigen::Matrix3d R = transform_R.topLeftCorner<3,3> ();

    // Find correspondences
    frame_timing::start("Neighbor Search");

    nrt::Correspondences correspondences(sourceSubset.size());
    auto corrIter = correspondences.begin();
    size_t index = 0;
    for( auto sourceIter = sourceSubset.geometry_const_begin(), sourceEnd = sourceSubset.geometry_const_end();
        sourceIter != sourceEnd; ++sourceIter, ++corrIter, ++index )
    {
      auto const neighbor = targetSearch->nn( *sourceIter );

      corrIter->sourceIndex = index;
      corrIter->targetIndex = neighbor.first;
      corrIter->score = neighbor.second;
    }
    //auto correspondences = itsCorrespondenceEstimation->findCorrespondence( sourceSubset, targetSubset );

    // Add any missing covariances in the target cloud
    for(auto c : correspondences)
    {
      size_t const index = c.targetIndex;
      auto & cov = targetSubset.get<Covariance>(index).get<Covariance>();
      if(!cov.valid)
      {
        Eigen::Matrix3d covariance = getCovariance(targetSubset, getNeighborhoodKNN(targetSubset, index, targetSearch));
        cov.value = covariance;
        cov.valid = true;
      }
    }

    frame_timing::stop("Neighbor Search");

    // Remove bad matches
    for( auto const & criteria : itsCorrespondenceRejection )
      correspondences = criteria->reject( sourceSubset, targetSubset, correspondences );

    // ========================================================================
    // Compute mahalanobis distances - non-plane to non-plane
    std::vector<Eigen::Matrix3d> mahalanobis(correspondences.size());
    for( size_t j = 0; j < correspondences.size(); ++j )
    {
      auto const & corr = correspondences[j];

      auto const & s = sourceSubset.at<Covariance>( corr.sourceIndex ).get<Covariance>();
      auto const & t = targetSubset.at<Covariance>( corr.targetIndex ).get<Covariance>();
      assert(s.valid); assert(t.valid);

      mahalanobis[j] = ( R * s.value * R.transpose() + t.value ).inverse();
    }

    // Estimate alignment and update our estimate
    frame_timing::start("GICP Solving");
    bool const use_ceres = true;
    if(use_ceres)
    {
      CeresFunctor ceresSolver;
      alignment = ceresSolver(sourceTargetPlanes, originalSourceSubset, targetSubset, correspondences, mahalanobis, alignment, guess);
    }
    else
    {
      // Make sure to pass this the original source points and not the currently transformed ones,
      // thanks to the architecture copied from GICP implementation
      bfgs(
          originalSourceSubset.subset_const_begin( getSourceIndices( correspondences ) ),
          targetSubset.subset_const_begin( getTargetIndices( correspondences ) ),
          mahalanobis,
          correspondences.size(),
          guess,
          alignment );
    }
    frame_timing::stop("GICP Solving");

    // Check for convergence
    itsConverged = itsConvergenceCriteria->converged( iteration, alignment.cast<float>(), correspondences );

    if( itsConverged ) break;
    else sourceSubset = nrt::transformPointCloud( originalSourceSubset, (alignment * guess).cast<float>() );
  }

  if( !itsConverged ) NRT_DEBUG("align did not converge");

  //if(alignment.translation().norm() > 0.5)
  //{
  //  std::cerr << "-----------------------------------------------" << std::endl;
  //  std::cerr << "-----------------------------------------------" << std::endl;
  //  std::cerr << "Large translation detected! Returning identity." << std::endl;
  //  std::cerr << "-----------------------------------------------" << std::endl;
  //  std::cerr << "-----------------------------------------------" << std::endl;
  //  return AffineTransform::Identity();
  //}


  return alignment * guess;
}

bool RegistrationGICP::converged() const
{
  return itsConverged;
}

#endif // NRT_HAVE_CLOUD
