#include <PointCloud/Features/VelodyneNormalsFLAS.H>
#include <nrt/PointCloud2/Features/FeatureTypes/PointNormal.H>
#include <nrt/ImageProc/Reshaping/Rescale.H>

template <size_t numRows>
VelodyneNormalsFLAS<numRows>::VelodyneNormalsFLAS(int d_theta, int d_phi) :
  d_theta_(d_theta), d_phi_(d_phi), max_columns_(2300),
  thetas_(max_columns_),
  M_(max_columns_, {{Eigen::Matrix3d::Zero()}}),
  v_(max_columns_, {{Eigen::Vector3d::Zero()}}),
  vx_(max_columns_,numRows), vy_(max_columns_,numRows), vz_(max_columns_,numRows)
{
  for(int c=0; c<max_columns_; ++c)
  {
    float const theta = velodynehelpers::col2angle(c, max_columns_);
    thetas_[c] = theta;
    theta_indices_[theta] = c;

    for(int r=0; r<numRows; ++r)
    {
      float const phi = velodynehelpers::row2angle<numRows>(r);

      Eigen::Vector3d v (
          std::sin(phi) * std::cos(theta),
          std::sin(phi) * std::sin(theta),
          std::cos(phi)
          );

      v_[c][r] = v;
      vx_(c, r) = v.x();
      vy_(c, r) = v.y();
      vz_(c, r) = v.z();

      if(r < d_phi_ || r >= numRows-d_phi_ || c < d_theta_ || c >=max_columns_-d_theta_)
        continue;

      Eigen::Matrix3d M = Eigen::Matrix3d::Zero();
      for(int co=c-d_theta_; co<=c+d_theta_; ++co)
        for(int ro=r-d_phi_; ro<=r+d_phi_; ++ro)
        {
          float const theta_o = velodynehelpers::col2angle(co, max_columns_);
          float const phi_o = velodynehelpers::row2angle<numRows>(ro);

          Eigen::Vector3d vo (
              std::sin(phi_o) * std::cos(theta_o),
              std::sin(phi_o) * std::sin(theta_o),
              std::cos(phi_o)
              );
          M += vo * vo.transpose();
        }
      M_[c][r] = M.inverse();
    }
  }
  std::sort(thetas_.begin(), thetas_.end());
}

    // ######################################################################
template <size_t numRows>
auto VelodyneNormalsFLAS<numRows>::make_integral_images(Image<float> const & range) ->
    std::tuple<Image<double>, Image<double>, Image<double>>
{
  Image<double> ivx(vx_.width(), vx_.height(), nrt::ImageInitPolicy::Zeros);
  Image<double> ivy(vy_.width(), vy_.height(), nrt::ImageInitPolicy::Zeros);
  Image<double> ivz(vz_.width(), vz_.height(), nrt::ImageInitPolicy::Zeros);

  if(range.width() > max_columns_)
  {
    throw std::runtime_error("Error computing integral image in VelodyneNormalsFLAS.C (line " + std::to_string(__LINE__) + ").\n"
        "Got range image with " + std::to_string(range.width()) + " columns, but max is " + std::to_string(max_columns_) + ".");
  }

  int const i_width = ivx.width();

  auto computeR = [](double r_val)
  {
    return std::isnormal( r_val ) ? 1.0 / r_val : 0.0;
  };

  // Compute the first row
  {
    auto x_ptr  = vx_.const_pod_begin();
    auto y_ptr  = vy_.const_pod_begin();
    auto z_ptr  = vz_.const_pod_begin();
    auto ix_ptr = ivx.pod_begin();
    auto iy_ptr = ivy.pod_begin();
    auto iz_ptr = ivz.pod_begin();
    auto r_ptr  = range.const_pod_begin();
    auto const r_end  = r_ptr + range.width();
    double ix=0, iy=0, iz=0;

    while(r_ptr != r_end)
    {
      double r = computeR(*r_ptr);
      ix += (*x_ptr) * r;
      iy += (*y_ptr) * r;
      iz += (*z_ptr) * r;

      *ix_ptr = ix;
      *iy_ptr = iy;
      *iz_ptr = iz;

      ++ix_ptr; ++iy_ptr; ++iz_ptr;
      ++x_ptr; ++y_ptr; ++z_ptr;
      ++r_ptr;
    }
  }

  // Compute the rest of the rows
  for(int row=1; row<numRows; ++row)
  {
    auto x_ptr  = vx_.const_pod_begin()   + row*vx_.width();
    auto y_ptr  = vy_.const_pod_begin()   + row*vy_.width();
    auto z_ptr  = vz_.const_pod_begin()   + row*vz_.width();
    auto ix_ptr = ivx.pod_begin()         + row*i_width;
    auto iy_ptr = ivy.pod_begin()         + row*i_width;
    auto iz_ptr = ivz.pod_begin()         + row*i_width;
    auto r_ptr  = range.const_pod_begin() + row*range.width();

    auto const r_end = r_ptr + range.width();

    // Compute the first column
    double r = computeR(*r_ptr);
    *ix_ptr = *(ix_ptr-i_width) + (*x_ptr) * r;
    *iy_ptr = *(iy_ptr-i_width) + (*y_ptr) * r;
    *iz_ptr = *(iz_ptr-i_width) + (*z_ptr) * r;

    ++ix_ptr; ++iy_ptr; ++iz_ptr;
    ++x_ptr; ++y_ptr; ++z_ptr;
    ++r_ptr;

    while(r_ptr != r_end)
    {
      double r = computeR(*r_ptr);

      *ix_ptr = *(ix_ptr-i_width) + *(ix_ptr-1) - *(ix_ptr-i_width-1) + (*x_ptr) * r;
      *iy_ptr = *(iy_ptr-i_width) + *(iy_ptr-1) - *(iy_ptr-i_width-1) + (*y_ptr) * r;
      *iz_ptr = *(iz_ptr-i_width) + *(iz_ptr-1) - *(iz_ptr-i_width-1) + (*z_ptr) * r;

      ++ix_ptr; ++iy_ptr; ++iz_ptr;
      ++x_ptr; ++y_ptr; ++z_ptr;
      ++r_ptr;
    }
  }

  return std::make_tuple(ivx, ivy, ivz);
}

// ######################################################################
//template <size_t numRows>
//std::vector<float> compute_thetas(nrt::PointCloud2 const & cloud)
//{
//  int const ncols = cloud.size()/numRows;
//  std::vector<float> thetas(ncols);
//
//  for(int c=0; c<ncols; ++c)
//  {
//    // Find the first valid point in the column
//    nrt::PointCloud2::Geometry point;
//    auto cloud_ptr = cloud.begin() + velodynehelpers::index<numRows>(0, c);
//    auto cloud_end = cloud_ptr + ncols*numRows;
//    do { point = cloud_ptr->geometry(); cloud_ptr += ncols; }
//    while(!point.isValid() && point.getVector3Map().norm() < 0.25 && cloud_ptr != cloud_end);
//
//    float const actual_theta = std::atan2(point.y(), point.x());
//    thetas[c] = actual_theta;
//  }
//
//  return thetas;
//}

// ######################################################################
template <size_t numRows>
void VelodyneNormalsFLAS<numRows>::compute(nrt::PointCloud2 & cloud)
{
  frame_timing::start("FLAS Normals");
  cloud.addField<nrt::PointNormal>();

  int const ncols = cloud.size()/numRows;

  Image<float> range = velodynerange::rangeImage<numRows>(cloud);

  // Interpolate the range image so that it has the same number of columns as our matrix cache
  Image<float> interpolated_range = nrt::rescaleBilinear<void>(range, nrt::Dims<int>(max_columns_, numRows));

  Image<double> ix,iy,iz;
  std::tie(ix,iy,iz) = make_integral_images(interpolated_range);

  nrt::Image<nrt::PixRGB<float>> normals_interpolated(interpolated_range.dims());

  for(int c=0; c<max_columns_; ++c)
  {
    if(c < d_theta_+1 || c >= max_columns_-d_theta_) continue;

    for(int r=0; r<numRows; ++r)
    {
      if(r < d_phi_+1 || r >= numRows-d_phi_) continue;

      Eigen::Vector3d b = Eigen::Vector3d::Zero();

      b.x() =
        + ix(c+d_theta_, r+d_phi_).val()
        - ix(c+d_theta_, r-d_phi_-1).val()
        - ix(c-d_theta_-1, r+d_phi_).val()
        + ix(c-d_theta_-1, r-d_phi_-1).val();

      b.y() =
        + iy(c+d_theta_, r+d_phi_).val()
        - iy(c+d_theta_, r-d_phi_-1).val()
        - iy(c-d_theta_-1, r+d_phi_).val()
        + iy(c-d_theta_-1, r-d_phi_-1).val();

      b.z() =
        + iz(c+d_theta_, r+d_phi_).val()
        - iz(c+d_theta_, r-d_phi_-1).val()
        - iz(c-d_theta_-1, r+d_phi_).val()
        + iz(c-d_theta_-1, r-d_phi_-1).val();

      Eigen::Vector3d n = M_[c][r] * b;

      if( v_[c][r].dot( n ) > 0 ) n *= -1;

      n.normalize();
      normals_interpolated(c,r) = {n[0], n[1], n[2]};
    }
  }

  // Interpolate the calculated normals back into our point cloud
  nrt::Image<nrt::PixRGB<float>> normals = nrt::rescaleBilinear<void>(normals_interpolated, range.dims());
  for(int c=0; c<ncols; ++c)
  {
    for(int r=0; r<numRows; ++r)
    {
      auto const & n = normals(c,r).channels;
      cloud.get<nrt::PointNormal>(velodynehelpers::index<numRows>(r,c)).template get<nrt::PointNormal>() = nrt::PointNormal(n[0], n[1], n[2], 0);
    }
  }

  frame_timing::stop("FLAS Normals");
}

template class VelodyneNormalsFLAS<32>;
template class VelodyneNormalsFLAS<64>;
