vtkSMPThreadLocal abstract types

Hi Developers

I have been using vtkSMPThreadLocal and vtkSMPThreadLocalObject to some extent and always with a great speed-up.

The snippets below are used for a collection of fast registration algorithms for vtkPolyData. The plan is to make them work for vtkPointSets in general.

Question: How to handle vtkSMPThreadLocal for abstract classes?

First the non-working code

template <typename TPointsArray, typename TVectorArray>
struct Functor
{
  struct vtkLocalDataType2
  {
    TPointsArray* Origins;
    TPointsArray* Landmarks;
    TVectorArray* Normals;
    int LastIndex;
    vtkLocalDataType2()
      : Origins(nullptr)
      , Landmarks(nullptr)
      , Normals(nullptr)
    {
    }
  };

  vtkSMPThreadLocalObject<TPointsArray> TLOrigins;
  vtkSMPThreadLocalObject<TPointsArray> TLLandmarks;
  vtkSMPThreadLocalObject<TVectorArray> TLNormals;
  vtkSMPThreadLocal<int> TLLastIndex;
  vtkSMPThreadLocal<vtkLocalDataType2> LocalData;

  TPointsArray* Points;
  TPointsArray* Origins;
  TPointsArray* Landmarks;
  TVectorArray* Normals;
  vtkIdType Step;
  vtkSpsImplicitPolyDataDistance* Locator;
  vtkPolyDataCorrespondenceFilter* Filter;

  Functor(TPointsArray* points, TPointsArray* origins, TPointsArray* landmarks,
    TVectorArray* normals, vtkIdType step, vtkSpsImplicitPolyDataDistance* locator,
    vtkPolyDataCorrespondenceFilter* filter)
    : Points(points)
    , Origins(origins)
    , Landmarks(landmarks)
    , Normals(normals)
    , Step(step)
    , Locator(locator)
    , Filter(filter)
  {
    vtkIdType estSize =
      std::min(this->Points->GetNumberOfTuples(), Filter->GetMaximumNumberOfLandmarks());
    this->Origins->Allocate(estSize);
    this->Landmarks->Allocate(estSize);
    this->Normals->SetNumberOfComponents(3);
    this->Normals->Allocate(estSize);
// How to instantiate the variables, TLOrigins, TLLandmarks, and TLNormals (they are abstract)
  }
}

For types that are known at compile time, it is easy. The code below are working very well. Since I am operating on 4 different arrays, it could have been nice with a vtkDispatch4ByValue provided that I figure out how to use vtkSMPThreadLocalObject for abstract types (make the compiler instantiate them).

I know from using that for this to work, the Local() should itself be a template like and executed like

TLNormals.template Local<TypeName>();

but that didn’t seem to work.

Thanks in advance

// Structure holding concrete instances
struct vtkLocalDataType
{
  vtkPoints* Origins;
  vtkPoints* Landmarks;
  vtkDoubleArray* Normals;
  int LastIndex;
  vtkLocalDataType()
    : Origins(nullptr)
    , Landmarks(nullptr)
    , Normals(nullptr)
  {
  }
};

struct ClosestPointFunctor
{
  vtkSMPThreadLocalObject<vtkPoints> TLOrigins;
  vtkSMPThreadLocalObject<vtkPoints> TLLandmarks;
  vtkSMPThreadLocalObject<vtkDoubleArray> TLNormals;
  vtkSMPThreadLocal<int> TLLastIndex;
  vtkSMPThreadLocal<vtkLocalDataType> LocalData;

  vtkPolyDataCorrespondenceFilter* Self;
  vtkPoints* Points;
  vtkIdType Step;
  vtkSpsImplicitPolyDataDistance* Locator;
  vtkPoints* Origins;
  vtkPoints* Landmarks;
  vtkDoubleArray* Normals;

  ClosestPointFunctor(vtkPolyDataCorrespondenceFilter* self, vtkPoints* points, vtkIdType step,
    vtkSpsImplicitPolyDataDistance* locator, vtkPoints* origins, vtkPoints* landmarks,
    vtkDoubleArray* normals)
    : Self(self)
    , Points(points)
    , Step(step)
    , Locator(locator)
    , Origins(origins)
    , Landmarks(landmarks)
    , Normals(normals)

  {
    int estimatedSize =
      std::min(this->Points->GetNumberOfPoints(), Self->GetMaximumNumberOfLandmarks());
    this->Origins->Allocate(estimatedSize);
    this->Landmarks->Allocate(estimatedSize);
    this->Normals->SetNumberOfComponents(3);
    this->Normals->Allocate(estimatedSize);
  }
  void Initialize()
  {
    auto& localData = this->LocalData.Local();
    auto& newOrigins = this->TLOrigins.Local();
    auto& newLandmarks = this->TLLandmarks.Local();
    auto& newNormals = this->TLNormals.Local();
    int& newLastIndex = this->TLLastIndex.Local();
    newLastIndex = 0;
    newNormals->SetNumberOfComponents(3);

    // How to make this smaller - can we access the number of threads?
    vtkIdType estimatedSize = vtkIdType(this->Points->GetNumberOfPoints() / this->Step);
    newOrigins->Allocate(estimatedSize);
    newLandmarks->Allocate(estimatedSize);
    newNormals->Allocate(estimatedSize);
    localData.Landmarks = newLandmarks;
    localData.Normals = newNormals;
    localData.Origins = newOrigins;
    localData.LastIndex = newLastIndex;
  }
  void operator()(vtkIdType begin, vtkIdType end)
  {
    auto& localData = this->LocalData.Local();

    vtkPoints*& origins = localData.Origins;
    vtkDoubleArray*& normals = localData.Normals;
    vtkPoints*& landmarks = localData.Landmarks;
    int& lastIndex = localData.LastIndex;

    double signedDistance;
    double originPoint[3];
    double closestPoint[3];
    double gradient[3];
    vtkDataObject::AttributeTypes type = vtkDataObject::NUMBER_OF_ATTRIBUTE_TYPES;
    for (; begin < end; ++begin)
    {
      if (begin % this->Step != 0)
        continue;

      this->Points->GetPoint(begin, originPoint);
      this->Self->GetTransform()->InternalTransformPoint(originPoint, originPoint);
      // Compute intersection in world coordinates
      signedDistance = this->Locator->SharedEvaluate(originPoint, gradient, closestPoint, &type);

      if (signedDistance < Self->GetMaximumDistance())
      {
        origins->InsertPoint(lastIndex, originPoint);
        landmarks->InsertPoint(lastIndex, closestPoint);
        normals->InsertTuple(lastIndex, gradient);
        lastIndex++;
      }
    }
  }
  void Reduce()
  {
    using TLSIter = vtkSMPThreadLocal<vtkLocalDataType>::iterator;
    TLSIter end = this->LocalData.end();
    vtkIdType dstStart = 0;
    for (TLSIter itr = this->LocalData.begin(); itr != end; ++itr)
    {
      auto origins = (*itr).Origins;
      auto landmarks = (*itr).Landmarks;
      auto normals = (*itr).Normals;
      vtkIdType nPoints = landmarks->GetNumberOfPoints();
      this->Origins->InsertPoints(dstStart, nPoints, 0, origins);
      this->Landmarks->InsertPoints(dstStart, nPoints, 0, landmarks);
      this->Normals->InsertTuples(dstStart, nPoints, 0, normals);
      dstStart += nPoints;
    }
  }
};

Now, I would like to handle points, origins, and landmarks to be either float or double and likewise for the normals.

I almost managed to get it to work, now just an issue about allocating enough data, inserting and afterwards setting the number of tuples for a vtkDataArray, but that should be doable. Will post the solution when it is working.

Why not storing a pointer or a smart pointer in vtkSMPThreadLocal? You could have either a std::unique_ptr<T> if T is a non-VTK type, or vtkSmartPointer<T> otherwise.

Another solution could be to use templating on your functor, using a concrete class.

There is no dispatch mechanism for 4 values. The tree spanned at compile-time by dispatching has n^p elements, where n is the number of types to dispatch and p the number of values, making compile time atrocious. Dispatching with 3 values is already borderline and I usually tend to avoid it at all cost.

I understand why dispatching 4 arrays is bad idea, already annoyed about the insane compilation time due to the existing dispatcher.

Perhaps not an elegant solution, but I ended up templating the functor and do manual dispatching. Something along the lines.

int outputPrecisionType = (this->OutputPrecision == vtkAlgorithm::DEFAULT_PRECISION
    ? source->GetPoints()->GetDataType()
    : (this->OutputPrecision == vtkAlgorithm::SINGLE_PRECISION ? VTK_FLOAT : VTK_DOUBLE));

int inputPrecision = source->GetPoints()->GetDataType();

vtkNew<vtkDoubleArray> landmarkNormals;
if (inputPrecision == VTK_FLOAT)
{
  Functor<vtkFloatArray, vtkFloatArray, vtkDoubleArray> fun(
    vtkFloatArray::SafeDownCast(source->GetPoints()->GetData()),
    vtkFloatArray::SafeDownCast(points->GetData()),
    vtkDoubleArray::SafeDownCast(landmarks->GetData()), landmarkNormals, step, locator, this);
  vtkSMPTools::For(0, source->GetNumberOfPoints(), fun);
}
else
{
  Functor<vtkDoubleArray, vtkDoubleArray, vtkDoubleArray> fun(
    vtkDoubleArray::SafeDownCast(source->GetPoints()->GetData()),
    vtkDoubleArray::SafeDownCast(points->GetData()),
    vtkDoubleArray::SafeDownCast(landmarks->GetData()), landmarkNormals, step, locator, this);
  vtkSMPTools::For(0, source->GetNumberOfPoints(), fun);
}

Will of course use vtkSmartPointer for the arrays.

Hi @Yohann_Bearzi

I am considering making a much smaller project like vtkBool where I implement filter for registration of vtkPolyData. As a start it will contain an ICP similar to vtkIterativeClosestPoint, but 20 faster using Gauss-Newton and version which uses the point-to-plane metric.

Do you have a CMake template for creating an external module for VTK that can be easily used.

The only change that I need for VTK core is to add a public function.

double EvaluateFunctionAndGetClosestPointAndGradient(double x[3], double closestPoint[3], double gradient[3]);

to vtkImplicitPolyDataDistance.

The optimization will be made with Eigen and using only the content that is available in vtkEigen.

Thanks in advance
Jens

You can find a small demonstration of the library here. The purpose is to investigate better registration methods and for sure things can be optimized. For the small demonstration project, I haven’t even configured the compilation flags properly.

You can find library here: