Hi Developers
I have been using vtkSMPThreadLocal
and vtkSMPThreadLocalObject
to some extent and always with a great speed-up.
The snippets below are used for a collection of fast registration algorithms for vtkPolyData
. The plan is to make them work for vtkPointSets
in general.
Question: How to handle vtkSMPThreadLocal
for abstract classes?
First the non-working code
template <typename TPointsArray, typename TVectorArray>
struct Functor
{
struct vtkLocalDataType2
{
TPointsArray* Origins;
TPointsArray* Landmarks;
TVectorArray* Normals;
int LastIndex;
vtkLocalDataType2()
: Origins(nullptr)
, Landmarks(nullptr)
, Normals(nullptr)
{
}
};
vtkSMPThreadLocalObject<TPointsArray> TLOrigins;
vtkSMPThreadLocalObject<TPointsArray> TLLandmarks;
vtkSMPThreadLocalObject<TVectorArray> TLNormals;
vtkSMPThreadLocal<int> TLLastIndex;
vtkSMPThreadLocal<vtkLocalDataType2> LocalData;
TPointsArray* Points;
TPointsArray* Origins;
TPointsArray* Landmarks;
TVectorArray* Normals;
vtkIdType Step;
vtkSpsImplicitPolyDataDistance* Locator;
vtkPolyDataCorrespondenceFilter* Filter;
Functor(TPointsArray* points, TPointsArray* origins, TPointsArray* landmarks,
TVectorArray* normals, vtkIdType step, vtkSpsImplicitPolyDataDistance* locator,
vtkPolyDataCorrespondenceFilter* filter)
: Points(points)
, Origins(origins)
, Landmarks(landmarks)
, Normals(normals)
, Step(step)
, Locator(locator)
, Filter(filter)
{
vtkIdType estSize =
std::min(this->Points->GetNumberOfTuples(), Filter->GetMaximumNumberOfLandmarks());
this->Origins->Allocate(estSize);
this->Landmarks->Allocate(estSize);
this->Normals->SetNumberOfComponents(3);
this->Normals->Allocate(estSize);
// How to instantiate the variables, TLOrigins, TLLandmarks, and TLNormals (they are abstract)
}
}
For types that are known at compile time, it is easy. The code below are working very well. Since I am operating on 4 different arrays, it could have been nice with a vtkDispatch4ByValue
provided that I figure out how to use vtkSMPThreadLocalObject
for abstract types (make the compiler instantiate them).
I know from using that for this to work, the Local()
should itself be a template like and executed like
TLNormals.template Local<TypeName>();
but that didn’t seem to work.
Thanks in advance
// Structure holding concrete instances
struct vtkLocalDataType
{
vtkPoints* Origins;
vtkPoints* Landmarks;
vtkDoubleArray* Normals;
int LastIndex;
vtkLocalDataType()
: Origins(nullptr)
, Landmarks(nullptr)
, Normals(nullptr)
{
}
};
struct ClosestPointFunctor
{
vtkSMPThreadLocalObject<vtkPoints> TLOrigins;
vtkSMPThreadLocalObject<vtkPoints> TLLandmarks;
vtkSMPThreadLocalObject<vtkDoubleArray> TLNormals;
vtkSMPThreadLocal<int> TLLastIndex;
vtkSMPThreadLocal<vtkLocalDataType> LocalData;
vtkPolyDataCorrespondenceFilter* Self;
vtkPoints* Points;
vtkIdType Step;
vtkSpsImplicitPolyDataDistance* Locator;
vtkPoints* Origins;
vtkPoints* Landmarks;
vtkDoubleArray* Normals;
ClosestPointFunctor(vtkPolyDataCorrespondenceFilter* self, vtkPoints* points, vtkIdType step,
vtkSpsImplicitPolyDataDistance* locator, vtkPoints* origins, vtkPoints* landmarks,
vtkDoubleArray* normals)
: Self(self)
, Points(points)
, Step(step)
, Locator(locator)
, Origins(origins)
, Landmarks(landmarks)
, Normals(normals)
{
int estimatedSize =
std::min(this->Points->GetNumberOfPoints(), Self->GetMaximumNumberOfLandmarks());
this->Origins->Allocate(estimatedSize);
this->Landmarks->Allocate(estimatedSize);
this->Normals->SetNumberOfComponents(3);
this->Normals->Allocate(estimatedSize);
}
void Initialize()
{
auto& localData = this->LocalData.Local();
auto& newOrigins = this->TLOrigins.Local();
auto& newLandmarks = this->TLLandmarks.Local();
auto& newNormals = this->TLNormals.Local();
int& newLastIndex = this->TLLastIndex.Local();
newLastIndex = 0;
newNormals->SetNumberOfComponents(3);
// How to make this smaller - can we access the number of threads?
vtkIdType estimatedSize = vtkIdType(this->Points->GetNumberOfPoints() / this->Step);
newOrigins->Allocate(estimatedSize);
newLandmarks->Allocate(estimatedSize);
newNormals->Allocate(estimatedSize);
localData.Landmarks = newLandmarks;
localData.Normals = newNormals;
localData.Origins = newOrigins;
localData.LastIndex = newLastIndex;
}
void operator()(vtkIdType begin, vtkIdType end)
{
auto& localData = this->LocalData.Local();
vtkPoints*& origins = localData.Origins;
vtkDoubleArray*& normals = localData.Normals;
vtkPoints*& landmarks = localData.Landmarks;
int& lastIndex = localData.LastIndex;
double signedDistance;
double originPoint[3];
double closestPoint[3];
double gradient[3];
vtkDataObject::AttributeTypes type = vtkDataObject::NUMBER_OF_ATTRIBUTE_TYPES;
for (; begin < end; ++begin)
{
if (begin % this->Step != 0)
continue;
this->Points->GetPoint(begin, originPoint);
this->Self->GetTransform()->InternalTransformPoint(originPoint, originPoint);
// Compute intersection in world coordinates
signedDistance = this->Locator->SharedEvaluate(originPoint, gradient, closestPoint, &type);
if (signedDistance < Self->GetMaximumDistance())
{
origins->InsertPoint(lastIndex, originPoint);
landmarks->InsertPoint(lastIndex, closestPoint);
normals->InsertTuple(lastIndex, gradient);
lastIndex++;
}
}
}
void Reduce()
{
using TLSIter = vtkSMPThreadLocal<vtkLocalDataType>::iterator;
TLSIter end = this->LocalData.end();
vtkIdType dstStart = 0;
for (TLSIter itr = this->LocalData.begin(); itr != end; ++itr)
{
auto origins = (*itr).Origins;
auto landmarks = (*itr).Landmarks;
auto normals = (*itr).Normals;
vtkIdType nPoints = landmarks->GetNumberOfPoints();
this->Origins->InsertPoints(dstStart, nPoints, 0, origins);
this->Landmarks->InsertPoints(dstStart, nPoints, 0, landmarks);
this->Normals->InsertTuples(dstStart, nPoints, 0, normals);
dstStart += nPoints;
}
}
};
Now, I would like to handle points, origins, and landmarks to be either float or double and likewise for the normals.