How to properly override GetFiniteValueRange in our vtkGenericDataArray<Derived, T> class

In our application we have implemented a wrapper class using the vtkGenericDataArray<DerivedType, T> class; our class is called CV::Array. Generally things are working fine, but as I profile our code to see what is taking so long for some operations (volume rendering is my current rabbit hole), I keep seeing the same pattern: a call to vtkDataArray::GetFiniteRange(int), which then drives the whole “ComputeFiniteRange()” chain. The thing is, we have already computed those values and cached them in the CV::Array instance. I have been looking for something to override from vtkGenericDataArray<> so that the cached values get used, but I feel like I am missing something, hopefully something obvious.

I’ve been digging through the VTK sources to figure out what we missed when we implemented CV::Array, but I’m coming up empty.

For example, here is one stack trace from my macOS machine:

4.40 s  100.0%	0 s	                                                      vtkVolumeTexture::SelectTextureFormat(unsigned int&, unsigned int&, int&, int, int)
4.40 s  100.0%	0 s	                                                       vtkDataArray::GetFiniteRange(int)
4.40 s  100.0%	0 s	                                                        vtkDataArray::GetFiniteRange(double*, int)
4.40 s  100.0%	0 s	                                                         vtkDataArray::ComputeFiniteRange(double*, int)
4.40 s  100.0%	0 s	                                                          vtkDataArray::ComputeFiniteRange(double*, int, unsigned char const*, unsigned char)
4.40 s  100.0%	0 s	                                                           vtkDataArray::ComputeFiniteScalarRange(double*)
4.40 s  100.0%	0 s	                                                            vtkDataArray::ComputeFiniteScalarRange(double*, unsigned char const*, unsigned char)
4.40 s  100.0%	0 s	                                                             void (anonymous namespace)::FiniteScalarRangeDispatchWrapper::operator()<vtkDataArray>(vtkDataArray*)
4.40 s  100.0%	0 s	                                                              bool vtkDataArrayPrivate::DoComputeScalarRange<vtkDataArray, double, vtkDataArrayPrivate::FiniteValues>(vtkDataArray*, double*, vtkDataArrayPrivate::FiniteValues, unsigned char const*, unsigned char)
4.40 s  100.0%	0 s	                                                               bool vtkDataArrayPrivate::ComputeScalarRange<1>::operator()<vtkDataArray, double>(vtkDataArray*, double*, vtkDataArrayPrivate::FiniteValues, unsigned char const*, unsigned char)
4.40 s  100.0%	0 s	                                                                void vtkSMPTools::For<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>>(long long, long long, vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>&)
4.40 s  100.0%	0 s	                                                                 void vtkSMPTools::For<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>>(long long, long long, long long, vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>&)
4.40 s  100.0%	0 s	                                                                  vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>::For(long long, long long, long long)
4.40 s  100.0%	0 s	                                                                   void vtk::detail::smp::vtkSMPToolsAPI::For<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>(long long, long long, long long, vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>&)
4.40 s  100.0%	0 s	                                                                    void vtk::detail::smp::vtkSMPToolsImpl<(vtk::detail::smp::BackendType)2>::For<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>(long long, long long, long long, vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>&)
4.40 s  100.0%	0 s	                                                                     vtk::detail::smp::vtkSMPToolsImplForTBB(long long, long long, long long, void (*)(void*, long long, long long, long long), void*)
4.40 s  100.0%	0 s	                                                                      void vtk::detail::smp::ExecuteFunctorTBB<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>(void*, long long, long long, long long)
4.40 s  100.0%	0 s	                                                                       void tbb::detail::d1::parallel_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>>(tbb::detail::d1::blocked_range<long long> const&, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>> const&)
4.40 s  100.0%	0 s	                                                                        tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>::run(tbb::detail::d1::blocked_range<long long> const&, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>> const&, tbb::detail::d1::auto_partitioner const&)
4.40 s  100.0%	0 s	                                                                         tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>::run(tbb::detail::d1::blocked_range<long long> const&, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>> const&, tbb::detail::d1::auto_partitioner const&, tbb::detail::d1::task_group_context&)
4.40 s  100.0%	0 s	                                                                          tbb::detail::d1::execute_and_wait(tbb::detail::d1::task&, tbb::detail::d1::task_group_context&, tbb::detail::d1::wait_context&, tbb::detail::d1::task_group_context&)
4.40 s  100.0%	0 s	                                                                           tbb::detail::r1::execute_and_wait(tbb::detail::d1::task&, tbb::detail::d1::task_group_context&, tbb::detail::d1::wait_context&, tbb::detail::d1::task_group_context&)
4.40 s  100.0%	0 s	                                                                            tbb::detail::r1::task_dispatcher::execute_and_wait(tbb::detail::d1::task*, tbb::detail::d1::wait_context&, tbb::detail::d1::task_group_context&)
4.40 s  100.0%	0 s	                                                                             tbb::detail::d1::task* tbb::detail::r1::task_dispatcher::local_wait_for_all<tbb::detail::r1::external_waiter>(tbb::detail::d1::task*, tbb::detail::r1::external_waiter&)
4.40 s  100.0%	0 s	                                                                              tbb::detail::d1::task* tbb::detail::r1::task_dispatcher::local_wait_for_all<false, tbb::detail::r1::external_waiter>(tbb::detail::d1::task*, tbb::detail::r1::external_waiter&)
4.40 s  100.0%	0 s	                                                                               tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>::execute(tbb::detail::d1::execution_data&)
4.40 s  100.0%	0 s	                                                                                void tbb::detail::d1::partition_type_base<tbb::detail::d1::auto_partition_type>::execute<tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>, tbb::detail::d1::blocked_range<long long>>(tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>&, tbb::detail::d1::blocked_range<long long>&, tbb::detail::d1::execution_data&)
4.40 s  100.0%	0 s	                                                                                 void tbb::detail::d1::dynamic_grainsize_mode<tbb::detail::d1::adaptive_mode<tbb::detail::d1::auto_partition_type>>::work_balance<tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>, tbb::detail::d1::blocked_range<long long>>(tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>&, tbb::detail::d1::blocked_range<long long>&, tbb::detail::d1::execution_data&)
4.40 s  100.0%	0 s	                                                                                  tbb::detail::d1::start_for<tbb::detail::d1::blocked_range<long long>, vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>, tbb::detail::d1::auto_partitioner const>::run_body(tbb::detail::d1::blocked_range<long long>&)
4.40 s  100.0%	0 s	                                                                                   vtk::detail::smp::FuncCall<vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>>::operator()(tbb::detail::d1::blocked_range<long long> const&) const
4.40 s  100.0%	0 s	                                                                                    vtk::detail::smp::vtkSMPTools_FunctorInternal<vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>, true>::Execute(long long, long long)
4.40 s  100.0%	329.00 ms	                                                                                     vtkDataArrayPrivate::FiniteMinAndMax<1, vtkDataArray, double>::operator()(long long, long long)

I know the volume data I am loading should be one of our CV::Arrays, and the range should already have been computed and cached. Maybe I’m just wrong and the input CV::Array has been converted into a plain vtkDataArray somewhere along the way? I feel lost, so any help would be greatly appreciated.

I have implemented void GetFiniteValueRange(T* range, int compIdx) in CV::Array, but the superclass’s version is not virtual, so I’m not sure that implementing it in the subclass is even the right approach.
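
In case it helps narrow down whether the array is being converted somewhere upstream, this is the kind of sanity check I have been meaning to drop in right before the volume mapper gets its input (just a sketch; the float instantiation and pulling the scalars off the image’s point data are assumptions about how our data actually flows):

#include <vtkImageData.h>
#include <vtkPointData.h>
#include <iostream>
// plus the CV::Array header

// Sketch: verify that the scalars actually reaching the volume mapper are still a CV::Array.
void CheckScalarArrayType(vtkImageData* image)
{
  vtkDataArray* scalars = image->GetPointData()->GetScalars();
  if(dynamic_cast<CV::Array<float>*>(scalars) != nullptr)
  {
    std::cout << "Scalars are still a CV::Array<float>: " << scalars->GetName() << std::endl;
  }
  else if(scalars != nullptr)
  {
    // If this prints e.g. vtkFloatArray, the data was deep-copied or converted upstream.
    std::cout << "Scalars are a " << scalars->GetClassName() << std::endl;
  }
}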

Thank You.

I would need to see at least the header file of what you have implemented in order to assist you here.
There is a lot of machinery needed for the range computation to be efficient.

The best way to avoid having ComputeFiniteRange do the actual computation is to set the COMPONENT_RANGE key on each entry of the PER_FINITE_COMPONENT information vector.

Something like this:

vtkDataArray* da;                 // => data array of interest
double ucRange[2] = {0.0, 255.0}; // => pre-computed range

vtkInformation* info = da->GetInformation();
vtkInformationDoubleVectorKey* rkey = vtkDataArray::COMPONENT_RANGE();

// Construct the per-finite-component information vector and hand it to the array's information
vtkInformationVector* infoVec = vtkInformationVector::New();
info->Set(vtkDataArray::PER_FINITE_COMPONENT(), infoVec);
infoVec->SetNumberOfInformationObjects(da->GetNumberOfComponents());
for (int i = 0; i < da->GetNumberOfComponents(); ++i)
{
  infoVec->GetInformationObject(i)->Set(rkey, ucRange, 2);
}
infoVec->FastDelete(); // info now holds the remaining reference
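
If you also have the magnitude (component == -1) range cached, I believe the same idea applies by setting the finite L2-norm key directly on the array's information. Something along these lines should work, though I have not tested it and am going from memory on the key name:

double l2Range[2] = {0.0, 255.0}; // => pre-computed finite L2-norm range
info->Set(vtkDataArray::L2_NORM_FINITE_RANGE(), l2Range, 2);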

Here is our class:

#pragma once

#define VTK_GDA_VALUERANGE_INSTANTIATING
#include <vtkArrayIteratorIncludes.h>
#include <vtkArrayIteratorTemplate.h>
#include <vtkGenericDataArray.h>
#include <vtkInformation.h>
#include <vtkInformationIntegerKey.h>
#include <vtkInformationVector.h>
#include <vtkNew.h>
#include <vtkSetGet.h>
#include <vtkSmartPointer.h>
#include <vtkType.h>

#include "simplnx/Common/Range.hpp"
#include "simplnx/DataStructure/DataArray.hpp"
#include "simplnx/DataStructure/DataStore.hpp"
#include "simplnx/Utilities/TimeUtilities.hpp"

#ifdef SIMPLNX_ENABLE_MULTICORE
#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>
#endif

#include <array>
#include <cmath>
#include <limits>
#include <memory>
#include <stdexcept>
#include <vector>

namespace CV
{

/**
 * @struct CV::ComputeRangeFunctor
 * @brief Computes the min and max value for each component across all tuples
 * in a simplnx DataStore using TBB parallel_reduce. It also tracks the minimum
 * and maximum L2 norm (tuple magnitude) across all tuples.
 *
 * The DataStore stores data in AOS (Array of Structures) layout where
 * element[tupleIdx * numComponents + compIdx] gives the value for a
 * specific tuple and component. This struct iterates over tuples and
 * tracks per-component min/max values, merging partial results via join().
 *
 * @tparam T The underlying primitive type of the DataStore
 */
template <typename T>
struct ComputeRangeFunctor
{
  const nx::core::DataStore<T>& dataStore;
  size_t numComponents;
  std::vector<T> compMin;
  std::vector<T> compMax;
  std::array<T, 2> l2Norm;

  /**
   * @brief Constructs the body with initial identity values.
   * @param store Reference to the DataStore to compute over
   * @param nComp Number of components per tuple
   */
  ComputeRangeFunctor(const nx::core::DataStore<T>& store, size_t nComp)
  : dataStore(store)
  , numComponents(nComp)
  , compMin(nComp, std::numeric_limits<T>::max())
  , compMax(nComp, std::numeric_limits<T>::lowest())
  {
    l2Norm = {std::numeric_limits<T>::max(), std::numeric_limits<T>::min()};
  }

#ifdef SIMPLNX_ENABLE_MULTICORE
  /**
   * @brief Splitting constructor required by tbb::parallel_reduce.
   * Initializes a fresh set of per-component min/max values.
   */
  ComputeRangeFunctor(ComputeRangeFunctor& other, tbb::split)
  : dataStore(other.dataStore)
  , numComponents(other.numComponents)
  , compMin(other.numComponents, std::numeric_limits<T>::max())
  , compMax(other.numComponents, std::numeric_limits<T>::lowest())
  {
    l2Norm = {std::numeric_limits<T>::max(), std::numeric_limits<T>::min()};
  }
#endif

  /**
   * @brief Processes a range of tuples, updating per-component min/max.
   * @param range The range of tuple indices to process
   */
  void operator()(const tbb::blocked_range<size_t>& range)
  {
    for(size_t tupleIdx = range.begin(); tupleIdx != range.end(); ++tupleIdx)
    {
      double magnitude = 0;

      const size_t offset = tupleIdx * numComponents;
      for(size_t comp = 0; comp < numComponents; ++comp)
      {
        T value = dataStore[offset + comp];
        magnitude = magnitude + (static_cast<double>(value) * static_cast<double>(value));
        if(value < compMin[comp])
        {
          compMin[comp] = value;
        }
        if(value > compMax[comp])
        {
          compMax[comp] = value;
        }
      }
      magnitude = std::sqrt(magnitude);
      if(magnitude < l2Norm[0])
      {
        l2Norm[0] = static_cast<T>(magnitude);
      }
      if(magnitude > l2Norm[1])
      {
        l2Norm[1] = static_cast<T>(magnitude);
      }
    }
  }

  /**
   * @brief Merges the results from another partial reduction.
   * @param other The other ComputeRangeFunctor to merge from
   */
  void join(const ComputeRangeFunctor& other)
  {
    for(size_t comp = 0; comp < numComponents; ++comp)
    {
      if(other.compMin[comp] < compMin[comp])
      {
        compMin[comp] = other.compMin[comp];
      }
      if(other.compMax[comp] > compMax[comp])
      {
        compMax[comp] = other.compMax[comp];
      }
    }
    if(other.l2Norm[0] < l2Norm[0])
    {
      l2Norm[0] = other.l2Norm[0];
    }
    if(other.l2Norm[1] > l2Norm[1])
    {
      l2Norm[1] = other.l2Norm[1];
    }
  }
};

/**
 * @class CV::Array
 * @brief The CVArray class serves as a wrapper around a simplnx DataStore<T> to
 * make it available for use in VTK without duplicating the underlying data.
 * @tparam T The underlying primitive type
 */
template <class T>
class Array : public vtkGenericDataArray<CV::Array<T>, T>
{
public:
  using SimplnxArrayType = nx::core::DataArray<T>;
  using SimplnxArrayPointerType = std::shared_ptr<SimplnxArrayType>;

  using ValueType = T;
  using Superclass2 = vtkGenericDataArray<CV::Array<T>, T>;

  using DataStoreType = nx::core::DataStore<T>;
  using SharedDataStorePtrType = std::shared_ptr<DataStoreType>;

  vtkAbstractTypeMacro(CV::Array<T>, Superclass2);

  static inline const std::string MissingArrayName = "[Missing Array]";

  /**
   * @brief Creates a new instance of CV::Array. This is required of vtkObject derived classes
   * @return
   */
  static CV::Array<T>* New()
  {
    // std::cout << "CVArray::New()" << std::endl;
    auto arrayPtr = new CV::Array<T>();
    return arrayPtr;
  }

  VTK_NEWINSTANCE vtkArrayIterator* NewIterator() override
  {
    vtkArrayIterator* cvArrayIt = vtkArrayIteratorTemplate<T>::New();
    cvArrayIt->Initialize(this);
    return cvArrayIt;
  }

  Array(const Array&) = delete;
  Array(Array&&) noexcept = delete;
  Array& operator=(const Array&) = delete;
  Array& operator=(Array&&) noexcept = delete;

  ~Array() override = default;
  //  {
  //    std::cout << "CV:Array::~CVArray() " << this->GetName() << "  " << this->Size << std::endl;
  //  }

  // NOTE: uncomment to debug memory leak
  //  void Register(vtkObjectBase* o) override
  //  {
  //    vtkObjectBase::Register(o);
  //    std::cout << "CVArray::Register " << this->GetName() << "  " << this->GetReferenceCount() << "  " << reinterpret_cast<void*>(o) << std::endl;
  //  }
  //
  //  void UnRegister(vtkObjectBase* o) override
  //  {
  //    vtkObjectBase::UnRegister(o);
  //    std::cout << "CVArray::UnRegister " << this->GetName() << "  " << this->GetReferenceCount() << "  " << reinterpret_cast<void*>(o) << std::endl;
  //  }

  /**
   * @brief Adopts (currently shares rather than moves) the DataStore from the given
   * simplnx DataArray, updates the VTK bookkeeping, and caches the per-component ranges.
   * @param dataArray The simplnx DataArray whose DataStore should back this CV::Array
   */
  void MoveDataStoreFromSimplnxDataArray(SimplnxArrayType* dataArray)
  {
    if(dataArray == nullptr)
    {
      return;
    }
    if(dataArray->getDataFormat().empty() == false)
    {
      return;
    }
    // std::cout << "CVArray::InitializeWithDataStore()" << std::endl;

    // NOTE : We are reverting back to NOT moving the data store out of the data structure so that charting can have access to the data.
    //        We updated this to move the data store when re-architecting the visualization code, presumably as a work around for some
    //        issues that we no longer remember. At the time of reviewing this (8/21/2024 by Jackson, Duffey, & Marquis), as far as we can tell this works now.
    // SharedDataStorePtrType dataStorePtr = std::dynamic_pointer_cast<DataStoreType>(dataArray->getDataStorePtr().lock());
    // m_DataStore = SharedDataStorePtrType(new DataStoreType(std::move(*(dataStorePtr.get()))));
    m_DataStore = std::dynamic_pointer_cast<DataStoreType>(dataArray->getDataStorePtr().lock());

    SetName(dataArray->getName().c_str());

    this->NumberOfComponents = m_DataStore->getNumberOfComponents();
    this->Size = m_DataStore->getNumberOfTuples() * this->NumberOfComponents;
    this->MaxId = this->Size - 1;

    // Cache the range of data here
    ComputeMinMaxValuesInternal();

    // Are these needed?
    vtkGenericDataArray<CV::Array<T>, T>::SetNumberOfComponents(this->NumberOfComponents);
    vtkGenericDataArray<CV::Array<T>, T>::SetNumberOfTuples(m_DataStore->getNumberOfTuples());
    vtkGenericDataArray<CV::Array<T>, T>::SetObjectName(dataArray->getName().c_str());
  }

  /**
   * @brief Gets the value range of a component
   * @param range Output to store the min then max value
   * @param compIdx The zero-based index of the component, or a negative value (e.g. -1) for the finite L2-norm (magnitude) range of each tuple
   */
  void GetFiniteValueRange(T* range, int compIdx)
  {

    if(compIdx != -1 && compIdx >= m_DataStore->getNumberOfComponents())
    {
      throw std::out_of_range(fmt::format("CV::Array::GetFiniteValueRange() compIdx '{}' out of range", compIdx));
    }
    if(!m_RangeIsCalculated)
    {
      ComputeMinMaxValuesInternal();
    }
    if(compIdx < 0)
    {
      // They want the L2 Norm (Magnitude of all components of a tuple)
      range[0] = m_L2Norm[0];
      range[1] = m_L2Norm[1];
      return;
    }
    range[0] = m_MinMaxValues[compIdx][0];
    range[1] = m_MinMaxValues[compIdx][1];
  }

  /**
   * @brief Sets the array name, forwarding to the superclass implementation.
   * @param name
   */
  void SetName(const char* name) override
  {
    Superclass::SetName(name);
  }

  /**
   * @brief Get the value at valueIdx.
   *
   * Note: GetTypedComponent is preferred over this method. It is faster for SOA arrays,
   * and shows equivalent performance for AOS arrays when NumberOfComponents is known
   * to the compiler (See vtkAssume.h).
   * @param valueIdx assumes AOS ordering.
   * @return T
   */
  inline ValueType GetValue(vtkIdType valueIdx) const
  {
    if(nullptr == m_DataStore)
    {
      throw std::runtime_error("CV::Array::GetValue() does not have an underlying nx::core::DataStore");
    }
    return (*m_DataStore)[valueIdx];
  }

  /**
   * @brief Set the value at valueIdx to value.
   *
   * Note: SetTypedComponent is preferred over this method. It is faster for SOA arrays, and shows equivalent performance for
   *  AOS arrays when NumberOfComponents is known to the compiler (See vtkAssume.h).
   * @param valueIdx assumes AOS ordering.
   * @param value
   */
  inline void SetValue(vtkIdType valueIdx, ValueType value)
  {
    if(nullptr == m_DataStore)
    {
      throw std::runtime_error("CV::Array::SetValue() does not have an underlying nx::core::DataStore");
    }
    (*m_DataStore)[valueIdx] = value;
  }

  /**
   * @brief Copy the tuple at tupleIdx into tuple.
   *
   * Note:GetTypedComponent is preferred over this method. The overhead of copying the tuple is significant compared to the
   *    more performant component-wise access methods, which typically optimize to raw memory access.
   * @param tupleIdx
   * @param tuple
   */
  inline void GetTypedTuple(vtkIdType tupleIdx, ValueType* tuple) const
  {
    if(nullptr == m_DataStore)
    {
      throw std::runtime_error("CV::Array::GetTypedTuple() does not have an underlying nx::core::DataStore");
    }

    const size_t numComps = m_DataStore->getNumberOfComponents();
    const size_t elementIndex = tupleIdx * numComps;
    for(size_t i = 0; i < numComps; i++)
    {
      tuple[i] = (*m_DataStore)[elementIndex + i];
    }
  }

  /**
   * @brief Set this array's tuple at tupleIdx to the values in tuple.
   *
   * Note: SetTypedComponent is preferred over this method. The overhead of copying
   * the tuple is significant compared to the more performant component-wise access
   * methods, which typically optimize to raw memory access.
   * @param tupleIdx
   * @param tuple
   */
  inline void SetTypedTuple(vtkIdType tupleIdx, const ValueType* tuple)
  {
    if(nullptr == m_DataStore)
    {
      throw std::runtime_error("CV::Array::SetTypedTuple() does not have an underlying nx::core::DataStore");
    }
    const size_t numComps = m_DataStore->getNumberOfComponents();
    const size_t elementIndex = tupleIdx * numComps;
    for(size_t i = 0; i < numComps; i++)
    {
      (*m_DataStore)[elementIndex + i] = tuple[i];
    }
  }

  /**
   * @brief Get component compIdx of the tuple at tupleIdx. This is typically the fastest way to access array data.
   * @param tupleIdx
   * @param compIdx
   * @return T
   */
  inline ValueType GetTypedComponent(vtkIdType tupleIdx, int compIdx) const
  {
    if(nullptr == m_DataStore)
    {
      throw std::runtime_error("CV::Array::GetTypedComponent() does not have an underlying nx::core::DataStore");
    }
    const auto elementIndex = tupleIdx * this->NumberOfComponents;
    if(elementIndex + compIdx >= (*m_DataStore).getSize())
    {
      throw std::runtime_error(fmt::format("CV::Array::GetTypedComponent() 'elementIndex + compIdx' of {} is greater than the number of elements {}", elementIndex + compIdx, m_DataStore->getSize()));
    }
    ValueType value = (*m_DataStore)[elementIndex + compIdx];
    //   std::cout << " * GetTypedComponent() " << tupleIdx << ":" << compIdx  << " = " << value << std::endl;
    return value;
  }

  /**
   * @brief Set component compIdx of the tuple at tupleIdx to value. This is typically the fastest way to set array data.
   * @param tupleIdx
   * @param compIdx
   * @param value
   */
  inline void SetTypedComponent(vtkIdType tupleIdx, int compIdx, ValueType value)
  {
    if(nullptr == m_DataStore)
    {
      throw std::runtime_error("CV::Array::SetTypedComponent() does not have an underlying nx::core::DataStore");
    }
    const auto elementIndex = tupleIdx * this->NumberOfComponents;
    if(elementIndex + compIdx >= (*m_DataStore).getSize())
    {
      throw std::runtime_error(fmt::format("CV::Array::SetTypedComponent() 'elementIndex + compIdx' of {} is greater than the number of elements {}", elementIndex + compIdx, m_DataStore->getSize()));
    }
    (*m_DataStore)[elementIndex + compIdx] = value;
  }

  /**
   * @brief Allocates space for a given number of tuples. Old data should *NOT* be preserved.
   * If numTuples == 0, all data is freed.
   * @param numTuples
   * @return bool
   */
  inline bool AllocateTuples(vtkIdType numTuples)
  {
    // If there is no difference in size, then just return true
    if(m_DataStore != nullptr && m_DataStore->getNumberOfTuples() == numTuples)
    {
      m_DataStore->fill(0); // Splat ZEROS across the array since the existing data should _NOT_ be preserved.
      return true;
    }

    // Allocate a brand new DataStore to use
    typename nx::core::ShapeType tupleShape = {static_cast<typename nx::core::ShapeType::value_type>(numTuples)};
    typename nx::core::ShapeType compShape = {1ULL};
    if(m_DataStore != nullptr)
    {
      compShape = m_DataStore->getComponentShape();
    }

    // std::cout << "CVArray::  " << this->GetName() << "  AllocateTuples: " << tupleShape[0] << "  " << compShape[0] << std::endl;

    m_DataStore = SharedDataStorePtrType(new DataStoreType(tupleShape, compShape, 0));

    // Now update the vtkGenericDataArray internal values
    this->NumberOfComponents = m_DataStore->getNumberOfComponents();
    this->Size = m_DataStore->getNumberOfTuples() * this->NumberOfComponents;
    // DO NOT set MaxId here!

    // Are these needed?
    vtkGenericDataArray<CV::Array<T>, T>::SetNumberOfComponents(this->NumberOfComponents);
    vtkGenericDataArray<CV::Array<T>, T>::SetNumberOfTuples(m_DataStore->getNumberOfTuples());

    return true;
  }

  /**
   * @brief Allocates space for a given number of tuples.  Old data *WILL* be preserved.
   * If numTuples == 0, all data is freed.
   * @param numTuples
   * @return bool
   */
  inline bool ReallocateTuples(vtkIdType numTuples)
  {
    if(m_DataStore == nullptr)
    {
      throw std::runtime_error("CV::Array::ReallocateTuples() does not have an underlying nx::core::DataStore");
    }

    typename nx::core::ShapeType tupleShape = {static_cast<typename nx::core::ShapeType::value_type>(numTuples)};
    typename nx::core::ShapeType compShape = m_DataStore->getComponentShape();

    SharedDataStorePtrType dataStoreTemp = SharedDataStorePtrType(new DataStoreType(tupleShape, compShape, 0));

    // std::cout << "ReallocateTuples: " << tupleShape[0] << "  " << compShape[0] << std::endl;

    // Copy the previous data over to the new underlying array. This could be done faster possibly
    size_t numElements = (dataStoreTemp->getSize() < m_DataStore->getSize() ? dataStoreTemp->getSize() : m_DataStore->getSize());
    for(size_t idx = 0; idx < numElements; idx++)
    {
      (*dataStoreTemp.get())[idx] = (*m_DataStore)[idx];
    }

    // Move the newly copied data from the temp location back to the current `DataStore<T>` object
    m_DataStore = std::move(dataStoreTemp);

    // Now update the vtkGenericDataArray internal values
    this->NumberOfComponents = m_DataStore->getNumberOfComponents();
    this->Size = numTuples * this->NumberOfComponents;
    // DO NOT set MaxId here!

    // Are these needed?
    vtkGenericDataArray<CV::Array<T>, T>::SetNumberOfComponents(this->NumberOfComponents);
    vtkGenericDataArray<CV::Array<T>, T>::SetNumberOfTuples(m_DataStore->getNumberOfTuples());

    return true;
  }

  /**
   * @brief GetVoidPointer
   * @param valueIdx
   * @return
   */
  void* GetVoidPointer(vtkIdType valueIdx) override
  {
    if(nullptr == m_DataStore)
    {
      return nullptr;
    }
    return m_DataStore->data() + valueIdx;
  }

protected:
  /**
   * @brief Protected default constructor; use New() to create instances.
   */
  Array()
  : Superclass()
  , m_DataStore(nullptr)
  {
  }

  /**
   * @brief NewInstanceInternal
   * @return
   */
  vtkObjectBase* NewInstanceInternal() const override
  {
    // std::cout << "CVArray::NewInstanceInternal()" << std::endl;
    auto arrayPtr = new CV::Array<T>();
    typename nx::core::ShapeType tupleShape = {0ULL};
    typename nx::core::ShapeType compShape = {1ULL};
    if(m_DataStore != nullptr)
    {
      compShape = m_DataStore->getComponentShape();
      arrayPtr->NumberOfComponents = m_DataStore->getNumberOfComponents();
      arrayPtr->Size = 0;
    }
    arrayPtr->m_DataStore = SharedDataStorePtrType(new DataStoreType(tupleShape, compShape, 0));
    return arrayPtr;
  }

private:
  SharedDataStorePtrType m_DataStore;
  bool m_RangeIsCalculated = false;
  std::vector<std::array<T, 2>> m_MinMaxValues;
  std::array<T, 2> m_L2Norm = {0, 0};

  void ComputeMinMaxValuesInternal()
  {
    // Compute per-component min/max using TBB parallel_reduce
    ComputeRangeFunctor<T> computeRangeFunctor(*m_DataStore, this->NumberOfComponents);
#ifdef SIMPLNX_ENABLE_MULTICORE
    tbb::parallel_reduce(tbb::blocked_range<size_t>(0, m_DataStore->getNumberOfTuples()), computeRangeFunctor);
#else
    computeRangeFunctor(tbb::blocked_range<size_t>(0, m_DataStore->getNumberOfTuples()));
#endif
    m_MinMaxValues.clear(); // drop any previously cached ranges before re-populating
    for(int i = 0; i < this->NumberOfComponents; ++i)
    {
      m_MinMaxValues.push_back({computeRangeFunctor.compMin[i], computeRangeFunctor.compMax[i]});
    }
    m_L2Norm[0] = computeRangeFunctor.l2Norm[0];
    m_L2Norm[1] = computeRangeFunctor.l2Norm[1];
    m_RangeIsCalculated = true;

    // This section will add the necessary vtkInformationKeys to this vtkDataArray
    // which makes upstream calls to "GetFiniteRange()" much quicker.
    std::vector<std::array<double, 2>> compRanges;
    compRanges.resize(this->NumberOfComponents);
    // std::vector<std::set<double>> compUniqueVals;
    // this->NumberOfComponents > 1 ? compUniqueVals.resize(this->NumberOfComponents + 1) : compUniqueVals.resize(1);
    for(int i = 0; i < this->NumberOfComponents; ++i)
    {
      compRanges[i][0] = computeRangeFunctor.compMin[i];
      compRanges[i][1] = computeRangeFunctor.compMax[i];
    }

    vtkSmartPointer<vtkInformation> info = this->GetInformation();
    vtkNew<vtkInformationVector> compRangeInfoVec;
    compRangeInfoVec->SetNumberOfInformationObjects(this->NumberOfComponents);
    // vtkNew<vtkInformationVector> compUniqueValsInfoVec;
    // compUniqueValsInfoVec->SetNumberOfInformationObjects(compUniqueVals.size() * 2);
    info->Set(vtkDataArray::PER_FINITE_COMPONENT(), compRangeInfoVec);
    // info->Set(getUniqueValuesInformationVectorKey(), compUniqueValsInfoVec);
    for(int i = 0; i < this->NumberOfComponents; ++i)
    {
      compRangeInfoVec->GetInformationObject(i)->Set(vtkDataArray::COMPONENT_RANGE(), compRanges[i].data(), 2);
    }

    // for(int i = 0; i < compUniqueVals.size(); ++i)
    // {
    //   std::vector<double> compUniqueValsVector(compUniqueVals[i].begin(), compUniqueVals[i].end());
    //   compUniqueValsInfoVec->GetInformationObject(i * 2 + 0)->Set(getCompNumUniqueValuesIntKey(i), compUniqueValsVector.size());
    //   compUniqueValsInfoVec->GetInformationObject(i * 2 + 1)->Set(getCompUniqueValuesDoubleVectorKey(i), compUniqueValsVector.data(), compUniqueValsVector.size());
    // }
  }
};
} // namespace CV
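
For reference, once ComputeMinMaxValuesInternal() has populated the PER_FINITE_COMPONENT / COMPONENT_RANGE keys, a consumer-side call like the one below should be answered from the cached information instead of re-running the SMP range computation seen in the profile (sketch only; the float instantiation and the cvArray variable are just for illustration):

double range[2];
cvArray->GetFiniteRange(range, 0); // cvArray is an already-populated CV::Array<float>*; component 0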

@sankhesh Thanks for the heads up (and the follow-up reminder from the other day). We did implement at least that one key, and it seems to have had the desired effect of removing the long-running computations from our profile trace.

@spyridon97 I posted our complete header (after updating it with the information key) in another reply.

Thanks again for the help.