#include #include #include #include #include #include #include #include #include #include #include #include // This demonstrates that a reimplementation of vtkGenericDataArrayLookupHelper // using a std::unordered_map of std::vector of indices is faster in several // scenarios of values distributed in a vector. using Array = std::shared_ptr>; using Index = long long; using IndexVec = std::vector; // Relevant parts of the vtkGenericDataArrayLookupHelper interface struct Lookup { virtual ~Lookup() = default; void setArray( Array array ); virtual void clear() = 0; virtual std::string name() = 0; Index lookupValue( int value ); void lookupValue( int value, IndexVec& indices ); private: virtual Index findIndexOf( int value ) = 0; virtual void findIndicesOf( int value, IndexVec& indices ) = 0; virtual void update() = 0; protected: Array m_array; }; void Lookup::setArray( Array array ) { if ( array == m_array ) return; clear(); m_array = array; } // Looks up the first index at which the value occurs Index Lookup::lookupValue( int value ) { update(); auto idx = findIndexOf( value ); // Test if findIndicesOf did its job properly if ( idx != -1 && (*m_array)[idx] != value ) { std::cerr << name() << ": lookup of "< size ) { auto it = indices.begin(); std::advance( it, size ); for ( ; it != indices.end(); ++it ) { auto idx = *it; if ( idx != -1 && (*m_array)[idx] != value ) { std::cerr << name() << ": lookup of "<; std::vector m_valueIndices; std::vector::iterator m_endOfValues; }; void LookupSorted::clear() { m_valueIndices.clear(); m_endOfValues = m_valueIndices.end(); } // Find the first value-index pair and return its index Index LookupSorted::findIndexOf( int value ) { if ( m_valueIndices.front().first > value ) return -1; auto pos = std::lower_bound( m_valueIndices.begin(), m_endOfValues, value, [](auto& v1, auto& v2){ return v1.first < v2; } ); return pos != m_endOfValues ? (*pos).second : -1; } // Find all value-index pairs and return their indices void LookupSorted::findIndicesOf( int value, IndexVec& indices ) { ValueIndex v{ value, -1}; auto result = std::equal_range( m_valueIndices.begin(), m_endOfValues, v, []( const auto& v1, const auto& v2 ){ return v1.first < v2.first; } ); for ( auto pos = result.first; pos != result.second; ++pos ) { indices.push_back( (*pos).second ); } } // Generate the vector of value-index pairs void LookupSorted::makeValueIndexVec() { auto num = m_array->size(); m_valueIndices.reserve( num ); for ( Index i = 0; i < num; ++i ) { m_valueIndices.push_back( { (*m_array)[i], i } ); } } // ---------------------------------------------------------------------------- // Unstable sorting and partitioning struct LookupUnstable : LookupSorted { std::string name() override { return "unstable"; } private: void update() override; }; // Update using unstable partitioning and sorting algorithms void LookupUnstable::update() { if (m_array->empty() || !m_valueIndices.empty() ) return; makeValueIndexVec(); // move all nan's to the back m_endOfValues = std::partition( m_valueIndices.begin(), m_valueIndices.end(), [](auto v){ return !isnan(v.first);} ); assert( m_endOfValues == m_valueIndices.end() ); // sort everything else by value (first of pair) std::sort( m_valueIndices.begin(), m_endOfValues ); } // ---------------------------------------------------------------------------- // Stable sorting and partitioning struct LookupStable : LookupSorted { std::string name() override { return "stable"; } private: void update() override; }; // Update using stable partitioning and sorting algorithms void LookupStable::update() { if (m_array->empty() || !m_valueIndices.empty() ) return; makeValueIndexVec(); // move all nan's to the back m_endOfValues = std::stable_partition( m_valueIndices.begin(), m_valueIndices.end(), [](auto v){ return !isnan(v.first);} ); assert( m_endOfValues == m_valueIndices.end() ); // sort everything else by value (first of pair) std::stable_sort( m_valueIndices.begin(), m_endOfValues ); } // ---------------------------------------------------------------------------- // Build a map of values to vectors of indices struct LookupMapped : Lookup { std::string name() override { return "mapped"; } virtual void clear() override; private: void update() override; Index findIndexOf( int value ) override; void findIndicesOf( int value, IndexVec& indices ) override; using IndexMap = std::unordered_map>; IndexMap m_map; }; void LookupMapped::clear() { m_map.clear(); } // If value occurs in the Array there will be a non-empty array // with indices in sorted order. Return the first index. If the // value does not occur in the Array return -1 Index LookupMapped::findIndexOf( int value ) { auto pos = m_map.find( value ); return ( pos != m_map.end() ) ? (*pos).second.front() : -1; } // If value occurs in the Array there will be a non-empty array // with indices in sorted order. Copy all entries. If the // value does not occur in the Array do nothing. void LookupMapped::findIndicesOf( int value, IndexVec& indices ) { auto pos = m_map.find( value ); if ( pos != m_map.end() ) { const auto& vec( (*pos).second ); std::copy( vec.begin(), vec.end(), std::back_inserter(indices)); } } // Update by appending each index to the appropriate index vector void LookupMapped::update() { if ( m_array->empty() || !m_map.empty() ) return; auto num = m_array->size(); m_map.reserve( num ); // allocate sufficient memory before inserting for ( Index i = 0; i < num; ++i ) { m_map[(*m_array)[i]].push_back(i); } } // ============================================================================ // Generate an array of length @size with a random distribution of @num values Array MakeRandom( size_t size, int num ) { auto generator = [&num]()->int{ return 1+std::rand()/((RAND_MAX+1u)/num); }; Array random = std::make_shared>(); random->resize( size ); std::generate( random->begin(), random->end(), generator); return random; } // Generate an array of length @size with @num distinct values sorted increasing Array MakeSorted( size_t size, int num ) { auto array = std::make_shared>(); array->reserve( size ); for ( size_t i = 0; i < size; ++i ) array->push_back( 1 + i/(size/num) ); return array; } // Generate an array of length @size with @num distinct values sorted decreasing Array MakeReverse( size_t size, int num ) { auto array = std::make_shared>(); array->reserve( size ); for ( size_t i = 0; i < size; ++i ) array->push_back( size - (i/num) * num ); return array; } // ---------------------------------------------------------------------------- int main(int,char**) { static const size_t N = 1000000; // A list of lookup helper instances std::vector> lookups; lookups.push_back( std::make_shared() ); lookups.push_back( std::make_shared() ); lookups.push_back( std::make_shared() ); std::vector> arrays; arrays.push_back( { "random 1", MakeRandom( N, 10 ) } ); // few different values arrays.push_back( { "random 2", MakeRandom( N, N/10 ) } ); // many different values arrays.push_back( { "sorted 1", MakeSorted( N, 10 ) } ); // few different values arrays.push_back( { "sorted 2", MakeSorted( N, N/10 ) } ); // many different values arrays.push_back( { "reverse 1", MakeReverse( N, 10 ) } ); // few different values arrays.push_back( { "reverse 2", MakeReverse( N, N/10 ) } ); // many different values for ( auto it = arrays.begin(); it != arrays.end(); ++it ) { auto name = it->first; auto array = it->second; // Assign the current array and clear the auxiliary data structures for ( auto lookup : lookups ) { lookup->setArray( array ); lookup->clear(); } for ( auto lookup : lookups ) { // Look up a the first value. This takes longer than subsequent lookups // due to building the auxiliary data structures auto start = std::chrono::system_clock::now(); lookup->lookupValue( 0 ); // 0 does not exist in the array auto stop = std::chrono::system_clock::now(); std::chrono::duration elapsed_first = stop-start; // cumulative time of looking up a single index for all values from the array start = std::chrono::system_clock::now(); for ( auto v : *array ) { lookup->lookupValue( v ); } stop = std::chrono::system_clock::now(); std::chrono::duration elapsed_single = stop-start; // cumulative time of looking up a list of indices for all values from the array IndexVec is; start = std::chrono::system_clock::now(); for ( auto v : *array ) { is.clear(); lookup->lookupValue( v, is ); } stop = std::chrono::system_clock::now(); std::chrono::duration elapsed_all = stop-start; std::cout.precision(3); std::cout << std::scientific; std::cout << std::setw(10)<name() << std::setw(9)<