From 7cbafa8c65751d2eda7a17753c384da1fc91f695 Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Sun, 1 Mar 2015 21:53:04 +0100 Subject: [PATCH 1/3] cmRemoveDuplicates: Store unique iterators instead of values. There is no need to copy all of the values in the container in order to determine uniqueness. Iterators can be stored instead and can be used with standard algorithms with custom comparison methods. This also means that we use less space in case the value_type size is greater than sizeof(iterator). That is common for std::string which may require up to 32 bytes (libstdc++ 5.0 and MSVC at least). With libstdc++ 4.9 and older, std::string is 8 bytes, so we likely don't gain anything here. Inspired-by: Daniel Pfeifer --- Source/cmAlgorithms.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Source/cmAlgorithms.h b/Source/cmAlgorithms.h index f032de7ac..1b7029beb 100644 --- a/Source/cmAlgorithms.h +++ b/Source/cmAlgorithms.h @@ -176,6 +176,12 @@ private: Range const& m_range; }; +struct IterLess +{ + template<typename It> + bool operator()(It const& a, It const& b) const { return *a < *b; } +}; + } template @@ -264,8 +270,8 @@ typename Range::const_iterator cmRemoveMatching(Range &r, MatchRange const& m) template<typename Range> typename Range::const_iterator cmRemoveDuplicates(Range& r) { - typedef std::vector<typename Range::value_type> UniqueVector; - UniqueVector unique; + typedef typename Range::const_iterator T; + std::vector<T> unique; unique.reserve(r.size()); std::vector<size_t> indices; size_t count = 0; @@ -273,11 +279,12 @@ typename Range::const_iterator cmRemoveDuplicates(Range& r) for(typename Range::const_iterator it = r.begin(); it != end; ++it, ++count) { - const typename UniqueVector::iterator low = - std::lower_bound(unique.begin(), unique.end(), *it); - if (low == unique.end() || *low != *it) + const typename std::vector<T>::iterator low = + std::lower_bound(unique.begin(), unique.end(), it, + ContainerAlgorithms::IterLess()); + if (low == unique.end() || **low != *it) { -
unique.insert(low, *it); + unique.insert(low, it); } else { From eec7091d76fc3db6535eec3f78fd2585b9c0c38a Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Sun, 1 Mar 2015 21:57:16 +0100 Subject: [PATCH 2/3] cmRemoveDuplicates: Type-parameterize all uniq-operations --- Source/cmAlgorithms.h | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/Source/cmAlgorithms.h b/Source/cmAlgorithms.h index 1b7029beb..5504fee71 100644 --- a/Source/cmAlgorithms.h +++ b/Source/cmAlgorithms.h @@ -176,12 +176,6 @@ private: Range const& m_range; }; -struct IterLess -{ - template<typename It> - bool operator()(It const& a, It const& b) const { return *a < *b; } -}; - } template @@ -267,10 +261,27 @@ typename Range::const_iterator cmRemoveMatching(Range &r, MatchRange const& m) ContainerAlgorithms::BinarySearcher<MatchRange>(m)); } +namespace ContainerAlgorithms { + +template<typename Range> +struct RemoveDuplicatesAPI +{ + typedef typename Range::const_iterator const_iterator; + typedef typename Range::const_iterator value_type; + + static bool lessThan(value_type a, value_type b) { return *a < *b; } + static value_type uniqueValue(const_iterator a) { return a; } + template<typename It> + static bool valueCompare(It it, const_iterator it2) { return **it != *it2; } +}; + +} + template<typename Range> typename Range::const_iterator cmRemoveDuplicates(Range& r) { - typedef typename Range::const_iterator T; + typedef typename ContainerAlgorithms::RemoveDuplicatesAPI<Range> API; + typedef typename API::value_type T; std::vector<T> unique; unique.reserve(r.size()); std::vector<size_t> indices; @@ -280,11 +291,11 @@ typename Range::const_iterator cmRemoveDuplicates(Range& r) it != end; ++it, ++count) { const typename std::vector<T>::iterator low = - std::lower_bound(unique.begin(), unique.end(), it, - ContainerAlgorithms::IterLess()); - if (low == unique.end() || **low != *it) + std::lower_bound(unique.begin(), unique.end(), + API::uniqueValue(it), API::lessThan); + if (low == unique.end() || API::valueCompare(low, it)) { -
unique.insert(low, it); + unique.insert(low, API::uniqueValue(it)); } else { From 8701a3f468a4fb684442a8a9c5d4c8d15c72eb7b Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Sun, 8 Mar 2015 09:43:11 +0100 Subject: [PATCH 3/3] cmRemoveDuplicates: Partially specialize the API for pointer types. If de-duplicating a container of pointers, there is no need to store iterators to them, as that is just more 'pointer chasing'. Store the pointers themselves and use API which compares the pointers in the specialization. --- Source/cmAlgorithms.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Source/cmAlgorithms.h b/Source/cmAlgorithms.h index 5504fee71..0cf770140 100644 --- a/Source/cmAlgorithms.h +++ b/Source/cmAlgorithms.h @@ -263,7 +263,7 @@ typename Range::const_iterator cmRemoveMatching(Range &r, MatchRange const& m) namespace ContainerAlgorithms { -template<typename Range> +template<typename Range, typename T = typename Range::value_type> struct RemoveDuplicatesAPI { typedef typename Range::const_iterator const_iterator; @@ -275,6 +275,18 @@ struct RemoveDuplicatesAPI static bool valueCompare(It it, const_iterator it2) { return **it != *it2; } }; +template<typename Range, typename T> +struct RemoveDuplicatesAPI<Range, T*> +{ + typedef typename Range::const_iterator const_iterator; + typedef T* value_type; + + static bool lessThan(value_type a, value_type b) { return a < b; } + static value_type uniqueValue(const_iterator a) { return *a; } + template<typename It> + static bool valueCompare(It it, const_iterator it2) { return *it != *it2; } +}; + } template<typename Range>