qtbase/src/corelib/tools/qhash.h


// Copyright (C) 2020 The Qt Company Ltd.
// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#ifndef QHASH_H
#define QHASH_H
#include <QtCore/qalgorithms.h>
#include <QtCore/qcontainertools_impl.h>
#include <QtCore/qhashfunctions.h>
#include <QtCore/qiterator.h>
#include <QtCore/qlist.h>
#include <QtCore/qrefcount.h>
#include <QtCore/qttypetraits.h>
#include <initializer_list>
#include <functional> // for std::hash
#include <QtCore/q20type_traits.h>
class tst_QHash; // for befriending
QT_BEGIN_NAMESPACE
// New QHash implementation (from the commit dated 2020-01-17):
//
// A new implementation for QHash, using a faster and more memory-efficient data
// structure than the old, node-based QHash. Instead of a node-based approach,
// this implementation uses a two-stage lookup table. The total number of buckets
// in the table is divided into spans of 128 entries. Inside each span, an array
// of chars indexes into a storage area for the span. The storage area for each
// span is a simple array that gets (re-)allocated in increments of 16 items.
// This gives an average memory overhead of
// 8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
//
// To give good performance and avoid too many collisions, the table keeps its
// load factor between 0.25 and 0.5 (and grows and rehashes if the load factor
// goes above 0.5). This design keeps the memory overhead of the hash very small
// while at the same time giving very good performance. The calculated overhead
// for a QHash<int, int> comes to 1.7-3.3 bytes per entry, and to 2.2-4.3 bytes
// for a QHash<ptr, ptr>.
//
// The new implementation also completely splits the QHash and QMultiHash
// classes. One behavioral change to note: the new QHash no longer provides
// stable references to nodes in the hash when the table needs to grow.
//
// Benchmarking using https://github.com/Tessil/hash-table-shootout shows very
// nice performance compared to many different hash table implementations. The
// numbers below are for a hash<int64, int64> with 1 million entries; they scale
// mostly linearly (with some variation due to varying load factors) to smaller
// and larger tables. All numbers are in seconds, measured with gcc on Linux:
//
// Hash table               random     random     random   random   reads    full
//                          insertion  insertion  full     full     after    iteration
//                                     (reserved) deletes  reads    deletes
// -------------------------------------------------------------------------------------
// std::unordered_map       0.3842     0.1969     0.4511   0.1300   0.1169   0.0708
// google::dense_hash_map   0.1091     0.0846     0.0550   0.0452   0.0754   0.0160
// google::sparse_hash_map  0.2888     0.1582     0.0948   0.1020   0.1348   0.0112
// tsl::sparse_map          0.1487     0.1013     0.0735   0.0448   0.0505   0.0042
// old QHash                0.2886     0.1798     0.5065   0.0840   0.0717   0.1387
// new QHash                0.0940     0.0714     0.1494   0.0579   0.0449   0.0146
//
// Numbers for a hash<std::string, int64>, with the string having 15 characters:
//
// Hash table               random     random     random   random   reads
//                          insertion  insertion  full     full     after
//                                     (reserved) deletes  reads    deletes
// ------------------------------------------------------------------------
// std::unordered_map       0.4993     0.2563     0.5515   0.2950   0.2153
// google::dense_hash_map   0.2691     0.1870     0.1547   0.1125   0.1622
// google::sparse_hash_map  0.6979     0.3304     0.1884   0.1822   0.2122
// tsl::sparse_map          0.4066     0.2586     0.1929   0.1146   0.1095
// old QHash                0.3236     0.2064     0.5986   0.2115   0.1666
// new QHash                0.2119     0.1652     0.2390   0.1378   0.0965
//
// Memory usage numbers (in MB, for a table with 1M entries) also look very nice:
//
// Hash table               Key:   int64        std::string (15 chars)
//                          Value: int64        int64
// --------------------------------------------------------------------
// std::unordered_map              44.63        75.35
// google::dense_hash_map          32.32        80.60
// google::sparse_hash_map         18.08        44.21
// tsl::sparse_map                 20.44        45.93
// old QHash                       53.95        69.16
// new QHash                       23.23        51.32
//
// Fixes: QTBUG-80311
// Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
// Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
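// To make the two-stage layout described above concrete, the block below is a
// minimal, self-contained sketch of a single span, kept out of the build with
// #if 0. It is NOT Qt's actual Span/Data implementation; the names SpanSketch,
// Entry, insert and hasEntry are invented for this illustration, it assumes
// default-constructible Key/Value, and it omits entry removal and slot reuse.
// A bucket index selects one of 128 char offsets; the offset points into a
// compact entry array that is (re-)allocated in increments of 16.
#if 0
template <typename Key, typename Value>
struct SpanSketch
{
    static constexpr unsigned char UnusedEntry = 0xff;
    struct Entry { Key key; Value value; };

    unsigned char offsets[128];   // first stage: bucket -> slot in entries[]
    Entry *entries = nullptr;     // second stage: tightly packed key/value storage
    unsigned char allocated = 0;  // capacity of entries[], always a multiple of 16
    unsigned char nextFree = 0;   // next unused slot in entries[]

    SpanSketch() { for (auto &o : offsets) o = UnusedEntry; }
    ~SpanSketch() { delete[] entries; }

    bool hasEntry(size_t bucket) const noexcept { return offsets[bucket] != UnusedEntry; }
    Entry &at(size_t bucket) { return entries[offsets[bucket]]; }

    // Insert into an empty bucket of this span (bucket < 128, !hasEntry(bucket)),
    // growing the storage by 16 slots when it is full. Sparsely filled spans
    // never allocate anywhere near 128 entries, which is where the low memory
    // overhead comes from.
    Entry &insert(size_t bucket, const Key &key, const Value &value)
    {
        if (nextFree == allocated) {
            Entry *grown = new Entry[allocated + 16];
            for (unsigned char i = 0; i < nextFree; ++i)
                grown[i] = entries[i];
            delete[] entries;
            entries = grown;
            allocated += 16;
        }
        offsets[bucket] = nextFree;
        entries[nextFree] = Entry{ key, value };
        return entries[nextFree++];
    }
};
#endif
// In the actual table the bucket index is split the same way: the low 7 bits
// pick the slot inside a span and the remaining bits pick the span, while
// growing and rehashing happen at the table level to keep the load factor
// between 0.25 and 0.5 as described above.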
struct QHashDummyValue
{
bool operator==(const QHashDummyValue &) const noexcept { return true; }
};
namespace QHashPrivate {
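// Detection traits used by calculateHash() below: does the key type provide a
// qHash() overload taking a seed, or a std::hash specialization (with or
// without a seed argument)?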
template <typename T, typename = void>
constexpr inline bool HasQHashOverload = false;
template <typename T>
constexpr inline bool HasQHashOverload<T, std::enable_if_t<
std::is_convertible_v<decltype(qHash(std::declval<const T &>(), std::declval<size_t>())), size_t>
>> = true;
template <typename T, typename = void>
constexpr inline bool HasStdHashSpecializationWithSeed = false;
template <typename T>
constexpr inline bool HasStdHashSpecializationWithSeed<T, std::enable_if_t<
std::is_convertible_v<decltype(std::hash<T>()(std::declval<const T &>(), std::declval<size_t>())), size_t>
>> = true;
template <typename T, typename = void>
constexpr inline bool HasStdHashSpecializationWithoutSeed = false;
template <typename T>
constexpr inline bool HasStdHashSpecializationWithoutSeed<T, std::enable_if_t<
std::is_convertible_v<decltype(std::hash<T>()(std::declval<const T &>())), size_t>
>> = true;
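// Dispatch in priority order: a qHash(key, seed) overload wins, then a
// std::hash specialization that accepts a seed, then a plain std::hash;
// anything else is a compile-time error.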
template <typename T>
size_t calculateHash(const T &t, size_t seed = 0)
{
if constexpr (HasQHashOverload<T>) {
return qHash(t, seed);
} else if constexpr (HasStdHashSpecializationWithSeed<T>) {
return std::hash<T>()(t, seed);
} else if constexpr (HasStdHashSpecializationWithoutSeed<T>) {
Q_UNUSED(seed);
return std::hash<T>()(t);
} else {
static_assert(QtPrivate::type_dependent_false<T>(), "The key type must have a qHash overload or a std::hash specialization");
return 0;
}
}
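// Node is the entry type stored in a QHash<Key, T>: a plain { key, value } pair
// that knows how to construct itself in uninitialized span storage.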
template <typename Key, typename T>
struct Node
{
using KeyType = Key;
using ValueType = T;
Key key;
T value;
template<typename ...Args>
static void createInPlace(Node *n, Key &&k, Args &&... args)
{ new (n) Node{ std::move(k), T(std::forward<Args>(args)...) }; }
template<typename ...Args>
static void createInPlace(Node *n, const Key &k, Args &&... args)
{ new (n) Node{ Key(k), T(std::forward<Args>(args)...) }; }
template<typename ...Args>
void emplaceValue(Args &&... args)
{
value = T(std::forward<Args>(args)...);
}
T &&takeValue() noexcept(std::is_nothrow_move_assignable_v<T>)
{
return std::move(value);
}
bool valuesEqual(const Node *other) const { return value == other->value; }
};
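// Specialization for QHashDummyValue, used when only the keys matter (this is
// what QSet builds on): no value is stored at all.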
template <typename Key>
struct Node<Key, QHashDummyValue> {
using KeyType = Key;
using ValueType = QHashDummyValue;
Key key;
template<typename ...Args>
static void createInPlace(Node *n, Key &&k, Args &&...)
{ new (n) Node{ std::move(k) }; }
template<typename ...Args>
static void createInPlace(Node *n, const Key &k, Args &&...)
{ new (n) Node{ k }; }
template<typename ...Args>
void emplaceValue(Args &&...)
{
}
ValueType takeValue() { return QHashDummyValue(); }
bool valuesEqual(const Node *) const { return true; }
};
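// Chain of values sharing the same key in a QMultiHash: a simple singly linked
// list owned by the corresponding MultiNode.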
template <typename T>
struct MultiNodeChain
{
T value;
MultiNodeChain *next = nullptr;
~MultiNodeChain()
{
}
qsizetype free() noexcept(std::is_nothrow_destructible_v<T>)
{
qsizetype nEntries = 0;
MultiNodeChain *e = this;
while (e) {
MultiNodeChain *n = e->next;
++nEntries;
delete e;
e = n;
}
return nEntries;
}
bool contains(const T &val) const noexcept
{
const MultiNodeChain *e = this;
while (e) {
if (e->value == val)
return true;
e = e->next;
}
return false;
}
};
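// Entry type stored in a QMultiHash<Key, T>: the key plus an owned chain of one
// or more values.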
template <typename Key, typename T>
struct MultiNode
{
using KeyType = Key;
using ValueType = T;
using Chain = MultiNodeChain<T>;
Key key;
Chain *value;
template<typename ...Args>
static void createInPlace(MultiNode *n, Key &&k, Args &&... args)
{ new (n) MultiNode(std::move(k), new Chain{ T(std::forward<Args>(args)...), nullptr }); }
template<typename ...Args>
static void createInPlace(MultiNode *n, const Key &k, Args &&... args)
{ new (n) MultiNode(k, new Chain{ T(std::forward<Args>(args)...), nullptr }); }
MultiNode(const Key &k, Chain *c)
: key(k),
value(c)
{}
MultiNode(Key &&k, Chain *c) noexcept(std::is_nothrow_move_assignable_v<Key>)
: key(std::move(k)),
value(c)
{}
MultiNode(MultiNode &&other)
: key(other.key),
value(std::exchange(other.value, nullptr))
{
}
MultiNode(const MultiNode &other)
: key(other.key)
{
Chain *c = other.value;
Chain **e = &value;
while (c) {
Chain *chain = new Chain{ c->value, nullptr };
*e = chain;
e = &chain->next;
c = c->next;
}
}
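    // Note: the copy constructor above deep-copies the whole chain of values,
    // preserving their order.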
~MultiNode()
{
if (value)
value->free();
}
static qsizetype freeChain(MultiNode *n) noexcept(std::is_nothrow_destructible_v<T>)
{
qsizetype size = n->value->free();
n->value = nullptr;
return size;
}
template<typename ...Args>
void insertMulti(Args &&... args)
{
Chain *e = new Chain{ T(std::forward<Args>(args)...), nullptr };
e->next = std::exchange(value, e);
}
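    // Note: insertMulti() prepends the new Chain link, so the most recently inserted
    // value for a key is the first one encountered when traversing the chain.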
template<typename ...Args>
void emplaceValue(Args &&... args)
{
value->value = T(std::forward<Args>(args)...);
}
};
template<typename Node>
constexpr bool isRelocatable()
{
return QTypeInfo<typename Node::KeyType>::isRelocatable && QTypeInfo<typename Node::ValueType>::isRelocatable;
}
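// Presumably used when the table has to move Nodes around (e.g. while growing or rehashing):
// relocatable Nodes can be moved as raw bytes instead of being move-constructed and destroyed.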
struct SpanConstants {
static constexpr size_t SpanShift = 7;
static constexpr size_t NEntries = (1 << SpanShift);
static constexpr size_t LocalBucketMask = (NEntries - 1);
static constexpr size_t UnusedEntry = 0xff;
static_assert ((NEntries & LocalBucketMask) == 0, "NEntries must be a power of two.");
};
// Regular hash tables consist of a list of buckets that can store Nodes. But simply allocating one large array of buckets
// would waste a lot of memory. To avoid this, we split the vector of buckets up into a vector of Spans. Each Span represents
// NEntries buckets. To quickly find the correct Span that holds a bucket, NEntries must be a power of two.
//
// Inside each Span, there is an offset array that represents the actual buckets. offsets contains either an index into the
// actual storage space for the Nodes (the 'entries' member) or 0xff (UnusedEntry) to flag that the bucket is empty.
// As we have only 128 entries per Span, the offset array can be represented using an unsigned char. This trick makes the hash
// table have a very small memory overhead compared to many other implementations.
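// For illustration (derived from the constants above): a global bucket index b is expected to
// map to span (b >> SpanConstants::SpanShift) and local bucket (b & SpanConstants::LocalBucketMask);
// e.g. bucket 300 would fall into span 2 at local index 44.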
template<typename Node>
struct Span {
    // Entry is a slot available for storing a Node. The Span holds a pointer to
    // an array of Entries. Upon construction of the array, those entries are
    // unused, and nextFree() is used to set up a singly linked list
    // of free entries.
    // When a node gets inserted, the first free entry is picked, removed
    // from the singly linked list, and the Node is constructed in place.
struct Entry {
struct { alignas(Node) unsigned char data[sizeof(Node)]; } storage;
unsigned char &nextFree() { return *reinterpret_cast<unsigned char *>(&storage); }
Node &node() { return *reinterpret_cast<Node *>(&storage); }
};
unsigned char offsets[SpanConstants::NEntries];
Entry *entries = nullptr;
unsigned char allocated = 0;
unsigned char nextFree = 0;
Span() noexcept
{
memset(offsets, SpanConstants::UnusedEntry, sizeof(offsets));
}
~Span()
{
freeData();
}
void freeData() noexcept(std::is_nothrow_destructible<Node>::value)
{
if (entries) {
if constexpr (!std::is_trivially_destructible<Node>::value) {
for (auto o : offsets) {
if (o != SpanConstants::UnusedEntry)
entries[o].node().~Node();
}
}
delete[] entries;
entries = nullptr;
}
}
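    // insert() reserves the bucket at index i: it pops the head of the span's free list
    // (growing the entry storage via addStorage() when the free list is exhausted), records
    // the chosen slot in offsets[i], and returns raw storage for the caller to construct
    // the Node in.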
Node *insert(size_t i)
{
Q_ASSERT(i < SpanConstants::NEntries);
Q_ASSERT(offsets[i] == SpanConstants::UnusedEntry);
if (nextFree == allocated)
addStorage();
unsigned char entry = nextFree;
Q_ASSERT(entry < allocated);
nextFree = entries[entry].nextFree();
offsets[i] = entry;
return &entries[entry].node();
}
void erase(size_t bucket) noexcept(std::is_nothrow_destructible<Node>::value)
{
Q_ASSERT(bucket < SpanConstants::NEntries);
Q_ASSERT(offsets[bucket] != SpanConstants::UnusedEntry);
unsigned char entry = offsets[bucket];
offsets[bucket] = SpanConstants::UnusedEntry;
entries[entry].node().~Node();
entries[entry].nextFree() = nextFree;
nextFree = entry;
}
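    // erase() is the inverse of insert(): it destroys the Node, marks the bucket as
    // UnusedEntry and pushes the freed slot back onto the head of the free list, so the
    // most recently freed slot is reused first.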
size_t offset(size_t i) const noexcept
{
return offsets[i];
}
bool hasNode(size_t i) const noexcept
{
return (offsets[i] != SpanConstants::UnusedEntry);
}
Node &at(size_t i) noexcept
{
Q_ASSERT(i < SpanConstants::NEntries);
Q_ASSERT(offsets[i] != SpanConstants::UnusedEntry);
return entries[offsets[i]].node();
}
const Node &at(size_t i) const noexcept
{
Q_ASSERT(i < SpanConstants::NEntries);
Q_ASSERT(offsets[i] != SpanConstants::UnusedEntry);
return entries[offsets[i]].node();
}
Node &atOffset(size_t o) noexcept
{
Q_ASSERT(o < allocated);
return entries[o].node();
}
const Node &atOffset(size_t o) const noexcept
{
Q_ASSERT(o < allocated);
return entries[o].node();
}
void moveLocal(size_t from, size_t to) noexcept
{
Q_ASSERT(offsets[from] != SpanConstants::UnusedEntry);
Q_ASSERT(offsets[to] == SpanConstants::UnusedEntry);
offsets[to] = offsets[from];
offsets[from] = SpanConstants::UnusedEntry;
}
void moveFromSpan(Span &fromSpan, size_t fromIndex, size_t to) noexcept(std::is_nothrow_move_constructible_v<Node>)
{
Q_ASSERT(to < SpanConstants::NEntries);
Q_ASSERT(offsets[to] == SpanConstants::UnusedEntry);
Q_ASSERT(fromIndex < SpanConstants::NEntries);
Q_ASSERT(fromSpan.offsets[fromIndex] != SpanConstants::UnusedEntry);
// take a slot from this span's free list, growing the storage if necessary
if (nextFree == allocated)
addStorage();
Q_ASSERT(nextFree < allocated);
offsets[to] = nextFree;
Entry &toEntry = entries[nextFree];
nextFree = toEntry.nextFree();
// unlink the node from the source span's lookup table
size_t fromOffset = fromSpan.offsets[fromIndex];
fromSpan.offsets[fromIndex] = SpanConstants::UnusedEntry;
Entry &fromEntry = fromSpan.entries[fromOffset];
if constexpr (isRelocatable<Node>()) {
memcpy(&toEntry, &fromEntry, sizeof(Entry));
} else {
new (&toEntry.node()) Node(std::move(fromEntry.node()));
fromEntry.node().~Node();
}
// return the vacated slot to the source span's free list
fromEntry.nextFree() = fromSpan.nextFree;
fromSpan.nextFree = static_cast<unsigned char>(fromOffset);
}
void addStorage()
{
Q_ASSERT(allocated < SpanConstants::NEntries);
Q_ASSERT(nextFree == allocated);
// the hash table should always be between 25 and 50% full
// this implies that on average we have between 32 and 64 entries
// in here. More precisely, the number of occupied entries follows a binomial
// distribution.
// For a 25% filled table, the average is 32 entries, with a 95% chance that we have between
// 23 and 41 entries.
// For a 50% filled table, the average is 64 entries, with a 95% chance that we have between
// 53 and 75 entries.
// Since we only resize the table once it's 50% filled and we want to avoid copies of
// data where possible, we initially allocate 48 entries, then resize to 80 entries, after that
// resize by increments of 16. That way, we usually only get one resize of the table
// while filling it.
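// As an illustrative sketch (not part of the algorithm itself, and assuming
// SpanConstants::NEntries == 128): the allocation sizes computed below are
// 48 (= 128/8*3), then 80 (= 128/8*5), then 96, 112 and finally 128,
// growing in steps of 16 (= 128/8).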
size_t alloc;
static_assert(SpanConstants::NEntries % 8 == 0);
if (!allocated)
alloc = SpanConstants::NEntries / 8 * 3;
else if (allocated == SpanConstants::NEntries / 8 * 3)
alloc = SpanConstants::NEntries / 8 * 5;
else
alloc = allocated + SpanConstants::NEntries/8;
Entry *newEntries = new Entry[alloc];
// we only add storage if the previous storage was fully filled, so
// simply copy the old data over
if constexpr (isRelocatable<Node>()) {
if (allocated)
memcpy(newEntries, entries, allocated * sizeof(Entry));
} else {
for (size_t i = 0; i < allocated; ++i) {
new (&newEntries[i].node()) Node(std::move(entries[i].node()));
entries[i].node().~Node();
}
}
for (size_t i = allocated; i < alloc; ++i) {
newEntries[i].nextFree() = uchar(i + 1);
}
delete[] entries;
entries = newEntries;
allocated = uchar(alloc);
}
};
// QHash uses a power of two growth policy.
namespace GrowthPolicy {
inline constexpr size_t bucketsForCapacity(size_t requestedCapacity) noexcept
{
constexpr int SizeDigits = std::numeric_limits<size_t>::digits;
// We want to use at minimum a full span (128 entries), so we hardcode that for any requested
// capacity <= 64. Any capacity above that gets at least twice as many buckets,
// rounded up to the next power of two.
if (requestedCapacity <= 64)
return SpanConstants::NEntries;
// Same as
// qNextPowerOfTwo(2 * requestedCapacity);
//
// but ensuring that neither our multiplication nor the function overflows.
// Additionally, the maximum memory allocation is 2^31-1 or 2^63-1 bytes
// (limited by qsizetype and ptrdiff_t).
int count = qCountLeadingZeroBits(requestedCapacity);
if (count < 2)
return (std::numeric_limits<size_t>::max)(); // will cause std::bad_alloc
return size_t(1) << (SizeDigits - count + 1);
}
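// A few illustrative values (a sketch, assuming SpanConstants::NEntries == 128):
//   bucketsForCapacity(10)   -> 128   (never less than one full span)
//   bucketsForCapacity(100)  -> 256   (next power of two >= 2 * 100)
//   bucketsForCapacity(1000) -> 2048  (next power of two >= 2 * 1000)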
inline constexpr size_t bucketForHash(size_t nBuckets, size_t hash) noexcept
{
return hash & (nBuckets - 1);
}
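// Because nBuckets is always a power of two here, the mask above is equivalent
// to hash % nBuckets; e.g. bucketForHash(256, h) selects the low 8 bits of h.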
} // namespace GrowthPolicy
template <typename Node>
struct iterator;
template <typename Node>
struct Data
{
using Key = typename Node::KeyType;
using T = typename Node::ValueType;
using Span = QHashPrivate::Span<Node>;
using iterator = QHashPrivate::iterator<Node>;
QtPrivate::RefCount ref = {{1}};
size_t size = 0;
size_t numBuckets = 0;
size_t seed = 0;
Span *spans = nullptr;
static constexpr size_t maxNumBuckets() noexcept
{
return (std::numeric_limits<ptrdiff_t>::max)() / sizeof(Span);
}
struct Bucket {
Span *span;
size_t index;
Bucket(Span *s, size_t i) noexcept
: span(s), index(i)
{}
Bucket(const Data *d, size_t bucket) noexcept
: span(d->spans + (bucket >> SpanConstants::SpanShift)),
index(bucket & SpanConstants::LocalBucketMask)
{}
Bucket(iterator it) noexcept
: Bucket(it.d, it.bucket)
{}
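// Maps the (span, index) pair back to a global bucket number. As an example
// (assuming 128-entry spans, i.e. SpanShift == 7): span index 2 with local
// index 5 corresponds to bucket (2 << 7) | 5 == 261.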
size_t toBucketIndex(const Data *d) const noexcept
{
return ((span - d->spans) << SpanConstants::SpanShift) | index;
}
iterator toIterator(const Data *d) const noexcept { return iterator{d, toBucketIndex(d)}; }
// advance to the next bucket, wrapping around to the first span once the
// end of the table is reached
void advanceWrapped(const Data *d) noexcept
{
advance_impl(d, d->spans);
}
// advance to the next bucket; the span pointer becomes nullptr once the
// end of the table is reached
void advance(const Data *d) noexcept
{
advance_impl(d, nullptr);
}
bool isUnused() const noexcept
{
return !span->hasNode(index);
}
size_t offset() const noexcept
{
return span->offset(index);
}
Node &nodeAtOffset(size_t offset)
{
return span->atOffset(offset);
}
Node *node()
{
return &span->at(index);
}
Node *insert() const
{
return span->insert(index);
}
private:
friend bool operator==(Bucket lhs, Bucket rhs) noexcept
{
return lhs.span == rhs.span && lhs.index == rhs.index;
}
friend bool operator!=(Bucket lhs, Bucket rhs) noexcept { return !(lhs == rhs); }
void advance_impl(const Data *d, Span *whenAtEnd) noexcept
{
Q_ASSERT(span);
++index;
if (Q_UNLIKELY(index == SpanConstants::NEntries)) {
index = 0;
++span;
if (span - d->spans == ptrdiff_t(d->numBuckets >> SpanConstants::SpanShift))
span = whenAtEnd;
}
}
};
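    // Allocate the span array for a table of numBuckets buckets and return both
    // the pointer and the number of spans. The bucket count is capped so that
    // the allocation size cannot overflow a qptrdiff; exceeding the cap is
    // treated like an allocation failure.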
static auto allocateSpans(size_t numBuckets)
{
struct R {
Span *spans;
size_t nSpans;
};
constexpr qptrdiff MaxSpanCount = (std::numeric_limits<qptrdiff>::max)() / sizeof(Span);
constexpr size_t MaxBucketCount = MaxSpanCount << SpanConstants::SpanShift;
if (numBuckets > MaxBucketCount) {
Q_CHECK_PTR(false);
Q_UNREACHABLE(); // no exceptions and no assertions -> no error reporting
}
size_t nSpans = numBuckets >> SpanConstants::SpanShift;
return R{ new Span[nSpans], nSpans };
}
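    // Construct an empty table sized for at least `reserve` elements: the growth
    // policy turns the requested capacity into a bucket count, the spans start
    // out empty, and the per-table hash seed is taken from the global seed.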
Data(size_t reserve = 0)
{
numBuckets = GrowthPolicy::bucketsForCapacity(reserve);
spans = allocateSpans(numBuckets).spans;
seed = QHashSeed::globalSeed();
}
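    // Copy all nodes of `other` into this table. With `resized` == false the
    // bucket layout is unchanged, so every node keeps its span and index; with
    // `resized` == true the geometry differs and each key is re-hashed through
    // findBucket() into a bucket that is guaranteed to be unused.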
void reallocationHelper(const Data &other, size_t nSpans, bool resized)
{
for (size_t s = 0; s < nSpans; ++s) {
const Span &span = other.spans[s];
for (size_t index = 0; index < SpanConstants::NEntries; ++index) {
if (!span.hasNode(index))
continue;
const Node &n = span.at(index);
auto it = resized ? findBucket(n.key) : Bucket { spans + s, index };
Q_ASSERT(it.isUnused());
Node *newNode = it.insert();
new (newNode) Node(n);
}
}
}
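    // Plain copy: the bucket count matches `other`, so nodes can be copied
    // span by span without rehashing.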
Data(const Data &other) : size(other.size), numBuckets(other.numBuckets), seed(other.seed)
{
auto r = allocateSpans(numBuckets);
spans = r.spans;
reallocationHelper(other, r.nSpans, false);
}
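    // Copy with a new minimum capacity; keys only need to be re-hashed when the
    // resulting bucket count differs from the source table's.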
Data(const Data &other, size_t reserved) : size(other.size), seed(other.seed)
{
numBuckets = GrowthPolicy::bucketsForCapacity(qMax(size, reserved));
spans = allocateSpans(numBuckets).spans;
size_t otherNSpans = other.numBuckets >> SpanConstants::SpanShift;
reallocationHelper(other, otherNSpans, numBuckets != other.numBuckets);
}
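    // Detach helpers for implicit sharing: return a fresh, unshared copy of the
    // data (optionally sized for `size` elements) and drop our reference to the
    // shared Data, deleting it if it was the last one.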
static Data *detached(Data *d)
{
if (!d)
return new Data;
Data *dd = new Data(*d);
if (!d->ref.deref())
delete d;
return dd;
}
static Data *detached(Data *d, size_t size)
{
if (!d)
return new Data(size);
Data *dd = new Data(*d, size);
if (!d->ref.deref())
delete d;
return dd;
}
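    // Drop all spans, and with them all nodes; the table is left empty with
    // zero buckets.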
void clear()
{
delete[] spans;
spans = nullptr;
size = 0;
numBuckets = 0;
}
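    // Rebind an iterator obtained from the shared data onto this detached copy:
    // same bucket position, new owning Data.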
iterator detachedIterator(iterator other) const noexcept
{
return iterator{this, other.bucket};
}
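    // begin() starts at bucket 0 and skips forward to the first occupied entry;
    // end() is the default-constructed iterator that owns no data.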
iterator begin() const noexcept
{
iterator it{ this, 0 };
if (it.isUnused())
++it;
return it;
}
constexpr iterator end() const noexcept
{
return iterator();
}
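    // Re-bucket the table for at least `sizeHint` elements (defaulting to the
    // current size): allocate a new span array for the new bucket count, move
    // every node into the bucket its hash maps to in the new geometry, then
    // free the old spans.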
void rehash(size_t sizeHint = 0)
{
if (sizeHint == 0)
sizeHint = size;
size_t newBucketCount = GrowthPolicy::bucketsForCapacity(sizeHint);
Span *oldSpans = spans;
size_t oldBucketCount = numBuckets;
spans = allocateSpans(newBucketCount).spans;
numBuckets = newBucketCount;
size_t oldNSpans = oldBucketCount >> SpanConstants::SpanShift;
for (size_t s = 0; s < oldNSpans; ++s) {
Span &span = oldSpans[s];
for (size_t index = 0; index < SpanConstants::NEntries; ++index) {
if (!span.hasNode(index))
continue;
Node &n = span.at(index);
auto it = findBucket(n.key);
Q_ASSERT(it.isUnused());
Node *newNode = it.insert();
new (newNode) Node(std::move(n));
}
span.freeData();
}
delete[] oldSpans;
}
size_t nextBucket(size_t bucket) const noexcept
{
++bucket;
if (bucket == numBuckets)
bucket = 0;
return bucket;
}
float loadFactor() const noexcept
{
return float(size)/numBuckets;
}
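    // The table should grow (and be rehashed) once it is half full: with 256
    // buckets, shouldGrow() starts returning true when the size reaches 128,
    // i.e. at a load factor of 0.5.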
bool shouldGrow() const noexcept
{
return size >= (numBuckets >> 1);
}
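    // Find the bucket for `key`: hash it with the per-table seed, map the hash
    // to a starting bucket through the growth policy, then probe linearly from
    // there until the matching entry, or an unused bucket showing the key is
    // absent, is found.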
template <typename K> Bucket findBucket(const K &key) const noexcept
{
static_assert(std::is_same_v<std::remove_cv_t<Key>, K> ||
QHashHeterogeneousSearch<std::remove_cv_t<Key>, K>::value);
Q_ASSERT(numBuckets > 0);
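// the key's hash selects its ideal bucket; collisions are resolved by
// linear probing (advanceWrapped below), wrapping around at the end of the table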
size_t hash = QHashPrivate::calculateHash(key, seed);
Bucket bucket(this, GrowthPolicy::bucketForHash(numBuckets, hash));
// loop over the buckets until we find the entry we are searching for,
// or an empty slot, in which case we know the entry doesn't exist
while (true) {
size_t offset = bucket.offset();
if (offset == SpanConstants::UnusedEntry) {
return bucket;
} else {
Node &n = bucket.nodeAtOffset(offset);
if (qHashEquals(n.key, key))
return bucket;
}
bucket.advanceWrapped(this);
}
}
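// convenience wrapper around findBucket(): returns the node holding the key,
// or nullptr if the key is not present in the table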
template <typename K> Node *findNode(const K &key) const noexcept
{
auto bucket = findBucket(key);
if (bucket.isUnused())
return nullptr;
return bucket.node();
}
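// result of findOrInsert(): an iterator to the bucket, plus a flag telling the
// caller whether the node already existed there (true) or still has to be
// constructed in place (false)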
struct InsertionResult
{
iterator it;
bool initialized;
};
template <typename K> InsertionResult findOrInsert(const K &key) noexcept
{
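// null placeholder bucket; replaced by a real lookup only if the table
// already has buckets (and again after a possible rehash below)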
Bucket it(static_cast<Span *>(nullptr), 0);
if (numBuckets > 0) {
it = findBucket(key);
if (!it.isUnused())
return { it.toIterator(this), true };
}
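// grow and rehash when adding one more entry would push the load factor past its limit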
if (shouldGrow()) {
rehash(size + 1);
it = findBucket(key); // need to get a new iterator after rehashing
}
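// the bucket is guaranteed to be free here; claim it and let the caller
// construct the node (signalled by initialized == false)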
Q_ASSERT(it.span != nullptr);
Q_ASSERT(it.isUnused());
it.insert();
++size;
return { it.toIterator(this), false };
}
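// removes the node at 'bucket' and shifts the entries that follow in its
// probe chain back so that no hole is left behind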
void erase(Bucket bucket) noexcept(std::is_nothrow_destructible<Node>::value)
{
Q_ASSERT(bucket.span->hasNode(bucket.index));
bucket.span->erase(bucket.index);
--size;
// re-insert the entries that follow in the probe chain to avoid leaving holes
Bucket next = bucket;
while (true) {
next.advanceWrapped(this);
size_t offset = next.offset();
if (offset == SpanConstants::UnusedEntry)
return;
size_t hash = QHashPrivate::calculateHash(next.nodeAtOffset(offset).key, seed);
Bucket newBucket(this, GrowthPolicy::bucketForHash(numBuckets, hash));
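// probe from the entry's ideal bucket towards its current position: if we
// reach the hole first, the entry's probe chain passes through the hole and
// the entry has to be moved back into it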
while (true) {
if (newBucket == next) {
// nothing to do, item is at the right place
break;
} else if (newBucket == bucket) {
// move into the hole we created earlier
if (next.span == bucket.span) {
bucket.span->moveLocal(next.index, bucket.index);
} else {
// move between spans, more expensive
bucket.span->moveFromSpan(*next.span, next.index, bucket.index);
}
bucket = next;
break;
}
newBucket.advanceWrapped(this);
}
}
}
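// The destructor only needs to delete the span array; the Span destructors
// (defined earlier in this file) are expected to release the nodes they store.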
~Data()
{
delete [] spans;
}
};
template <typename Node>
struct iterator {
using Span = QHashPrivate::Span<Node>;
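// An iterator is a pointer to the table's Data plus a flat bucket number;
// span() gives the span the bucket falls into and index() the bucket's
// position inside that span.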
const Data<Node> *d = nullptr;
size_t bucket = 0;
size_t span() const noexcept { return bucket >> SpanConstants::SpanShift; }
size_t index() const noexcept { return bucket & SpanConstants::LocalBucketMask; }
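// A bucket is unused when its span holds no node at that local index.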
inline bool isUnused() const noexcept { return !d->spans[span()].hasNode(index()); }
inline Node *node() const noexcept
{
Q_ASSERT(!isUnused());
return &d->spans[span()].at(index());
}
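// The end iterator is encoded as a null Data pointer.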
bool atEnd() const noexcept { return !d; }
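// Pre-increment: step forward to the next occupied bucket; once the last
// bucket has been passed, decay into the end iterator (d == nullptr).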
iterator operator++() noexcept
{
while (true) {
++bucket;
if (bucket == d->numBuckets) {
d = nullptr;
bucket = 0;
break;
}
if (!isUnused())
break;
}
return *this;
}
bool operator==(iterator other) const noexcept
{ return d == other.d && bucket == other.bucket; }
bool operator!=(iterator other) const noexcept
{ return !(*this == other); }
};
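// Picks the type used to build a key from a heterogeneous lookup argument:
// if the argument already is the hash's key type (ignoring cv/ref qualifiers),
// it is forwarded unchanged; otherwise it is converted to HashKey first.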
template <typename HashKey, typename KeyArgument>
using HeterogenousConstructProxy = std::conditional_t<
std::is_same_v<HashKey, q20::remove_cvref_t<KeyArgument>>,
KeyArgument, // HashKey == KeyArg w/ potential modifiers, so we keep modifiers
HashKey
>;
} // namespace QHashPrivate
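// Illustrative usage sketch only (not part of the class below):
//     QHash<QString, int> ages;
//     ages.insert(QStringLiteral("alice"), 30);
//     int a = ages.value(QStringLiteral("alice")); // 30; a default-constructed T is returned for missing keys
//     QHash<QString, int> copy = ages;             // cheap, implicitly shared copy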
template <typename Key, typename T>
class QHash
{
using Node = QHashPrivate::Node<Key, T>;
using Data = QHashPrivate::Data<Node>;
friend class QSet<Key>;
friend class QMultiHash<Key, T>;
friend tst_QHash;
Data *d = nullptr;
public:
using key_type = Key;
using mapped_type = T;
using value_type = T;
using size_type = qsizetype;
using difference_type = qsizetype;
using reference = T &;
using const_reference = const T &;
inline QHash() noexcept = default;
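// Construct from an initializer list; the table is allocated for list.size()
// elements up front so the inserts below should not need to grow it.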
inline QHash(std::initializer_list<std::pair<Key,T> > list)
: d(new Data(list.size()))
{
for (typename std::initializer_list<std::pair<Key,T> >::const_iterator it = list.begin(); it != list.end(); ++it)
insert(it->first, it->second);
}
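// Copying is cheap: QHash is implicitly shared, so a copy only adds a
// reference to the other hash's data.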
QHash(const QHash &other) noexcept
: d(other.d)
{
if (d)
d->ref.ref();
}
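// The static_asserts reject key/value types with throwing destructors; the
// shared data is deleted only when the last reference goes away.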
~QHash()
{
static_assert(std::is_nothrow_destructible_v<Key>, "Types with throwing destructors are not supported in Qt containers.");
static_assert(std::is_nothrow_destructible_v<T>, "Types with throwing destructors are not supported in Qt containers.");
if (d && !d->ref.deref())
delete d;
}
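// Copy assignment shares the other hash's data and releases the previous
// reference; the d != other.d check makes self-assignment a no-op.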
QHash &operator=(const QHash &other) noexcept(std::is_nothrow_destructible<Node>::value)
{
if (d != other.d) {
Data *o = other.d;
if (o)
o->ref.ref();
if (d && !d->ref.deref())
delete d;
d = o;
}
return *this;
}
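// Move construction steals the data pointer and leaves the moved-from hash
// empty (null d).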
QHash(QHash &&other) noexcept
: d(std::exchange(other.d, nullptr))
{
}
QT_MOVE_ASSIGNMENT_OPERATOR_IMPL_VIA_MOVE_AND_SWAP(QHash)
#ifdef Q_QDOC
template <typename InputIterator>
QHash(InputIterator f, InputIterator l);
#else
template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasKeyAndValue<InputIterator> = true>
QHash(InputIterator f, InputIterator l)
: QHash()
{
QtPrivate::reserveIfForwardIterator(this, f, l);
for (; f != l; ++f)
insert(f.key(), f.value());
}
template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasFirstAndSecond<InputIterator> = true>
QHash(InputIterator f, InputIterator l)
: QHash()
{
QtPrivate::reserveIfForwardIterator(this, f, l);
for (; f != l; ++f) {
auto &&e = *f;
using V = decltype(e);
insert(std::forward<V>(e).first, std::forward<V>(e).second);
}
}
#endif
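// The two range constructors above accept any iterator pair whose elements
// expose either key()/value() (Qt-style associative iterators) or
// first/second (std::pair-style). Illustrative sketch, assuming <unordered_map>
// is available at the call site:
//
//     std::unordered_map<int, QString> src = { { 1, QStringLiteral("one") } };
//     QHash<int, QString> h(src.begin(), src.end());   // uses .first / .second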
// swap() uses qt_ptr_swap() instead of qSwap()/std::swap: for plain pointers it
// is unconditionally noexcept and avoids instantiating the swap type traits.
void swap(QHash &other) noexcept { qt_ptr_swap(d, other.d); }
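// Member swap above is a constant-time, noexcept exchange of the two data
// pointers; no elements are copied. Sketch:
//
//     QHash<int, int> a, b;
//     a.insert(1, 1);
//     a.swap(b);   // b now holds {1 -> 1}, a is empty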
class const_iterator;
#ifndef Q_QDOC
private:
static bool compareIterators(const const_iterator &lhs, const const_iterator &rhs)
{
return lhs.i.node()->valuesEqual(rhs.i.node());
}
template <typename AKey = Key, typename AT = T,
QTypeTraits::compare_eq_result_container<QHash, AKey, AT> = true>
friend bool comparesEqual(const QHash &lhs, const QHash &rhs) noexcept
{
if (lhs.d == rhs.d)
return true;
if (lhs.size() != rhs.size())
return false;
for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
const_iterator i = lhs.find(it.key());
if (i == lhs.end() || !compareIterators(i, it))
return false;
}
// all values must be the same as size is the same
return true;
}
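// Note on the equality helper above: comparison is order-independent. Every key
// of rhs is looked up in lhs and the stored values are compared, so two hashes
// populated in different insertion orders still compare equal. Sketch (requires
// operator== on the value type):
//
//     QHash<int, int> a; a.insert(1, 10); a.insert(2, 20);
//     QHash<int, int> b; b.insert(2, 20); b.insert(1, 10);
//     // a == b evaluates to true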
QT_DECLARE_EQUALITY_OPERATORS_HELPER(QHash, QHash, /* non-constexpr */, noexcept,
template <typename AKey = Key, typename AT = T,
QTypeTraits::compare_eq_result_container<QHash, AKey, AT> = true>)
public:
#else
friend bool operator==(const QHash &lhs, const QHash &rhs) noexcept;
friend bool operator!=(const QHash &lhs, const QHash &rhs) noexcept;
#endif // Q_QDOC
inline qsizetype size() const noexcept { return d ? qsizetype(d->size) : 0; }
[[nodiscard]]
inline bool isEmpty() const noexcept { return !d || d->size == 0; }
inline qsizetype capacity() const noexcept { return d ? qsizetype(d->numBuckets >> 1) : 0; }
void reserve(qsizetype size)
{
// reserve(0) is used in squeeze()
if (size && (this->capacity() >= size))
return;
if (isDetached())
d->rehash(size);
else
d = Data::detached(d, size_t(size));
}
inline void squeeze()
{
if (capacity())
reserve(0);
}
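// Usage sketch for reserve()/squeeze() above: reserve(n) pre-allocates room for
// at least n elements so that subsequent inserts need not rehash, and squeeze()
// asks for the smallest table that still fits the current contents:
//
//     QHash<int, int> h;
//     h.reserve(1000);   // one allocation up front
//     // ... insert up to ~1000 items without intermediate rehashing ...
//     h.squeeze();       // shrink the table to fit the current size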
inline void detach() { if (!d || d->ref.isShared()) d = Data::detached(d); }
inline bool isDetached() const noexcept { return d && !d->ref.isShared(); }
bool isSharedWith(const QHash &other) const noexcept { return d == other.d; }
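// detach()/isDetached()/isSharedWith() above expose the implicit-sharing state:
// copies share one Data block until a mutating call detaches. Sketch:
//
//     QHash<int, int> a; a.insert(1, 1);
//     QHash<int, int> b = a;   // shallow copy: b.isSharedWith(a) == true
//     b.insert(2, 2);          // detaches: b now owns its own Data block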
void clear() noexcept(std::is_nothrow_destructible<Node>::value)
{
if (d && !d->ref.deref())
delete d;
d = nullptr;
}
bool remove(const Key &key)
{
return removeImpl(key);
}
private:
template <typename K> bool removeImpl(const K &key)
{
if (isEmpty()) // prevents detaching shared null
return false;
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
it = typename Data::Bucket(d, bucket); // reattach in case of detach
if (it.isUnused())
return false;
d->erase(it);
return true;
}
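// Note on removeImpl() above (and takeImpl() below): the bucket is located on
// the possibly shared table first, then detach() may copy the table, so the
// bucket index is used to re-derive an iterator into the (possibly new) copy.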
public:
template <typename Predicate>
qsizetype removeIf(Predicate pred)
{
return QtPrivate::associative_erase_if(*this, pred);
}
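// removeIf() above forwards to QtPrivate::associative_erase_if(). Illustrative
// sketch, assuming the std::pair-style predicate overload:
//
//     QHash<QString, int> h = { { QStringLiteral("a"), 1 }, { QStringLiteral("b"), -2 } };
//     h.removeIf([](const std::pair<const QString &, int &> &e) { return e.second < 0; });
//     // entries with negative values are removed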
T take(const Key &key)
{
return takeImpl(key);
}
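// take() above behaves like remove() but returns the removed value, or a
// value-initialized T when the key is absent. Sketch:
//
//     QHash<QString, int> h; h.insert(QStringLiteral("x"), 42);
//     int v = h.take(QStringLiteral("x"));   // v == 42, "x" removed from h
//     int w = h.take(QStringLiteral("y"));   // w == 0 (key absent)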
private:
template <typename K> T takeImpl(const K &key)
{
if (isEmpty()) // prevents detaching shared null
return T();
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
it = typename Data::Bucket(d, bucket); // reattach in case of detach
if (it.isUnused())
return T();
T value = it.node()->takeValue();
d->erase(it);
return value;
}
public:
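// Returns true if an entry with the given key exists.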
bool contains(const Key &key) const noexcept
{
if (!d)
return false;
return d->findNode(key) != nullptr;
}
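// Returns the number of entries stored for key; in QHash (as opposed to QMultiHash) this is either 0 or 1.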
qsizetype count(const Key &key) const noexcept
{
return contains(key) ? 1 : 0;
}
private:
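// Linear scan over all entries; returns a pointer to the first key whose mapped value compares equal to value, or nullptr if there is none.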
const Key *keyImpl(const T &value) const noexcept
{
if (d) {
const_iterator i = begin();
while (i != end()) {
if (i.value() == value)
return &i.key();
++i;
}
}
return nullptr;
}
public:
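// Returns the first key mapped to value (or defaultKey / a default-constructed Key if no entry matches); an O(size) search.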
Key key(const T &value) const noexcept
{
if (auto *k = keyImpl(value))
return *k;
else
return Key();
}
Key key(const T &value, const Key &defaultKey) const noexcept
{
if (auto *k = keyImpl(value))
return *k;
else
return defaultKey;
}
private:
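// Hash lookup; returns a pointer to the value stored for key, or nullptr if the key is absent.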
template <typename K>
T *valueImpl(const K &key) const noexcept
{
if (d) {
Node *n = d->findNode(key);
if (n)
return &n->value;
}
return nullptr;
}
public:
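// Returns the value stored for key, or defaultValue / a default-constructed T when the key is not present.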
T value(const Key &key) const noexcept
{
if (T *v = valueImpl(key))
return *v;
else
return T();
}
T value(const Key &key, const T &defaultValue) const noexcept
{
if (T *v = valueImpl(key))
return *v;
else
return defaultValue;
}
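// Returns a modifiable reference to the value for key, inserting a default-constructed value
// first if the key is not yet in the hash (illustration: ++counts[word] default-constructs the
// counter on first use and then increments it).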
T &operator[](const Key &key)
{
return *tryEmplace(key).iterator;
}
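// Const overload: never inserts; equivalent to value(key).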
const T operator[](const Key &key) const noexcept
{
return value(key);
}
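// keys() copies every key (in arbitrary order); keys(value) scans all entries and copies only the keys mapped to value.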
QList<Key> keys() const { return QList<Key>(keyBegin(), keyEnd()); }
QList<Key> keys(const T &value) const
{
QList<Key> res;
const_iterator i = begin();
while (i != end()) {
if (i.value() == value)
res.append(i.key());
++i;
}
return res;
}
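// Copies every value, in arbitrary (iteration) order.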
QList<T> values() const { return QList<T>(begin(), end()); }
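// Forward iterator over the hash. Like references, iterators are invalidated when the table grows (rehashes) or when the hash detaches.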
class iterator
{
using piter = typename QHashPrivate::iterator<Node>;
friend class const_iterator;
friend class QHash<Key, T>;
friend class QSet<Key>;
piter i;
explicit inline iterator(piter it) noexcept : i(it) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef qptrdiff difference_type;
typedef T value_type;
typedef T *pointer;
typedef T &reference;
constexpr iterator() noexcept = default;
inline const Key &key() const noexcept { return i.node()->key; }
inline T &value() const noexcept { return i.node()->value; }
inline T &operator*() const noexcept { return i.node()->value; }
inline T *operator->() const noexcept { return &i.node()->value; }
inline bool operator==(const iterator &o) const noexcept { return i == o.i; }
inline bool operator!=(const iterator &o) const noexcept { return i != o.i; }
inline iterator &operator++() noexcept
{
++i;
return *this;
}
inline iterator operator++(int) noexcept
{
iterator r = *this;
++i;
return r;
}
inline bool operator==(const const_iterator &o) const noexcept { return i == o.i; }
inline bool operator!=(const const_iterator &o) const noexcept { return i != o.i; }
};
friend class iterator;
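// Read-only iterator over the hash. It is implicitly constructible from
// iterator, so code expecting a const_iterator also accepts a mutable one.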
class const_iterator
{
using piter = typename QHashPrivate::iterator<Node>;
friend class iterator;
friend class QHash<Key, T>;
friend class QSet<Key>;
piter i;
explicit inline const_iterator(piter it) : i(it) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef qptrdiff difference_type;
typedef T value_type;
typedef const T *pointer;
typedef const T &reference;
constexpr const_iterator() noexcept = default;
inline const_iterator(const iterator &o) noexcept : i(o.i) { }
inline const Key &key() const noexcept { return i.node()->key; }
inline const T &value() const noexcept { return i.node()->value; }
inline const T &operator*() const noexcept { return i.node()->value; }
inline const T *operator->() const noexcept { return &i.node()->value; }
inline bool operator==(const const_iterator &o) const noexcept { return i == o.i; }
inline bool operator!=(const const_iterator &o) const noexcept { return i != o.i; }
inline const_iterator &operator++() noexcept
{
++i;
return *this;
}
inline const_iterator operator++(int) noexcept
{
const_iterator r = *this;
++i;
return r;
}
};
friend class const_iterator;
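// Wraps a const_iterator so that dereferencing yields the key rather than
// the value; base() returns the underlying const_iterator.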
class key_iterator
{
const_iterator i;
public:
typedef typename const_iterator::iterator_category iterator_category;
typedef qptrdiff difference_type;
typedef Key value_type;
typedef const Key *pointer;
typedef const Key &reference;
key_iterator() noexcept = default;
explicit key_iterator(const_iterator o) noexcept : i(o) { }
const Key &operator*() const noexcept { return i.key(); }
const Key *operator->() const noexcept { return &i.key(); }
bool operator==(key_iterator o) const noexcept { return i == o.i; }
bool operator!=(key_iterator o) const noexcept { return i != o.i; }
inline key_iterator &operator++() noexcept { ++i; return *this; }
inline key_iterator operator++(int) noexcept { return key_iterator(i++); }
const_iterator base() const noexcept { return i; }
};
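// Iterators exposing (key, value) pairs via QKeyValueIterator; the mutable
// variant allows the value, but not the key, to be modified.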
typedef QKeyValueIterator<const Key&, const T&, const_iterator> const_key_value_iterator;
typedef QKeyValueIterator<const Key&, T&, iterator> key_value_iterator;
// STL style
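// The mutable begin() detaches first (copy-on-write), so the returned
// iterator refers to data owned solely by this hash; the const overloads do
// not detach. end() iterators are default-constructed (null).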
inline iterator begin() { if (!d) return iterator(); detach(); return iterator(d->begin()); }
inline const_iterator begin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
inline const_iterator cbegin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
inline const_iterator constBegin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
inline iterator end() noexcept { return iterator(); }
inline const_iterator end() const noexcept { return const_iterator(); }
inline const_iterator cend() const noexcept { return const_iterator(); }
inline const_iterator constEnd() const noexcept { return const_iterator(); }
inline key_iterator keyBegin() const noexcept { return key_iterator(begin()); }
inline key_iterator keyEnd() const noexcept { return key_iterator(end()); }
inline key_value_iterator keyValueBegin() { return key_value_iterator(begin()); }
inline key_value_iterator keyValueEnd() { return key_value_iterator(end()); }
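    // Note: the mutable begin()/keyValueBegin() overloads call detach() and so
    // deep-copy implicitly shared data; prefer the const or c/const-prefixed
    // overloads when the hash is not being modified.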
inline const_key_value_iterator keyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
inline const_key_value_iterator constKeyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
inline const_key_value_iterator keyValueEnd() const noexcept { return const_key_value_iterator(end()); }
inline const_key_value_iterator constKeyValueEnd() const noexcept { return const_key_value_iterator(end()); }
    // Key/value range adaptor (QTBUG-4615): asKeyValueRange() returns a range of
    // key/value reference pairs suitable for C++17 structured bindings. The value
    // references are mutable when the hash is non-const, and the rvalue overloads
    // keep a moved-from hash alive for the lifetime of the range.
auto asKeyValueRange() & { return QtPrivate::QKeyValueRange(*this); }
auto asKeyValueRange() const & { return QtPrivate::QKeyValueRange(*this); }
auto asKeyValueRange() && { return QtPrivate::QKeyValueRange(std::move(*this)); }
auto asKeyValueRange() const && { return QtPrivate::QKeyValueRange(std::move(*this)); }
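    // Minimal usage sketch (illustrative only): the range yields pairs of
    // references, not references to pairs, so bind by value or by forwarding
    // reference; `auto &[k, v]` does not compile (an lvalue reference cannot
    // bind to the prvalue pair the iterator produces).
    //
    //     QHash<QString, int> hash;
    //     int sum = 0;
    //     for (auto [key, value] : hash.asKeyValueRange())
    //         value *= 2;                        // mutable reference into the hash
    //     for (auto &&[key, value] : std::as_const(hash).asKeyValueRange())
    //         sum += value;                      // const references, no detach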
struct TryEmplaceResult
{
QHash::iterator iterator;
bool inserted;
TryEmplaceResult() = default;
// Generated SMFs are fine!
TryEmplaceResult(QHash::iterator it, bool b)
: iterator(it), inserted(b)
{
}
// Implicit conversion _from_ the return-type of try_emplace:
Q_IMPLICIT TryEmplaceResult(const std::pair<key_value_iterator, bool> &p)
: iterator(p.first.base()), inserted(p.second)
{
}
// Implicit conversion _to_ the return-type of try_emplace:
Q_IMPLICIT operator std::pair<key_value_iterator, bool>()
{
return { key_value_iterator(iterator), inserted };
}
};
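    // Illustrative sketch only (it assumes a Qt-style tryEmplace() member
    // returning TryEmplaceResult alongside an STL-style try_emplace() member
    // returning std::pair<key_value_iterator, bool>; neither is shown here):
    //
    //     QHash<QString, int> hash;
    //     auto [it, inserted] = hash.tryEmplace(QStringLiteral("answer"), 42);
    //     // `it` is a QHash::iterator; `inserted` is true on first insertion.
    //
    // The two implicit conversions above let the same result be consumed in
    // either form.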
iterator erase(const_iterator it)
{
Q_ASSERT(it != constEnd());
detach();
// ensure a valid iterator across the detach:
iterator i = iterator{d->detachedIterator(it.i)};
typename Data::Bucket bucket(i.i);
d->erase(bucket);
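        // Data::erase() may backward-shift a later entry into the freed bucket.
        // If nothing was moved here (bucket unused) or this was the last bucket,
        // advance to reach the next element; otherwise the bucket already holds
        // the element that follows the erased one.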
if (bucket.toBucketIndex(d) == d->numBuckets - 1 || bucket.isUnused())
++i;
return i;
}
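    // Usage sketch (illustrative only): erase() returns an iterator to the
    // element following the removed one, so erase-while-iterating works as it
    // does for the standard containers:
    //
    //     QHash<QString, int> hash;
    //     for (auto it = hash.cbegin(); it != hash.cend(); ) {
    //         if (it.value() < 0)
    //             it = hash.erase(it);
    //         else
    //             ++it;
    //     }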
std::pair<iterator, iterator> equal_range(const Key &key)
{
return equal_range_impl(*this, key);
}
std::pair<const_iterator, const_iterator> equal_range(const Key &key) const noexcept
{
return equal_range_impl(*this, key);
}
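    // Usage sketch (illustrative only): QHash stores unique keys, so the
    // returned range contains at most one element:
    //
    //     QHash<int, QString> hash;
    //     auto [first, last] = hash.equal_range(42);
    //     for (auto it = first; it != last; ++it)
    //         ;   // body runs zero times (key absent) or exactly once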
private:
template <typename Hash, typename K> static auto equal_range_impl(Hash &self, const K &key)
{
auto first = self.find(key);
auto second = first;
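        // find() returns a default-constructed iterator (i.e. end()) when the
        // key is absent; only step past the element when it was found.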
if (second != decltype(first){})
++second;
return std::make_pair(first, second);
}
template <typename K> iterator findImpl(const K &key)
{
if (isEmpty()) // prevents detaching shared null
return end();
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
it = typename Data::Bucket(d, bucket); // reattach in case of detach
if (it.isUnused())
return end();
return iterator(it.toIterator(d));
}
template <typename K> const_iterator constFindImpl(const K &key) const noexcept
{
if (isEmpty())
return end();
auto it = d->findBucket(key);
if (it.isUnused())
return end();
return const_iterator({d, it.toBucketIndex(d)});
}
public:
typedef iterator Iterator;
typedef const_iterator ConstIterator;
inline qsizetype count() const noexcept { return d ? qsizetype(d->size) : 0; }
iterator find(const Key &key)
{
return findImpl(key);
}
const_iterator find(const Key &key) const noexcept
{
return constFindImpl(key);
}
const_iterator constFind(const Key &key) const noexcept
{
return find(key);
}
iterator insert(const Key &key, const T &value)
{
return emplace(key, value);
}
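    // Unites this hash with 'hash': every (key, value) element of 'hash' is
    // emplaced into this hash, overwriting the value of any key that is
    // already present.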
void insert(const QHash &hash)
{
if (d == hash.d || !hash.d)
return;
if (!d) {
*this = hash;
return;
}
detach();
for (auto it = hash.begin(); it != hash.end(); ++it)
emplace(it.key(), it.value());
}
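    // emplace() constructs the mapped value in place from 'args'. If 'key' is
    // already present its value is replaced; otherwise a new node is created,
    // growing and rehashing the table first if necessary.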
template <typename ...Args>
iterator emplace(const Key &key, Args &&... args)
{
Key copy = key; // Needs to be explicit for MSVC 2019
return emplace(std::move(copy), std::forward<Args>(args)...);
}
template <typename ...Args>
iterator emplace(Key &&key, Args &&... args)
{
if (isDetached()) {
if (d->shouldGrow()) // Construct the value now so that no dangling references are used
return emplace_helper(std::move(key), T(std::forward<Args>(args)...));
return emplace_helper(std::move(key), std::forward<Args>(args)...);
}
// else: we must detach
const auto copy = *this; // keep 'args' alive across the detach/growth
detach();
return emplace_helper(std::move(key), std::forward<Args>(args)...);
}
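    // tryEmplace()/try_emplace() follow std::unordered_map::try_emplace
    // semantics: the value is constructed and inserted only when 'key' is not
    // yet present, and the result reports whether an insertion took place.
    // A minimal usage sketch (assuming the TryEmplaceResult members are named
    // 'iterator' and 'inserted', as the implementation below suggests):
    //
    //     QHash<QString, int> h;
    //     auto r1 = h.tryEmplace(QStringLiteral("a"), 1); // r1.inserted == true
    //     auto r2 = h.tryEmplace(QStringLiteral("a"), 2); // r2.inserted == false; value stays 1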
template <typename... Args>
TryEmplaceResult tryEmplace(const Key &key, Args &&...args)
{
return tryEmplace_impl(key, std::forward<Args>(args)...);
}
template <typename... Args>
TryEmplaceResult tryEmplace(Key &&key, Args &&...args)
{
return tryEmplace_impl(std::move(key), std::forward<Args>(args)...);
}
TryEmplaceResult tryInsert(const Key &key, const T &value)
{
return tryEmplace_impl(key, value);
}
template <typename... Args>
std::pair<key_value_iterator, bool> try_emplace(const Key &key, Args &&...args)
{
return tryEmplace_impl(key, std::forward<Args>(args)...);
}
template <typename... Args>
std::pair<key_value_iterator, bool> try_emplace(Key &&key, Args &&...args)
{
return tryEmplace_impl(std::move(key), std::forward<Args>(args)...);
}
template <typename... Args>
key_value_iterator try_emplace(const_iterator /*hint*/, const Key &key, Args &&...args)
{
return key_value_iterator(tryEmplace_impl(key, std::forward<Args>(args)...).iterator);
}
template <typename... Args>
key_value_iterator try_emplace(const_iterator /*hint*/, Key &&key, Args &&...args)
{
return key_value_iterator(tryEmplace_impl(std::move(key), std::forward<Args>(args)...).iterator);
}
private:
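    // Shared implementation of the tryEmplace()/try_emplace() overloads above.
    // 'detachGuard' keeps the previously shared Data alive across detaching and
    // rehashing, so that 'key' and 'args' stay valid even if they reference
    // elements of this very hash.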
template <typename K, typename... Args>
TryEmplaceResult tryEmplace_impl(K &&key, Args &&...args)
{
if (!d)
detach();
QHash detachGuard;
typename Data::Bucket bucket = d->findBucket(key);
const bool shouldInsert = bucket.isUnused();
// Even if we don't insert we may have to detach because we are
// returning a non-const iterator:
if (!isDetached() || (shouldInsert && d->shouldGrow())) {
detachGuard = *this;
const bool resized = shouldInsert && d->shouldGrow();
const size_t bucketIndex = bucket.toBucketIndex(d);
// Like reserve(), but unconditionally detaching if no need to grow:
if (isDetached())
d->rehash(d->size + 1);
else
d = Data::detached(d, d->size + (shouldInsert ? 1 : 0));
bucket = resized ? d->findBucket(key) : typename Data::Bucket(d, bucketIndex);
}
if (shouldInsert) {
Node *n = bucket.insert();
using ConstructProxy = typename QHashPrivate::HeterogenousConstructProxy<Key, K>;
Node::createInPlace(n, ConstructProxy(std::forward<K>(key)),
std::forward<Args>(args)...);
++d->size;
}
return {iterator(bucket.toIterator(d)), shouldInsert};
}
public:
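    // STL-compatible observers for the underlying bucket table; the maximum
    // load factor is a fixed 0.5.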
float load_factor() const noexcept { return d ? d->loadFactor() : 0; }
static float max_load_factor() noexcept { return 0.5; }
size_t bucket_count() const noexcept { return d ? d->numBuckets : 0; }
static size_t max_bucket_count() noexcept { return Data::maxNumBuckets(); }
[[nodiscard]]
inline bool empty() const noexcept { return isEmpty(); }
private:
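    // Precondition: the hash is detached and, if growth was needed, already
    // resized. Creates a new node for 'key', or replaces the stored value in
    // place when the key already exists.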
template <typename ...Args>
iterator emplace_helper(Key &&key, Args &&... args)
{
auto result = d->findOrInsert(key);
if (!result.initialized)
Node::createInPlace(result.it.node(), std::move(key), std::forward<Args>(args)...);
else
result.it.node()->emplaceValue(std::forward<Args>(args)...);
return iterator(result.it);
}
template <typename K>
using if_heterogeneously_searchable = QHashPrivate::if_heterogeneously_searchable_with<Key, K>;
template <typename K>
using if_key_constructible_from = std::enable_if_t<std::is_constructible_v<Key, K>, bool>;
public:
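    // Heterogeneous lookup: the overloads below accept any key-like type K
    // declared heterogeneously searchable with Key (see
    // if_heterogeneously_searchable above), so the hash can be queried without
    // first converting K to Key (for instance, a QHash with QString keys can
    // typically be searched with a QStringView).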
template <typename K, if_heterogeneously_searchable<K> = true>
bool remove(const K &key)
{
return removeImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
T take(const K &key)
{
return takeImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
bool contains(const K &key) const
{
return d ? d->findNode(key) != nullptr : false;
}
template <typename K, if_heterogeneously_searchable<K> = true>
qsizetype count(const K &key) const
{
return contains(key) ? 1 : 0;
}
template <typename K, if_heterogeneously_searchable<K> = true>
T value(const K &key) const noexcept
{
if (auto *v = valueImpl(key))
return *v;
else
return T();
}
template <typename K, if_heterogeneously_searchable<K> = true>
T value(const K &key, const T &defaultValue) const noexcept
{
if (auto *v = valueImpl(key))
return *v;
else
return defaultValue;
}
template <typename K, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
T &operator[](const K &key)
{
return *tryEmplace(key).iterator;
}
template <typename K, if_heterogeneously_searchable<K> = true>
const T operator[](const K &key) const noexcept
{
return value(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
std::pair<iterator, iterator>
equal_range(const K &key)
{
return equal_range_impl(*this, key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
std::pair<const_iterator, const_iterator>
equal_range(const K &key) const noexcept
{
return equal_range_impl(*this, key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
iterator find(const K &key)
{
return findImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const_iterator find(const K &key) const noexcept
{
return constFindImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const_iterator constFind(const K &key) const noexcept
{
return find(key);
}
template <typename K, typename... Args, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
TryEmplaceResult tryEmplace(K &&key, Args &&...args)
{
return tryEmplace_impl(std::forward<K>(key), std::forward<Args>(args)...);
}
template <typename K, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
TryEmplaceResult tryInsert(K &&key, const T &value)
{
return tryEmplace_impl(std::forward<K>(key), value);
}
template <typename K, typename... Args, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
std::pair<key_value_iterator, bool> try_emplace(K &&key, Args &&...args)
{
return tryEmplace_impl(std::forward<K>(key), std::forward<Args>(args)...);
}
template <typename K, typename... Args, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
key_value_iterator try_emplace(const_iterator /*hint*/, K &&key, Args &&...args)
{
return key_value_iterator(tryEmplace_impl(std::forward<K>(key), std::forward<Args>(args)...).iterator);
}
};
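// QMultiHash: a hash that can hold multiple values per key. Judging from the
// private types used below, each distinct key owns a MultiNode whose payload
// is a MultiNodeChain of values, so d->size counts distinct keys while m_size
// tracks the total number of (key, value) entries.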
template <typename Key, typename T>
class QMultiHash
{
using Node = QHashPrivate::MultiNode<Key, T>;
using Data = QHashPrivate::Data<Node>;
using Chain = QHashPrivate::MultiNodeChain<T>;
Data *d = nullptr;
qsizetype m_size = 0;
public:
using key_type = Key;
using mapped_type = T;
using value_type = T;
using size_type = qsizetype;
using difference_type = qsizetype;
using reference = T &;
using const_reference = const T &;
QMultiHash() noexcept = default;
inline QMultiHash(std::initializer_list<std::pair<Key,T> > list)
: d(new Data(list.size()))
{
for (typename std::initializer_list<std::pair<Key,T> >::const_iterator it = list.begin(); it != list.end(); ++it)
insert(it->first, it->second);
}
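    // Range constructors. The Q_QDOC branch documents a single signature; the
    // actual code provides one overload for Qt-style associative iterators
    // (f.key()/f.value()) and one for std-style iterators over pair-like
    // elements (first/second members).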
#ifdef Q_QDOC
template <typename InputIterator>
QMultiHash(InputIterator f, InputIterator l);
#else
template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasKeyAndValue<InputIterator> = true>
QMultiHash(InputIterator f, InputIterator l)
{
QtPrivate::reserveIfForwardIterator(this, f, l);
for (; f != l; ++f)
insert(f.key(), f.value());
}
template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasFirstAndSecond<InputIterator> = true>
QMultiHash(InputIterator f, InputIterator l)
{
QtPrivate::reserveIfForwardIterator(this, f, l);
for (; f != l; ++f) {
auto &&e = *f;
using V = decltype(e);
insert(std::forward<V>(e).first, std::forward<V>(e).second);
}
}
#endif
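    // Copying is shallow: the new hash shares the same Data and only bumps its
    // reference count; a deep copy happens later, on detach (copy-on-write).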
QMultiHash(const QMultiHash &other) noexcept
: d(other.d), m_size(other.m_size)
{
if (d)
d->ref.ref();
}
~QMultiHash()
{
static_assert(std::is_nothrow_destructible_v<Key>, "Types with throwing destructors are not supported in Qt containers.");
static_assert(std::is_nothrow_destructible_v<T>, "Types with throwing destructors are not supported in Qt containers.");
if (d && !d->ref.deref())
delete d;
}
QMultiHash &operator=(const QMultiHash &other) noexcept(std::is_nothrow_destructible<Node>::value)
{
if (d != other.d) {
Data *o = other.d;
if (o)
o->ref.ref();
if (d && !d->ref.deref())
delete d;
d = o;
m_size = other.m_size;
}
return *this;
}
QMultiHash(QMultiHash &&other) noexcept
: d(std::exchange(other.d, nullptr)),
m_size(std::exchange(other.m_size, 0))
{
}
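    // Move assignment via move-and-swap: the temporary takes over other's data,
    // swap() transfers it into *this, and the temporary releases the previous
    // contents when it goes out of scope.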
QMultiHash &operator=(QMultiHash &&other) noexcept(std::is_nothrow_destructible<Node>::value)
{
QMultiHash moved(std::move(other));
swap(moved);
return *this;
}
explicit QMultiHash(const QHash<Key, T> &other)
: QMultiHash(other.begin(), other.end())
{}
explicit QMultiHash(QHash<Key, T> &&other)
{
unite(std::move(other));
}
void swap(QMultiHash &other) noexcept
{
qt_ptr_swap(d, other.d);
std::swap(m_size, other.m_size);
}
#ifndef Q_QDOC
private:
template <typename AKey = Key, typename AT = T,
QTypeTraits::compare_eq_result_container<QMultiHash, AKey, AT> = true>
friend bool comparesEqual(const QMultiHash &lhs, const QMultiHash &rhs) noexcept
{
if (lhs.d == rhs.d)
return true;
if (lhs.m_size != rhs.m_size)
return false;
if (lhs.m_size == 0)
return true;
// equal size, and both non-zero size => d pointers allocated for both
Q_ASSERT(lhs.d);
Q_ASSERT(rhs.d);
if (lhs.d->size != rhs.d->size)
return false;
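        // Walk every node of rhs; for each key, every value stored in rhs's
        // chain must also appear somewhere in lhs's chain for that key.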
for (auto it = rhs.d->begin(); it != rhs.d->end(); ++it) {
auto *n = lhs.d->findNode(it.node()->key);
if (!n)
return false;
Chain *e = it.node()->value;
while (e) {
Chain *oe = n->value;
while (oe) {
if (oe->value == e->value)
break;
oe = oe->next;
}
if (!oe)
return false;
e = e->next;
}
}
// all values must be the same as size is the same
return true;
}
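    // Illustrative sketch (not part of the class): given the comparison above,
    // equality is insensitive to insertion order. Assuming distinct values per key:
    //     QMultiHash<int, int> a, b;
    //     a.insert(1, 10); a.insert(1, 20);
    //     b.insert(1, 20); b.insert(1, 10);
    //     Q_ASSERT(a == b);  // same keys, same sets of values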
QT_DECLARE_EQUALITY_OPERATORS_HELPER(QMultiHash, QMultiHash, /* non-constexpr */, noexcept,
template <typename AKey = Key, typename AT = T,
QTypeTraits::compare_eq_result_container<QMultiHash, AKey, AT> = true>)
public:
#else
friend bool operator==(const QMultiHash &lhs, const QMultiHash &rhs) noexcept;
friend bool operator!=(const QMultiHash &lhs, const QMultiHash &rhs) noexcept;
#endif // Q_QDOC
inline qsizetype size() const noexcept { return m_size; }
[[nodiscard]]
inline bool isEmpty() const noexcept { return !m_size; }
inline qsizetype capacity() const noexcept { return d ? qsizetype(d->numBuckets >> 1) : 0; }
void reserve(qsizetype size)
{
// reserve(0) is used in squeeze()
if (size && (this->capacity() >= size))
return;
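        // Sole owner: rehash in place. Otherwise allocate a fresh, detached
        // copy of the data sized for the requested capacity.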
if (isDetached())
d->rehash(size);
else
d = Data::detached(d, size_t(size));
}
inline void squeeze() { reserve(0); }
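    // detach() follows Qt's implicit-sharing convention: if the data block is
    // shared with another hash (or absent), replace it with a uniquely owned
    // copy so subsequent mutations do not affect other instances.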
inline void detach() { if (!d || d->ref.isShared()) d = Data::detached(d); }
inline bool isDetached() const noexcept { return d && !d->ref.isShared(); }
bool isSharedWith(const QMultiHash &other) const noexcept { return d == other.d; }
void clear() noexcept(std::is_nothrow_destructible<Node>::value)
{
if (d && !d->ref.deref())
delete d;
d = nullptr;
m_size = 0;
}
qsizetype remove(const Key &key)
{
return removeImpl(key);
}
private:
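    // Shared implementation of remove(): removes every value stored for 'key'
    // and returns the number of values removed (0 if the key is not present).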
template <typename K> qsizetype removeImpl(const K &key)
{
if (isEmpty()) // prevents detaching shared null
return 0;
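        // Locate the bucket first and remember its numeric index: the Bucket
        // iterator points into the current data, which detach() may replace,
        // so the iterator is rebuilt from the index against the detached copy.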
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
it = typename Data::Bucket(d, bucket); // reattach in case of detach
if (it.isUnused())
return 0;
qsizetype n = Node::freeChain(it.node());
m_size -= n;
Q_ASSERT(m_size >= 0);
d->erase(it);
return n;
}
public:
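    // Removes every element for which 'pred' returns true and returns the
    // number of elements removed.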
template <typename Predicate>
qsizetype removeIf(Predicate pred)
{
return QtPrivate::associative_erase_if(*this, pred);
}
T take(const Key &key)
{
return takeImpl(key);
}
private:
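    // Shared implementation of take(): removes one value stored for 'key' (the
    // one at the head of the key's chain) and returns it, or returns a
    // default-constructed T if the key is not present.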
template <typename K> T takeImpl(const K &key)
{
if (isEmpty()) // prevents detaching shared null
return T();
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
it = typename Data::Bucket(d, bucket); // reattach in case of detach
if (it.isUnused())
return T();
Chain *e = it.node()->value;
Q_ASSERT(e);
T t = std::move(e->value);
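        // If more values are chained for this key, unlink the head we just took
        // and keep the node; otherwise the whole bucket entry can be erased.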
if (e->next) {
it.node()->value = e->next;
delete e;
} else {
// erase() deletes the values.
d->erase(it);
}
--m_size;
Q_ASSERT(m_size >= 0);
return t;
}
public:
bool contains(const Key &key) const noexcept
{
if (!d)
return false;
return d->findNode(key) != nullptr;
}
private:
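    // Shared implementation of key(): walks every entry (and every chained
    // value) until it finds 'value', returning a pointer to the first matching
    // entry's key in iteration order, or nullptr. This is a linear scan, so it
    // is slow for large hashes.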
const Key *keyImpl(const T &value) const noexcept
{
if (d) {
auto i = d->begin();
while (i != d->end()) {
Chain *e = i.node()->value;
if (e->contains(value))
return &i.node()->key;
++i;
}
}
return nullptr;
}
public:
Key key(const T &value) const noexcept
{
if (auto *k = keyImpl(value))
return *k;
else
return Key();
}
Key key(const T &value, const Key &defaultKey) const noexcept
{
if (auto *k = keyImpl(value))
return *k;
else
return defaultKey;
}
private:
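    // Shared implementation of value(): returns a pointer to the value at the
    // head of the chain stored for 'key', or nullptr if the key is not present.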
template <typename K>
T *valueImpl(const K &key) const noexcept
{
if (d) {
Node *n = d->findNode(key);
if (n) {
Q_ASSERT(n->value);
return &n->value->value;
}
}
return nullptr;
}
public:
T value(const Key &key) const noexcept
{
if (auto *v = valueImpl(key))
return *v;
else
return T();
}
T value(const Key &key, const T &defaultValue) const noexcept
{
if (auto *v = valueImpl(key))
return *v;
else
return defaultValue;
}
T &operator[](const Key &key)
{
return operatorIndexImpl(key);
}
private:
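    // Shared implementation of operator[] for Key and heterogeneous key types.
    // The copy taken below keeps the source data alive, so 'key' stays valid
    // across detach() even if it refers into this hash; findOrInsert() then
    // locates (or creates) the bucket, and a default-constructed value is
    // emplaced when the key was not already present.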
template <typename K> T &operatorIndexImpl(const K &key)
{
const auto copy = isDetached() ? QMultiHash() : *this; // keep 'key' alive across the detach
detach();
auto result = d->findOrInsert(key);
Q_ASSERT(!result.it.atEnd());
if (!result.initialized) {
Node::createInPlace(result.it.node(), Key(key), T());
++m_size;
}
return result.it.node()->value->value;
}
public:
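    // Const overload: equivalent to value(key). Returns a copy of the stored
    // value, or a default-constructed T if 'key' is not in the hash.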
const T operator[](const Key &key) const noexcept
{
return value(key);
}
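    // Returns every distinct key exactly once. Each node holds one key
    // together with the chain of all values stored for it, so iterating the
    // nodes directly yields the unique keys.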
QList<Key> uniqueKeys() const
{
QList<Key> res;
if (d) {
auto i = d->begin();
while (i != d->end()) {
res.append(i.node()->key);
++i;
}
}
return res;
}
QList<Key> keys() const { return QList<Key>(keyBegin(), keyEnd()); }
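    // Returns the keys whose associated values compare equal to 'value'.
    // This is a linear scan over every (key, value) pair, so a key is
    // reported once per matching value.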
QList<Key> keys(const T &value) const
{
QList<Key> res;
const_iterator i = begin();
while (i != end()) {
if (i.value() == value)
res.append(i.key());
++i;
}
return res;
}
QList<T> values() const { return QList<T>(begin(), end()); }
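    // values(key) collects every value stored for 'key' by walking the chain
    // kept in the key's node. Minimal usage sketch (illustrative only, not
    // part of this header):
    //     QMultiHash<QString, int> h;
    //     h.insert(QStringLiteral("a"), 1);
    //     h.insert(QStringLiteral("a"), 2);
    //     const QList<int> v = h.values(QStringLiteral("a")); // contains 1 and 2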
QList<T> values(const Key &key) const
{
return valuesImpl(key);
}
private:
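    // Shared implementation for values(key), also usable with heterogeneous
    // key types: find the node for 'key' (if any) and copy its chain of
    // values into a QList.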
template <typename K> QList<T> valuesImpl(const K &key) const
{
QList<T> values;
if (d) {
Node *n = d->findNode(key);
if (n) {
Chain *e = n->value;
while (e) {
values.append(e->value);
e = e->next;
}
}
}
return values;
}
public:
class const_iterator;
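    // Mutable iterator over all (key, value) pairs. It combines the node
    // iterator 'i' (one node per distinct key) with 'e', a pointer into that
    // node's chain of values, so each value of a multi-valued key is visited
    // in turn.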
class iterator
{
using piter = typename QHashPrivate::iterator<Node>;
friend class const_iterator;
friend class QMultiHash<Key, T>;
piter i;
Chain **e = nullptr;
explicit inline iterator(piter it, Chain **entry = nullptr) noexcept : i(it), e(entry)
{
if (!it.atEnd() && !e) {
e = &it.node()->value;
Q_ASSERT(e && *e);
}
}
public:
typedef std::forward_iterator_tag iterator_category;
typedef qptrdiff difference_type;
typedef T value_type;
typedef T *pointer;
typedef T &reference;
constexpr iterator() noexcept = default;
inline const Key &key() const noexcept { return i.node()->key; }
inline T &value() const noexcept { return (*e)->value; }
inline T &operator*() const noexcept { return (*e)->value; }
inline T *operator->() const noexcept { return &(*e)->value; }
inline bool operator==(const iterator &o) const noexcept { return e == o.e; }
inline bool operator!=(const iterator &o) const noexcept { return e != o.e; }
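        // Advance to the next value in the current key's chain; when the
        // chain is exhausted, move 'i' on to the next node (next distinct key).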
inline iterator &operator++() noexcept {
Q_ASSERT(e && *e);
e = &(*e)->next;
Q_ASSERT(e);
if (!*e) {
++i;
e = i.atEnd() ? nullptr : &i.node()->value;
}
return *this;
}
inline iterator operator++(int) noexcept {
iterator r = *this;
++(*this);
return r;
}
inline bool operator==(const const_iterator &o) const noexcept { return e == o.e; }
inline bool operator!=(const const_iterator &o) const noexcept { return e != o.e; }
};
friend class iterator;
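    // Read-only counterpart of iterator, using the same two-level traversal
    // (node iterator plus chain pointer).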
class const_iterator
{
using piter = typename QHashPrivate::iterator<Node>;
friend class iterator;
friend class QMultiHash<Key, T>;
piter i;
Chain **e = nullptr;
explicit inline const_iterator(piter it, Chain **entry = nullptr) noexcept : i(it), e(entry)
{
if (!it.atEnd() && !e) {
e = &it.node()->value;
Q_ASSERT(e && *e);
}
}
public:
typedef std::forward_iterator_tag iterator_category;
typedef qptrdiff difference_type;
typedef T value_type;
typedef const T *pointer;
typedef const T &reference;
constexpr const_iterator() noexcept = default;
inline const_iterator(const iterator &o) noexcept : i(o.i), e(o.e) { }
inline const Key &key() const noexcept { return i.node()->key; }
        inline const T &value() const noexcept { return (*e)->value; }
        inline const T &operator*() const noexcept { return (*e)->value; }
        inline const T *operator->() const noexcept { return &(*e)->value; }
inline bool operator==(const const_iterator &o) const noexcept { return e == o.e; }
inline bool operator!=(const const_iterator &o) const noexcept { return e != o.e; }
inline const_iterator &operator++() noexcept {
Q_ASSERT(e && *e);
e = &(*e)->next;
Q_ASSERT(e);
if (!*e) {
++i;
e = i.atEnd() ? nullptr : &i.node()->value;
}
return *this;
}
inline const_iterator operator++(int) noexcept
{
const_iterator r = *this;
++(*this);
return r;
}
};
friend class const_iterator;
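    // Illustrative note (not part of the API): the outer 'piter' visits one node
    // per distinct key, while 'e' walks that node's chain of values, so
    // operator++() yields every value stored for a key before moving on to the
    // next key. A minimal usage sketch, assuming a QMultiHash<QString, int> 'h'
    // that already holds a few entries:
    //
    //     for (auto it = h.cbegin(); it != h.cend(); ++it)
    //         qDebug() << it.key() << it.value();   // equal keys appear back to back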
class key_iterator
{
const_iterator i;
public:
typedef typename const_iterator::iterator_category iterator_category;
typedef qptrdiff difference_type;
typedef Key value_type;
typedef const Key *pointer;
typedef const Key &reference;
key_iterator() noexcept = default;
explicit key_iterator(const_iterator o) noexcept : i(o) { }
const Key &operator*() const noexcept { return i.key(); }
const Key *operator->() const noexcept { return &i.key(); }
bool operator==(key_iterator o) const noexcept { return i == o.i; }
bool operator!=(key_iterator o) const noexcept { return i != o.i; }
inline key_iterator &operator++() noexcept { ++i; return *this; }
        inline key_iterator operator++(int) noexcept { return key_iterator(i++); }
const_iterator base() const noexcept { return i; }
};
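    // key_iterator is a thin wrapper over const_iterator that dereferences to the
    // key instead of the value; base() recovers the underlying const_iterator.
    // Usage sketch (illustrative), assuming a hash 'h' and a hypothetical
    // 'process' callable:
    //
    //     for (auto it = h.keyBegin(); it != h.keyEnd(); ++it)
    //         process(*it);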
typedef QKeyValueIterator<const Key&, const T&, const_iterator> const_key_value_iterator;
typedef QKeyValueIterator<const Key&, T&, iterator> key_value_iterator;
// STL style
inline iterator begin() { if (!d) return iterator(); detach(); return iterator(d->begin()); }
inline const_iterator begin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
inline const_iterator cbegin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
inline const_iterator constBegin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
inline iterator end() noexcept { return iterator(); }
inline const_iterator end() const noexcept { return const_iterator(); }
inline const_iterator cend() const noexcept { return const_iterator(); }
inline const_iterator constEnd() const noexcept { return const_iterator(); }
inline key_iterator keyBegin() const noexcept { return key_iterator(begin()); }
inline key_iterator keyEnd() const noexcept { return key_iterator(end()); }
inline key_value_iterator keyValueBegin() noexcept { return key_value_iterator(begin()); }
inline key_value_iterator keyValueEnd() noexcept { return key_value_iterator(end()); }
inline const_key_value_iterator keyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
inline const_key_value_iterator constKeyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
inline const_key_value_iterator keyValueEnd() const noexcept { return const_key_value_iterator(end()); }
inline const_key_value_iterator constKeyValueEnd() const noexcept { return const_key_value_iterator(end()); }
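    // Iteration sketch (illustrative): on a non-const hash, begin() detaches and
    // may therefore deep-copy shared data; prefer cbegin()/cend() (or
    // constBegin()/constEnd()) when the elements are only read:
    //
    //     for (auto it = h.cbegin(), end = h.cend(); it != end; ++it)
    //         use(it.key(), it.value());   // 'use' is a hypothetical helper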
auto asKeyValueRange() & { return QtPrivate::QKeyValueRange(*this); }
auto asKeyValueRange() const & { return QtPrivate::QKeyValueRange(*this); }
auto asKeyValueRange() && { return QtPrivate::QKeyValueRange(std::move(*this)); }
auto asKeyValueRange() const && { return QtPrivate::QKeyValueRange(std::move(*this)); }
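    // asKeyValueRange() usage sketch (illustrative): the range yields pairs of
    // references, so even a by-value structured binding observes references into
    // the container, and the value reference is mutable when the hash is
    // non-const:
    //
    //     for (auto [k, v] : h.asKeyValueRange())
    //         v += 1;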
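    // Convert a const_iterator into an iterator that points into uniquely owned
    // data. Detaching reallocates the nodes, so the position inside the value
    // chain is first recorded as an offset ('n') and then re-applied to the
    // corresponding node of the detached copy.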
iterator detach(const_iterator it)
{
auto i = it.i;
Chain **e = it.e;
if (d->ref.isShared()) {
// need to store iterator position before detaching
qsizetype n = 0;
Chain *entry = i.node()->value;
while (entry != *it.e) {
++n;
entry = entry->next;
}
Q_ASSERT(entry);
detach_helper();
i = d->detachedIterator(i);
e = &i.node()->value;
while (n) {
e = &(*e)->next;
--n;
}
Q_ASSERT(e && *e);
}
return iterator(i, e);
}
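    // Remove the entry 'it' points to and return an iterator to the following
    // entry. Detaches first, so copies of the hash that share the data are
    // unaffected; other iterators into this hash must be considered invalidated.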
iterator erase(const_iterator it)
{
Q_ASSERT(d);
iterator iter = detach(it);
iterator i = iter;
Chain *e = *i.e;
Chain *next = e->next;
*i.e = next;
delete e;
if (!next) {
if (i.e == &i.i.node()->value) {
// last remaining entry, erase
typename Data::Bucket bucket(i.i);
d->erase(bucket);
if (bucket.toBucketIndex(d) == d->numBuckets - 1 || bucket.isUnused())
i = iterator(++iter.i);
                else // 'i' now refers to a null chain, so recreate it from the bucket
i = iterator(bucket.toIterator(d));
} else {
i = iterator(++iter.i);
}
}
--m_size;
Q_ASSERT(m_size >= 0);
return i;
}
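    // (Explanatory note, not part of the original header.) The removal logic above
    // returns an iterator to the next element to visit: when d->erase() backfills the
    // freed bucket with a later entry, the iterator is rebuilt from the bucket via
    // bucket.toIterator(d) so that entry is not skipped; otherwise iteration simply
    // advances past the erased bucket (++iter.i).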
// more Qt
typedef iterator Iterator;
typedef const_iterator ConstIterator;
inline qsizetype count() const noexcept { return size(); }
private:
template <typename K> iterator findImpl(const K &key)
{
if (isEmpty())
return end();
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
        it = typename Data::Bucket(d, bucket); // detach() may have replaced d, so reattach the bucket to the current table
if (it.isUnused())
return end();
return iterator(it.toIterator(d));
}
template <typename K> const_iterator constFindImpl(const K &key) const noexcept
{
if (isEmpty())
return end();
auto it = d->findBucket(key);
if (it.isUnused())
return constEnd();
return const_iterator(it.toIterator(d));
}
public:
iterator find(const Key &key)
{
return findImpl(key);
}
const_iterator constFind(const Key &key) const noexcept
{
return constFindImpl(key);
}
const_iterator find(const Key &key) const noexcept
{
return constFindImpl(key);
}
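    // Illustrative usage, not from the original source: the const overloads never
    // detach, so they are safe on implicitly shared instances, e.g.
    //     if (auto it = hash.constFind(someKey); it != hash.constEnd())
    //         doSomething(it.value());   // 'hash', 'someKey', 'doSomething' are hypothetical
    // The non-const find() detaches and returns a mutable iterator.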
iterator insert(const Key &key, const T &value)
{
return emplace(key, value);
}
template <typename ...Args>
iterator emplace(const Key &key, Args &&... args)
{
return emplace(Key(key), std::forward<Args>(args)...);
}
template <typename ...Args>
iterator emplace(Key &&key, Args &&... args)
{
if (isDetached()) {
if (d->shouldGrow()) // Construct the value now so that no dangling references are used
return emplace_helper(std::move(key), T(std::forward<Args>(args)...));
return emplace_helper(std::move(key), std::forward<Args>(args)...);
}
// else: we must detach
const auto copy = *this; // keep 'args' alive across the detach/growth
detach();
return emplace_helper(std::move(key), std::forward<Args>(args)...);
}
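    // (Added explanation.) The copy above guards against arguments that alias this
    // container, e.g. a hypothetical hash.emplace(key, hash.constBegin().value()):
    // keeping the old table alive across detach(), and constructing the value before
    // a growth/rehash on the shouldGrow() path, prevents 'args' from dangling while
    // the storage is reallocated.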
float load_factor() const noexcept { return d ? d->loadFactor() : 0; }
static float max_load_factor() noexcept { return 0.5; }
size_t bucket_count() const noexcept { return d ? d->numBuckets : 0; }
static size_t max_bucket_count() noexcept { return Data::maxNumBuckets(); }
[[nodiscard]]
inline bool empty() const noexcept { return isEmpty(); }
inline iterator replace(const Key &key, const T &value)
{
return emplaceReplace(key, value);
}
template <typename ...Args>
iterator emplaceReplace(const Key &key, Args &&... args)
{
return emplaceReplace(Key(key), std::forward<Args>(args)...);
}
template <typename ...Args>
iterator emplaceReplace(Key &&key, Args &&... args)
{
if (isDetached()) {
if (d->shouldGrow()) // Construct the value now so that no dangling references are used
return emplaceReplace_helper(std::move(key), T(std::forward<Args>(args)...));
return emplaceReplace_helper(std::move(key), std::forward<Args>(args)...);
}
// else: we must detach
const auto copy = *this; // keep 'args' alive across the detach/growth
detach();
return emplaceReplace_helper(std::move(key), std::forward<Args>(args)...);
}
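    // (Added note.) Unlike emplace(), which always adds another (key, value) pair,
    // emplaceReplace()/replace() overwrite the value of an existing entry for the key
    // and only insert a new one when the key is absent.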
inline QMultiHash &operator+=(const QMultiHash &other)
{ this->unite(other); return *this; }
inline QMultiHash operator+(const QMultiHash &other) const
{ QMultiHash result = *this; result += other; return result; }
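    // Illustrative usage (assumption, not from the original source): operator+= and
    // operator+ forward to unite(), so duplicates are kept rather than replaced, e.g.
    //     QMultiHash<int, int> a, b;
    //     a.insert(1, 10); b.insert(1, 11);
    //     a += b;   // 'a' now holds both (1, 10) and (1, 11)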
bool contains(const Key &key, const T &value) const noexcept
{
return containsImpl(key, value);
}
private:
template <typename K> bool containsImpl(const K &key, const T &value) const noexcept
{
if (isEmpty())
return false;
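        // each node stores its values as a singly linked Chain; check whether that chain holds value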
auto n = d->findNode(key);
if (n == nullptr)
return false;
return n->value->contains(value);
}
public:
qsizetype remove(const Key &key, const T &value)
{
return removeImpl(key, value);
}
private:
template <typename K> qsizetype removeImpl(const K &key, const T &value)
{
if (isEmpty()) // prevents detaching shared null
return 0;
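        // locate the bucket for key and remember its index; detach() below may reallocate the table, so the bucket is re-resolved from the index afterwards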
auto it = d->findBucket(key);
size_t bucket = it.toBucketIndex(d);
detach();
it = typename Data::Bucket(d, bucket); // reattach in case of detach
if (it.isUnused())
return 0;
qsizetype n = 0;
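        // walk the chain of values stored under this key, unlinking and deleting every entry equal to value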
Chain **e = &it.node()->value;
while (*e) {
Chain *entry = *e;
if (entry->value == value) {
*e = entry->next;
delete entry;
++n;
} else {
e = &entry->next;
}
}
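        // if the chain is now empty, remove the key's node from the table as well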
if (!it.node()->value)
d->erase(it);
m_size -= n;
Q_ASSERT(m_size >= 0);
return n;
}
public:
qsizetype count(const Key &key) const noexcept
{
return countImpl(key);
}
private:
template <typename K> qsizetype countImpl(const K &key) const noexcept
{
if (!d)
return 0;
auto it = d->findBucket(key);
if (it.isUnused())
return 0;
qsizetype n = 0;
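        // the values for a key are kept in a singly linked chain; its length is the count for key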
Chain *e = it.node()->value;
while (e) {
++n;
e = e->next;
}
return n;
}
public:
qsizetype count(const Key &key, const T &value) const noexcept
{
return countImpl(key, value);
}
private:
template <typename K> qsizetype countImpl(const K &key, const T &value) const noexcept
{
if (!d)
return 0;
auto it = d->findBucket(key);
if (it.isUnused())
return 0;
qsizetype n = 0;
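        // count only the chain entries whose value compares equal to value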
Chain *e = it.node()->value;
while (e) {
if (e->value == value)
++n;
e = e->next;
}
return n;
}
template <typename K> iterator findImpl(const K &key, const T &value)
{
if (isEmpty())
return end();
const auto copy = isDetached() ? QMultiHash() : *this; // keep 'key'/'value' alive across the detach
detach();
auto it = constFind(key, value);
return iterator(it.i, it.e);
}
template <typename K> const_iterator constFindImpl(const K &key, const T &value) const noexcept
{
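        // start at the first entry for key and scan forward while the key still matches, returning the first entry whose value equals value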
const_iterator i(constFind(key));
const_iterator end(constEnd());
while (i != end && i.key() == key) {
if (i.value() == value)
return i;
++i;
}
return end;
}
public:
iterator find(const Key &key, const T &value)
{
return findImpl(key, value);
}
const_iterator constFind(const Key &key, const T &value) const noexcept
{
return constFindImpl(key, value);
}
const_iterator find(const Key &key, const T &value) const noexcept
{
return constFind(key, value);
}
QMultiHash &unite(const QMultiHash &other)
{
if (isEmpty()) {
*this = other;
} else if (other.isEmpty()) {
;
} else {
QMultiHash copy(other);
detach();
for (auto cit = copy.cbegin(); cit != copy.cend(); ++cit)
insert(cit.key(), *cit);
}
return *this;
}
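// Overload taking a plain QHash: every entry of 'other' is inserted as an
// additional value under its key.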
QMultiHash &unite(const QHash<Key, T> &other)
{
for (auto cit = other.cbegin(); cit != other.cend(); ++cit)
insert(cit.key(), *cit);
return *this;
}
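// Rvalue overload: when 'other' is not shared, its nodes are moved out
// directly and 'other' is left empty; otherwise the copying overload is used.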
QMultiHash &unite(QHash<Key, T> &&other)
{
if (!other.isDetached()) {
unite(other);
return *this;
}
auto it = other.d->begin();
for (const auto end = other.d->end(); it != end; ++it)
emplace(std::move(it.node()->key), std::move(it.node()->takeValue()));
other.clear();
return *this;
}
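// Returns an iterator pair delimiting all values stored for 'key'
// (an empty range if the key is not present).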
std::pair<iterator, iterator> equal_range(const Key &key)
{
return equal_range_impl(key);
}
private:
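// Mutable equal_range: detaches, then converts the const_iterator pair of the
// const overload into mutable iterators. A copy of the hash is kept while
// detaching in case 'key' refers into this hash's own data.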
template <typename K> std::pair<iterator, iterator> equal_range_impl(const K &key)
{
const auto copy = isDetached() ? QMultiHash() : *this; // keep 'key' alive across the detach
detach();
auto pair = std::as_const(*this).equal_range(key);
return {iterator(pair.first.i), iterator(pair.second.i)};
}
public:
std::pair<const_iterator, const_iterator> equal_range(const Key &key) const noexcept
{
return equal_range_impl(key);
}
private:
template <typename K> std::pair<const_iterator, const_iterator> equal_range_impl(const K &key) const noexcept
{
if (!d)
return {end(), end()};
auto bucket = d->findBucket(key);
if (bucket.isUnused())
return {end(), end()};
auto it = bucket.toIterator(d);
auto end = it;
++end;
return {const_iterator(it), const_iterator(end)};
}
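// Ensures this hash owns its data: allocates an empty Data block if there is
// none, otherwise deep-copies the shared block and drops one reference.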
void detach_helper()
{
if (!d) {
d = new Data;
return;
}
Data *dd = new Data(*d);
if (!d->ref.deref())
delete d;
d = dd;
}
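// Constructs a value for 'key' in place. A new node is created for a new key;
// for an existing key the value is appended to that node's chain of values.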
template<typename... Args>
iterator emplace_helper(Key &&key, Args &&...args)
{
auto result = d->findOrInsert(key);
if (!result.initialized)
Node::createInPlace(result.it.node(), std::move(key), std::forward<Args>(args)...);
else
result.it.node()->insertMulti(std::forward<Args>(args)...);
++m_size;
return iterator(result.it);
}
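// Like emplace_helper(), but an existing entry for 'key' has its value
// replaced instead of gaining an additional one.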
template<typename... Args>
iterator emplaceReplace_helper(Key &&key, Args &&...args)
{
auto result = d->findOrInsert(key);
if (!result.initialized) {
Node::createInPlace(result.it.node(), std::move(key), std::forward<Args>(args)...);
++m_size;
} else {
result.it.node()->emplaceValue(std::forward<Args>(args)...);
}
return iterator(result.it);
}
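// Heterogeneous lookup: the overloads below accept any type K declared
// hash-compatible with Key, so lookups need not construct a temporary Key.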
template <typename K>
using if_heterogeneously_searchable = QHashPrivate::if_heterogeneously_searchable_with<Key, K>;
template <typename K>
using if_key_constructible_from = std::enable_if_t<std::is_constructible_v<Key, K>, bool>;
public:
template <typename K, if_heterogeneously_searchable<K> = true>
qsizetype remove(const K &key)
{
return removeImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
T take(const K &key)
{
return takeImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
bool contains(const K &key) const noexcept
{
if (!d)
return false;
return d->findNode(key) != nullptr;
}
template <typename K, if_heterogeneously_searchable<K> = true>
T value(const K &key) const noexcept
{
if (auto *v = valueImpl(key))
return *v;
else
return T();
}
template <typename K, if_heterogeneously_searchable<K> = true>
T value(const K &key, const T &defaultValue) const noexcept
{
if (auto *v = valueImpl(key))
return *v;
else
return defaultValue;
}
template <typename K, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
T &operator[](const K &key)
{
return operatorIndexImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const T operator[](const K &key) const noexcept
{
return value(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
QList<T> values(const K &key)
{
return valuesImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
iterator find(const K &key)
{
return findImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const_iterator constFind(const K &key) const noexcept
{
return constFindImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const_iterator find(const K &key) const noexcept
{
return constFindImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
bool contains(const K &key, const T &value) const noexcept
{
return containsImpl(key, value);
}
template <typename K, if_heterogeneously_searchable<K> = true>
qsizetype remove(const K &key, const T &value)
{
return removeImpl(key, value);
}
template <typename K, if_heterogeneously_searchable<K> = true>
qsizetype count(const K &key) const noexcept
{
return countImpl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
qsizetype count(const K &key, const T &value) const noexcept
{
return countImpl(key, value);
}
template <typename K, if_heterogeneously_searchable<K> = true>
iterator find(const K &key, const T &value)
{
return findImpl(key, value);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const_iterator constFind(const K &key, const T &value) const noexcept
{
return constFindImpl(key, value);
}
template <typename K, if_heterogeneously_searchable<K> = true>
const_iterator find(const K &key, const T &value) const noexcept
{
return constFind(key, value);
}
template <typename K, if_heterogeneously_searchable<K> = true>
std::pair<iterator, iterator>
equal_range(const K &key)
{
return equal_range_impl(key);
}
template <typename K, if_heterogeneously_searchable<K> = true>
std::pair<const_iterator, const_iterator>
equal_range(const K &key) const noexcept
{
return equal_range_impl(key);
}
};
Q_DECLARE_ASSOCIATIVE_FORWARD_ITERATOR(Hash)
Q_DECLARE_MUTABLE_ASSOCIATIVE_FORWARD_ITERATOR(Hash)
Q_DECLARE_ASSOCIATIVE_FORWARD_ITERATOR(MultiHash)
Q_DECLARE_MUTABLE_ASSOCIATIVE_FORWARD_ITERATOR(MultiHash)
template <class Key, class T>
size_t qHash(const QHash<Key, T> &key, size_t seed = 0)
noexcept(noexcept(qHash(std::declval<Key&>())) && noexcept(qHash(std::declval<T&>())))
{
size_t hash = 0;
for (auto it = key.begin(), end = key.end(); it != end; ++it) {
QtPrivate::QHashCombine combine;
size_t h = combine(seed, it.key());
// use + to keep the result independent of the ordering of the keys
hash += combine(h, it.value());
}
return hash;
}
template <class Key, class T>
inline size_t qHash(const QMultiHash<Key, T> &key, size_t seed = 0)
noexcept(noexcept(qHash(std::declval<Key&>())) && noexcept(qHash(std::declval<T&>())))
{
size_t hash = 0;
for (auto it = key.begin(), end = key.end(); it != end; ++it) {
QtPrivate::QHashCombine combine;
size_t h = combine(seed, it.key());
// use + to keep the result independent of the ordering of the keys
hash += combine(h, it.value());
}
return hash;
}
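// erase_if: removes every element for which 'pred' returns true and returns
// the number of elements removed.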
template <typename Key, typename T, typename Predicate>
qsizetype erase_if(QHash<Key, T> &hash, Predicate pred)
{
return QtPrivate::associative_erase_if(hash, pred);
}
template <typename Key, typename T, typename Predicate>
qsizetype erase_if(QMultiHash<Key, T> &hash, Predicate pred)
{
return QtPrivate::associative_erase_if(hash, pred);
}
QT_END_NAMESPACE
#endif // QHASH_H