// Copyright (C) 2020 The Qt Company Ltd.
// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only

#ifndef QHASH_H
#define QHASH_H

#include <QtCore/qalgorithms.h>
#include <QtCore/qcontainertools_impl.h>
#include <QtCore/qhashfunctions.h>
#include <QtCore/qiterator.h>
#include <QtCore/qlist.h>
#include <QtCore/qrefcount.h>
#include <QtCore/qttypetraits.h>

#include <initializer_list>
#include <functional> // for std::hash

#include <QtCore/q20type_traits.h>

class tst_QHash; // for befriending

QT_BEGIN_NAMESPACE

/*
    The new QHash implementation

    A brand new QHash implementation, using a faster and more memory-efficient
    data structure than the old, node-based QHash. Instead of a node-based
    approach, this implementation uses a two-stage lookup table. The total
    number of buckets in the table is divided into spans of 128 entries.
    Inside each span, an array of chars indexes into a storage area for the
    span.

    The storage area for each span is a simple array that gets (re-)allocated
    in size increments of 16 items. This gives an average memory overhead of
    8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.

    To give good performance and avoid too many collisions, the table keeps
    its load factor between .25 and .5 (and grows and rehashes if the load
    factor goes above .5).

    This design keeps the memory overhead of the hash very small while giving
    very good performance. The calculated overhead for a QHash<int, int> comes
    to 1.7-3.3 bytes per entry, and to 2.2-4.3 bytes for a QHash<ptr, ptr>.

    The new implementation also completely splits the QHash and QMultiHash
    classes. One behavioral change to note is that the new QHash does not
    provide stable references to nodes in the hash when the table needs to
    grow.

    Benchmarking using https://github.com/Tessil/hash-table-shootout shows
    very nice performance compared to many different hash table
    implementations. The numbers below are for a hash<int64, int64> with
    1 million entries. They scale nicely (mostly linearly, with some variation
    due to varying load factors) to smaller and larger tables. All numbers are
    in seconds, measured with gcc on Linux:

    Hash table                random      random      random    random    reads      full
                              insertion   insertion   full      full      after      iteration
                                          (reserved)  deletes   reads     deletes
    ------------------------------------------------------------------------------------------
    std::unordered_map        0.3842      0.1969      0.4511    0.1300    0.1169     0.0708
    google::dense_hash_map    0.1091      0.0846      0.0550    0.0452    0.0754     0.0160
    google::sparse_hash_map   0.2888      0.1582      0.0948    0.1020    0.1348     0.0112
    tsl::sparse_map           0.1487      0.1013      0.0735    0.0448    0.0505     0.0042
    old QHash                 0.2886      0.1798      0.5065    0.0840    0.0717     0.1387
    new QHash                 0.0940      0.0714      0.1494    0.0579    0.0449     0.0146

    Numbers for hash<std::string, int64>, with the string having 15 characters:

    Hash table                random      random      random    random    reads
                              insertion   insertion   full      full      after
                                          (reserved)  deletes   reads     deletes
    -----------------------------------------------------------------------------
    std::unordered_map        0.4993      0.2563      0.5515    0.2950    0.2153
    google::dense_hash_map    0.2691      0.1870      0.1547    0.1125    0.1622
    google::sparse_hash_map   0.6979      0.3304      0.1884    0.1822    0.2122
    tsl::sparse_map           0.4066      0.2586      0.1929    0.1146    0.1095
    old QHash                 0.3236      0.2064      0.5986    0.2115    0.1666
    new QHash                 0.2119      0.1652      0.2390    0.1378    0.0965

    Memory usage (in MB for a table with 1M entries) also looks very nice:

    Hash table                Key:   int64     std::string (15 chars)
                              Value: int64     int64
    ------------------------------------------------------------------
    std::unordered_map               44.63     75.35
    google::dense_hash_map           32.32     80.60
    google::sparse_hash_map          18.08     44.21
    tsl::sparse_map                  20.44     45.93
    old QHash                        53.95     69.16
    new QHash                        23.23     51.32

    See QTBUG-80311.
*/
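
/*
    A minimal illustrative sketch (not the actual implementation) of the
    two-stage lookup described above: a bucket index is split into the index
    of a 128-entry span and an offset inside that span, and the span's char
    map then refers into the span's separately allocated entry storage. The
    names below are hypothetical and only meant to show the idea.

        struct BucketReference { size_t spanIndex; size_t indexInSpan; };

        inline BucketReference splitBucket(size_t bucket) noexcept
        {
            // 128 buckets per span: high bits select the span,
            // the low 7 bits select the slot in the span's char map.
            return { bucket / 128, bucket % 128 };
        }
*/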
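
// Dummy value type used when a QHash is used as a set (for example by QSet):
// all instances compare equal, so only the keys matter.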
struct QHashDummyValue
{
    bool operator==(const QHashDummyValue &) const noexcept { return true; }
};

namespace QHashPrivate {

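// Compile-time detection of how a key type can be hashed: either through a
// qHash() overload taking a seed, or through a std::hash specialization with
// or without a seed. calculateHash() below dispatches to the first that applies.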
template <typename T, typename = void>
constexpr inline bool HasQHashOverload = false;

template <typename T>
constexpr inline bool HasQHashOverload<T, std::enable_if_t<
        std::is_convertible_v<decltype(qHash(std::declval<const T &>(), std::declval<size_t>())), size_t>
    >> = true;

template <typename T, typename = void>
constexpr inline bool HasStdHashSpecializationWithSeed = false;

template <typename T>
constexpr inline bool HasStdHashSpecializationWithSeed<T, std::enable_if_t<
        std::is_convertible_v<decltype(std::hash<T>()(std::declval<const T &>(), std::declval<size_t>())), size_t>
    >> = true;

template <typename T, typename = void>
constexpr inline bool HasStdHashSpecializationWithoutSeed = false;

template <typename T>
constexpr inline bool HasStdHashSpecializationWithoutSeed<T, std::enable_if_t<
        std::is_convertible_v<decltype(std::hash<T>()(std::declval<const T &>())), size_t>
    >> = true;

template <typename T>
size_t calculateHash(const T &t, size_t seed = 0)
{
    if constexpr (HasQHashOverload<T>) {
        return qHash(t, seed);
    } else if constexpr (HasStdHashSpecializationWithSeed<T>) {
        return std::hash<T>()(t, seed);
    } else if constexpr (HasStdHashSpecializationWithoutSeed<T>) {
        Q_UNUSED(seed);
        return std::hash<T>()(t);
    } else {
        static_assert(QtPrivate::type_dependent_false<T>(), "The key type must have a qHash overload or a std::hash specialization");
        return 0;
    }
}
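
/*
    A hedged usage sketch (not part of this header): a key type typically
    becomes usable with QHash by providing an equality operator and a qHash()
    overload that takes a seed, which calculateHash() above will pick up. The
    type and functions below are hypothetical examples, and the hash
    combination shown is deliberately simplistic.

        struct EmployeeId { int department; int number; };

        inline bool operator==(EmployeeId lhs, EmployeeId rhs) noexcept
        { return lhs.department == rhs.department && lhs.number == rhs.number; }

        inline size_t qHash(EmployeeId id, size_t seed = 0) noexcept
        { return qHash(id.department, seed) ^ qHash(id.number, seed); }
*/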
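
// Node is the entry type stored by QHash: a key together with a single value.
// createInPlace() constructs a node in uninitialized storage via placement new.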
template <typename Key, typename T>
struct Node
{
    using KeyType = Key;
    using ValueType = T;

    Key key;
    T value;
    template<typename ...Args>
    static void createInPlace(Node *n, Key &&k, Args &&... args)
    { new (n) Node{ std::move(k), T(std::forward<Args>(args)...) }; }
    template<typename ...Args>
    static void createInPlace(Node *n, const Key &k, Args &&... args)
    { new (n) Node{ Key(k), T(std::forward<Args>(args)...) }; }
    template<typename ...Args>
    void emplaceValue(Args &&... args)
    {
        value = T(std::forward<Args>(args)...);
    }
    T &&takeValue() noexcept(std::is_nothrow_move_assignable_v<T>)
    {
        return std::move(value);
    }
    bool valuesEqual(const Node *other) const { return value == other->value; }
};
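
// Specialization for set-like hashes: only the key is stored, and the
// value-related operations are no-ops.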
template <typename Key>
struct Node<Key, QHashDummyValue> {
    using KeyType = Key;
    using ValueType = QHashDummyValue;

    Key key;
    template<typename ...Args>
    static void createInPlace(Node *n, Key &&k, Args &&...)
    { new (n) Node{ std::move(k) }; }
    template<typename ...Args>
    static void createInPlace(Node *n, const Key &k, Args &&...)
    { new (n) Node{ k }; }
    template<typename ...Args>
    void emplaceValue(Args &&...)
    {
    }
    ValueType takeValue() { return QHashDummyValue(); }
    bool valuesEqual(const Node *) const { return true; }
};
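
// QMultiHash keeps one node per distinct key; all values stored for that key
// are chained in this singly linked list. free() deletes the whole chain and
// returns how many values it held.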
template <typename T>
struct MultiNodeChain
{
    T value;
    MultiNodeChain *next = nullptr;
    ~MultiNodeChain()
    {
    }
    qsizetype free() noexcept(std::is_nothrow_destructible_v<T>)
    {
        qsizetype nEntries = 0;
        MultiNodeChain *e = this;
        while (e) {
            MultiNodeChain *n = e->next;
            ++nEntries;
            delete e;
            e = n;
        }
        return nEntries;
    }
    bool contains(const T &val) const noexcept
    {
        const MultiNodeChain *e = this;
        while (e) {
            if (e->value == val)
                return true;
            e = e->next;
        }
        return false;
    }
};
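
// Note: contains() above walks the chain linearly, so checking whether a
// particular (key, value) pair is present costs time proportional to the
// number of values stored for that key.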
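
// The node type used by QMultiHash: it stores the key and a pointer to the
// head of the chain of values associated with that key.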
template <typename Key, typename T>
|
|
|
|
struct MultiNode
|
2011-04-27 10:05:43 +00:00
|
|
|
{
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
using KeyType = Key;
|
|
|
|
using ValueType = T;
|
|
|
|
using Chain = MultiNodeChain<T>;
|
|
|
|
|
|
|
|
Key key;
|
|
|
|
Chain *value;
|
|
|
|
|
2020-02-07 11:29:59 +00:00
|
|
|
template<typename ...Args>
|
|
|
|
static void createInPlace(MultiNode *n, Key &&k, Args &&... args)
|
|
|
|
{ new (n) MultiNode(std::move(k), new Chain{ T(std::forward<Args>(args)...), nullptr }); }
|
|
|
|
template<typename ...Args>
|
|
|
|
static void createInPlace(MultiNode *n, const Key &k, Args &&... args)
|
|
|
|
{ new (n) MultiNode(k, new Chain{ T(std::forward<Args>(args)...), nullptr }); }
|
2011-04-27 10:05:43 +00:00
|
|
|
|
    MultiNode(const Key &k, Chain *c)
        : key(k),
          value(c)
    {}
    MultiNode(Key &&k, Chain *c) noexcept(std::is_nothrow_move_assignable_v<Key>)
        : key(std::move(k)),
          value(c)
    {}

    MultiNode(MultiNode &&other)
        : key(other.key),
          value(std::exchange(other.value, nullptr))
    {
    }

    MultiNode(const MultiNode &other)
        : key(other.key)
    {
        Chain *c = other.value;
        Chain **e = &value;
        while (c) {
            Chain *chain = new Chain{ c->value, nullptr };
            *e = chain;
            e = &chain->next;
            c = c->next;
        }
    }
    ~MultiNode()
    {
        if (value)
            value->free();
    }
    static qsizetype freeChain(MultiNode *n) noexcept(std::is_nothrow_destructible_v<T>)
    {
        qsizetype size = n->value->free();
        n->value = nullptr;
        return size;
    }
    template<typename ...Args>
    void insertMulti(Args &&... args)
    {
        Chain *e = new Chain{ T(std::forward<Args>(args)...), nullptr };
        e->next = std::exchange(value, e);
    }
    template<typename ...Args>
    void emplaceValue(Args &&... args)
    {
        value->value = T(std::forward<Args>(args)...);
    }
};
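
// Usage sketch (illustrative only): given a MultiNode n that already exists for
// some key, the two operations above behave differently:
//     n.insertMulti(v);   // prepends a new Chain entry, so the newest value comes first
//     n.emplaceValue(v);  // overwrites only the front value, in place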

template<typename Node>
constexpr bool isRelocatable()
{
    return QTypeInfo<typename Node::KeyType>::isRelocatable && QTypeInfo<typename Node::ValueType>::isRelocatable;
}

struct SpanConstants {
    static constexpr size_t SpanShift = 7;
    static constexpr size_t NEntries = (1 << SpanShift);
    static constexpr size_t LocalBucketMask = (NEntries - 1);
    static constexpr size_t UnusedEntry = 0xff;

    static_assert ((NEntries & LocalBucketMask) == 0, "NEntries must be a power of two.");
};
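
// Illustrative sketch of the two-level lookup these constants support: a global
// bucket index splits into a Span number and a bucket inside that Span, e.g.
//     size_t spanIndex    = bucket >> SpanConstants::SpanShift;
//     size_t bucketInSpan = bucket & SpanConstants::LocalBucketMask;
// (the variable names here are for illustration only).
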
// Regular hash tables consist of a list of buckets that can store Nodes. But simply allocating one large array of buckets
// would waste a lot of memory. To avoid this, we split the vector of buckets up into a vector of Spans. Each Span represents
// NEntries buckets. To quickly find the correct Span that holds a bucket, NEntries must be a power of two.
//
// Inside each Span, there is an offset array that represents the actual buckets. offsets contains either an index into the
// actual storage space for the Nodes (the 'entries' member) or 0xff (UnusedEntry) to flag that the bucket is empty.
// As we have only 128 entries per Span, the offset array can be represented by an unsigned char. This trick makes the hash
// table have a very small memory overhead compared to many other implementations.
template<typename Node>
struct Span {
    // Entry is a slot available for storing a Node. The Span holds a pointer to
    // an array of Entries. Upon construction of the array, those entries are
    // unused, and nextFree() is used to set up a singly linked list
    // of free entries.
    // When a node gets inserted, the first free entry is picked, removed
    // from the singly linked list, and the Node is constructed in place.
    struct Entry {
        struct { alignas(Node) unsigned char data[sizeof(Node)]; } storage;

        unsigned char &nextFree() { return *reinterpret_cast<unsigned char *>(&storage); }
        Node &node() { return *reinterpret_cast<Node *>(&storage); }
    };
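
    // Note on Entry (illustrative): an Entry's bytes do double duty. While the slot
    // sits on the free list, nextFree() reuses its first byte to store the index of
    // the next free Entry; once insert() hands the slot out, the same storage holds
    // the constructed Node until erase() destroys it and recycles the slot.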

    unsigned char offsets[SpanConstants::NEntries];
    Entry *entries = nullptr;
    unsigned char allocated = 0;
    unsigned char nextFree = 0;
    Span() noexcept
    {
        memset(offsets, SpanConstants::UnusedEntry, sizeof(offsets));
    }
    ~Span()
    {
        freeData();
    }
    void freeData() noexcept(std::is_nothrow_destructible<Node>::value)
    {
        if (entries) {
            if constexpr (!std::is_trivially_destructible<Node>::value) {
                for (auto o : offsets) {
                    if (o != SpanConstants::UnusedEntry)
                        entries[o].node().~Node();
                }
            }
            delete[] entries;
            entries = nullptr;
        }
    }
    Node *insert(size_t i)
    {
        Q_ASSERT(i < SpanConstants::NEntries);
        Q_ASSERT(offsets[i] == SpanConstants::UnusedEntry);
        if (nextFree == allocated)
            addStorage();
        unsigned char entry = nextFree;
        Q_ASSERT(entry < allocated);
        nextFree = entries[entry].nextFree();
        offsets[i] = entry;
        return &entries[entry].node();
    }
    void erase(size_t bucket) noexcept(std::is_nothrow_destructible<Node>::value)
    {
        Q_ASSERT(bucket < SpanConstants::NEntries);
        Q_ASSERT(offsets[bucket] != SpanConstants::UnusedEntry);

        unsigned char entry = offsets[bucket];
        offsets[bucket] = SpanConstants::UnusedEntry;

New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
entries[entry].node().~Node();
|
|
|
|
entries[entry].nextFree() = nextFree;
|
|
|
|
nextFree = entry;
|
|
|
|
}
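    // Lookup helpers: 'i' is a bucket index within this span (asserted < NEntries),
    // while 'o' is an offset directly into the entry storage (asserted < allocated).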
    size_t offset(size_t i) const noexcept
    {
        return offsets[i];
    }
    bool hasNode(size_t i) const noexcept
    {
        return (offsets[i] != SpanConstants::UnusedEntry);
    }
    Node &at(size_t i) noexcept
    {
        Q_ASSERT(i < SpanConstants::NEntries);
        Q_ASSERT(offsets[i] != SpanConstants::UnusedEntry);

        return entries[offsets[i]].node();
    }
    const Node &at(size_t i) const noexcept
    {
        Q_ASSERT(i < SpanConstants::NEntries);
        Q_ASSERT(offsets[i] != SpanConstants::UnusedEntry);

        return entries[offsets[i]].node();
    }
    Node &atOffset(size_t o) noexcept
    {
        Q_ASSERT(o < allocated);

        return entries[o].node();
    }
    const Node &atOffset(size_t o) const noexcept
    {
        Q_ASSERT(o < allocated);

        return entries[o].node();
    }
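    // Moves the entry mapped at bucket 'from' to bucket 'to' within this span; only
    // the offset table changes, the node itself stays in place.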
    void moveLocal(size_t from, size_t to) noexcept
    {
        Q_ASSERT(offsets[from] != SpanConstants::UnusedEntry);
        Q_ASSERT(offsets[to] == SpanConstants::UnusedEntry);
        offsets[to] = offsets[from];
        offsets[from] = SpanConstants::UnusedEntry;
    }
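    // Moves the node stored in fromSpan at bucket fromIndex into bucket 'to' of this
    // span, taking a free storage slot here (growing storage if needed) and returning
    // the vacated slot to fromSpan's free list. Used when entries have to migrate
    // between spans, e.g. while the table is being rehashed.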
    void moveFromSpan(Span &fromSpan, size_t fromIndex, size_t to) noexcept(std::is_nothrow_move_constructible_v<Node>)
    {
        Q_ASSERT(to < SpanConstants::NEntries);
        Q_ASSERT(offsets[to] == SpanConstants::UnusedEntry);
        Q_ASSERT(fromIndex < SpanConstants::NEntries);
        Q_ASSERT(fromSpan.offsets[fromIndex] != SpanConstants::UnusedEntry);
        if (nextFree == allocated)
            addStorage();
        Q_ASSERT(nextFree < allocated);
        offsets[to] = nextFree;
        Entry &toEntry = entries[nextFree];
        nextFree = toEntry.nextFree();

        size_t fromOffset = fromSpan.offsets[fromIndex];
        fromSpan.offsets[fromIndex] = SpanConstants::UnusedEntry;
        Entry &fromEntry = fromSpan.entries[fromOffset];

        if constexpr (isRelocatable<Node>()) {
            memcpy(&toEntry, &fromEntry, sizeof(Entry));
        } else {
            new (&toEntry.node()) Node(std::move(fromEntry.node()));
            fromEntry.node().~Node();
        }
        fromEntry.nextFree() = fromSpan.nextFree;
        fromSpan.nextFree = static_cast<unsigned char>(fromOffset);
    }
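    // Grows this span's entry storage by one increment. Only called when the local
    // free list is exhausted (nextFree == allocated).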
    void addStorage()
    {
        Q_ASSERT(allocated < SpanConstants::NEntries);
        Q_ASSERT(nextFree == allocated);
        // the hash table should always be between 25 and 50% full
        // this implies that we on average have between 32 and 64 entries
        // in here. More exactly, we have a binomial distribution of the number of
        // occupied entries.
        // For a 25% filled table, the average is 32 entries, with a 95% chance that we have between
        // 23 and 41 entries.
        // For a 50% filled table, the average is 64 entries, with a 95% chance that we have between
        // 53 and 75 entries.
        // Since we only resize the table once it's 50% filled and we want to avoid copies of
        // data where possible, we initially allocate 48 entries, then resize to 80 entries, after that
        // resize by increments of 16. That way, we usually only get one resize of the table
        // while filling it.
        size_t alloc;
        static_assert(SpanConstants::NEntries % 8 == 0);
        if (!allocated)
            alloc = SpanConstants::NEntries / 8 * 3;
        else if (allocated == SpanConstants::NEntries / 8 * 3)
            alloc = SpanConstants::NEntries / 8 * 5;
        else
            alloc = allocated + SpanConstants::NEntries / 8;
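        // For example, with SpanConstants::NEntries == 128 the storage sizes grow as
        // 48 -> 80 -> 96 -> 112 -> 128 entries.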
        Entry *newEntries = new Entry[alloc];
        // we only add storage if the previous storage was fully filled, so
        // simply copy the old data over
        if constexpr (isRelocatable<Node>()) {
            if (allocated)
                memcpy(newEntries, entries, allocated * sizeof(Entry));
        } else {
            for (size_t i = 0; i < allocated; ++i) {
                new (&newEntries[i].node()) Node(std::move(entries[i].node()));
                entries[i].node().~Node();
            }
        }
        for (size_t i = allocated; i < alloc; ++i) {
            newEntries[i].nextFree() = uchar(i + 1);
        }
        delete[] entries;
        entries = newEntries;
        allocated = uchar(alloc);
    }
};

// QHash uses a power of two growth policy.
namespace GrowthPolicy {
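// For example, bucketsForCapacity(100) == 256: the smallest power of two that is
// >= 2 * 100. Any requested capacity up to 64 stays at a single span of
// SpanConstants::NEntries (128) buckets.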
inline constexpr size_t bucketsForCapacity(size_t requestedCapacity) noexcept
{
    constexpr int SizeDigits = std::numeric_limits<size_t>::digits;

    // We want to use at minimum a full span (128 entries), so we hardcode it for any requested
    // capacity <= 64. Any capacity above that gets rounded to a later power of two.
    if (requestedCapacity <= 64)
        return SpanConstants::NEntries;

    // Same as
    // qNextPowerOfTwo(2 * requestedCapacity);
    //
    // but ensuring neither our multiplication nor the function overflow.
    // Additionally, the maximum memory allocation is 2^31-1 or 2^63-1 bytes
    // (limited by qsizetype and ptrdiff_t).
    int count = qCountLeadingZeroBits(requestedCapacity);
    if (count < 2)
        return (std::numeric_limits<size_t>::max)(); // will cause std::bad_alloc
    return size_t(1) << (SizeDigits - count + 1);
}
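// nBuckets is always a power of two here (see above), so masking with (nBuckets - 1)
// is equivalent to hash % nBuckets, but cheaper.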
inline constexpr size_t bucketForHash(size_t nBuckets, size_t hash) noexcept
{
    return hash & (nBuckets - 1);
}
} // namespace GrowthPolicy

template <typename Node>
struct iterator;

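// Data implements the hash table proper: it owns the Spans that store the nodes and
// uses the GrowthPolicy above to map hash values to buckets; QHash and QMultiHash
// build on top of it.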
template <typename Node>
struct Data
{
    using Key = typename Node::KeyType;
    using T = typename Node::ValueType;
    using Span = QHashPrivate::Span<Node>;
    using iterator = QHashPrivate::iterator<Node>;

    QtPrivate::RefCount ref = {{1}};
    size_t size = 0;
    size_t numBuckets = 0;
    size_t seed = 0;
    Span *spans = nullptr;

    static constexpr size_t maxNumBuckets() noexcept
    {
        return (std::numeric_limits<ptrdiff_t>::max)() / sizeof(Span);
    }

    struct Bucket {
        Span *span;
        size_t index;

        Bucket(Span *s, size_t i) noexcept
            : span(s), index(i)
        {}
        Bucket(const Data *d, size_t bucket) noexcept
            : span(d->spans + (bucket >> SpanConstants::SpanShift)),
              index(bucket & SpanConstants::LocalBucketMask)
        {}
        Bucket(iterator it) noexcept
            : Bucket(it.d, it.bucket)
        {}

        size_t toBucketIndex(const Data *d) const noexcept
        {
            return ((span - d->spans) << SpanConstants::SpanShift) | index;
        }
        iterator toIterator(const Data *d) const noexcept { return iterator{d, toBucketIndex(d)}; }
        void advanceWrapped(const Data *d) noexcept
        {
            advance_impl(d, d->spans);
        }
        void advance(const Data *d) noexcept
        {
            advance_impl(d, nullptr);
        }
        bool isUnused() const noexcept
        {
            return !span->hasNode(index);
        }
        size_t offset() const noexcept
        {
            return span->offset(index);
        }
        Node &nodeAtOffset(size_t offset)
        {
            return span->atOffset(offset);
        }
        Node *node()
        {
            return &span->at(index);
        }
        Node *insert() const
        {
            return span->insert(index);
        }

    private:
        friend bool operator==(Bucket lhs, Bucket rhs) noexcept
        {
            return lhs.span == rhs.span && lhs.index == rhs.index;
        }
        friend bool operator!=(Bucket lhs, Bucket rhs) noexcept { return !(lhs == rhs); }

        void advance_impl(const Data *d, Span *whenAtEnd) noexcept
        {
            Q_ASSERT(span);
            ++index;
            if (Q_UNLIKELY(index == SpanConstants::NEntries)) {
                index = 0;
                ++span;
                if (span - d->spans == ptrdiff_t(d->numBuckets >> SpanConstants::SpanShift))
                    span = whenAtEnd;
            }
        }
    };

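    // Illustrative sketch (not part of the class): how a global bucket index
    // maps onto a (span, index) pair. Assuming SpanConstants::SpanShift == 7
    // and SpanConstants::LocalBucketMask == 127, consistent with 128-entry
    // spans:
    //
    //     size_t bucket = 300;
    //     size_t spanNr = bucket >> 7;   // == 2, i.e. the third span
    //     size_t local  = bucket & 127;  // == 44, slot inside that span
    //
    // Bucket(d, 300) then refers to d->spans[2] at index 44, and
    // toBucketIndex(d) reassembles (2 << 7) | 44 == 300.
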
    static auto allocateSpans(size_t numBuckets)
    {
        struct R {
            Span *spans;
            size_t nSpans;
        };

        // GCC 12 and 13 warn that the allocation below could overflow, and in
        // LTO mode the warning cannot simply be disabled (see QTBUG-113335).
        // It cannot actually overflow: when growing we go through
        // bucketsForCapacity(), and when copying or detaching we allocate the
        // same amount we previously allocated. So we mimic the compiler's
        // protection explicitly: Q_CHECK_PTR() calls qBadAlloc() if exceptions
        // are enabled, or qt_check_pointer() if only assertions are; if
        // neither is available there is no way to report the error, so we
        // assume it cannot happen.
        constexpr qptrdiff MaxSpanCount = (std::numeric_limits<qptrdiff>::max)() / sizeof(Span);
        constexpr size_t MaxBucketCount = MaxSpanCount << SpanConstants::SpanShift;

        if (numBuckets > MaxBucketCount) {
            Q_CHECK_PTR(false);
            Q_UNREACHABLE(); // no exceptions and no assertions -> no error reporting
        }

        size_t nSpans = numBuckets >> SpanConstants::SpanShift;
        return R{ new Span[nSpans], nSpans };
    }

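    // Rough numbers for the allocation above (a sketch; it assumes numBuckets
    // is always a multiple of SpanConstants::NEntries == 128, as produced by
    // GrowthPolicy::bucketsForCapacity()):
    //
    //     allocateSpans(128)  -> R{ 1 span,  1 }
    //     allocateSpans(1024) -> R{ 8 spans, 8 }
    //
    // Each Span manages the dense storage for up to 128 buckets, so the
    // overflow check bounds the number of Span objects rather than the raw
    // bucket count.
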
    Data(size_t reserve = 0)
    {
        numBuckets = GrowthPolicy::bucketsForCapacity(reserve);
        spans = allocateSpans(numBuckets).spans;
        seed = QHashSeed::globalSeed();
    }

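    // Construction sketch (illustration only; exact bucket counts depend on
    // GrowthPolicy, which keeps the load factor at or below 0.5):
    //
    //     Data d;        // reserve == 0: minimal table
    //     Data d2(100);  // room for 100 elements at <= 0.5 load factor,
    //                    // i.e. at least 200 buckets, rounded up by the policy
    //
    // The per-table seed comes from QHashSeed::globalSeed(), which randomizes
    // bucket positions between runs.
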
    void reallocationHelper(const Data &other, size_t nSpans, bool resized)
    {
        for (size_t s = 0; s < nSpans; ++s) {
            const Span &span = other.spans[s];
            for (size_t index = 0; index < SpanConstants::NEntries; ++index) {
                if (!span.hasNode(index))
                    continue;
                const Node &n = span.at(index);
                auto it = resized ? findBucket(n.key) : Bucket { spans + s, index };
                Q_ASSERT(it.isUnused());
                Node *newNode = it.insert();
                new (newNode) Node(n);
            }
        }
    }

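    // How the two copy constructors below drive reallocationHelper() (a sketch
    // for illustration): with an unchanged bucket count every node is copied
    // straight into the same (span, index) position; with a different bucket
    // count each key is re-hashed through findBucket().
    //
    //     Data plainCopy(other);          // resized == false, positions kept
    //     Data biggerCopy(other, 100000); // may resize, keys re-hashed
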
    Data(const Data &other) : size(other.size), numBuckets(other.numBuckets), seed(other.seed)
    {
        auto r = allocateSpans(numBuckets);
        spans = r.spans;
        reallocationHelper(other, r.nSpans, false);
    }
    Data(const Data &other, size_t reserved) : size(other.size), seed(other.seed)
    {
        numBuckets = GrowthPolicy::bucketsForCapacity(qMax(size, reserved));
        spans = allocateSpans(numBuckets).spans;
        size_t otherNSpans = other.numBuckets >> SpanConstants::SpanShift;
        reallocationHelper(other, otherNSpans, numBuckets != other.numBuckets);
    }

    static Data *detached(Data *d)
    {
        if (!d)
            return new Data;
        Data *dd = new Data(*d);
        if (!d->ref.deref())
            delete d;
        return dd;
    }
    static Data *detached(Data *d, size_t size)
    {
        if (!d)
            return new Data(size);
        Data *dd = new Data(*d, size);
        if (!d->ref.deref())
            delete d;
        return dd;
    }

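    // Illustrative caller sketch (the real call sites are in QHash itself):
    // a copy-on-write container detaches shared data before mutating it, e.g.
    //
    //     if (!d || d->ref.isShared())
    //         d = Data::detached(d);  // private copy; the old Data is deref'd
    //
    // ref.isShared() is assumed here from QtPrivate::RefCount.
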
    void clear()
    {
        delete[] spans;
        spans = nullptr;
        size = 0;
        numBuckets = 0;
    }

    iterator detachedIterator(iterator other) const noexcept
    {
        return iterator{this, other.bucket};
    }

    iterator begin() const noexcept
    {
        iterator it{ this, 0 };
        if (it.isUnused())
            ++it;
        return it;
    }

    constexpr iterator end() const noexcept
    {
        return iterator();
    }

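    // Sketch of whole-table iteration built on begin()/end(), assuming the
    // internal QHashPrivate::iterator skips unused buckets in operator++,
    // provides operator!=, and exposes the current node via node():
    //
    //     for (auto it = data->begin(); it != data->end(); ++it)
    //         use(it.node());
    //
    // For an empty table bucket 0 is unused, so the ++ inside begin() runs to
    // the end and begin() == end().
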
void rehash(size_t sizeHint = 0)
|
|
|
|
{
|
|
|
|
if (sizeHint == 0)
|
|
|
|
sizeHint = size;
|
|
|
|
size_t newBucketCount = GrowthPolicy::bucketsForCapacity(sizeHint);
|
|
|
|
|
|
|
|
Span *oldSpans = spans;
|
|
|
|
size_t oldBucketCount = numBuckets;
|
2023-05-03 18:11:36 +00:00
|
|
|
spans = allocateSpans(newBucketCount).spans;
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
numBuckets = newBucketCount;
|
2021-03-15 07:39:51 +00:00
|
|
|
size_t oldNSpans = oldBucketCount >> SpanConstants::SpanShift;
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
|
|
|
|
for (size_t s = 0; s < oldNSpans; ++s) {
|
|
|
|
Span &span = oldSpans[s];
|
2021-12-09 11:18:05 +00:00
|
|
|
for (size_t index = 0; index < SpanConstants::NEntries; ++index) {
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
if (!span.hasNode(index))
|
|
|
|
continue;
|
|
|
|
Node &n = span.at(index);
|
2021-03-15 07:39:51 +00:00
|
|
|
auto it = findBucket(n.key);
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
Q_ASSERT(it.isUnused());
|
2021-03-15 07:39:51 +00:00
|
|
|
Node *newNode = it.insert();
|
2020-02-07 11:29:59 +00:00
|
|
|
new (newNode) Node(std::move(n));
|
            }
            span.freeData();
        }
        delete[] oldSpans;
    }

    size_t nextBucket(size_t bucket) const noexcept
    {
        ++bucket;
        if (bucket == numBuckets)
            bucket = 0;
        return bucket;
    }

    float loadFactor() const noexcept
    {
        return float(size)/numBuckets;
    }
    bool shouldGrow() const noexcept
    {
        return size >= (numBuckets >> 1);
    }
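    // Growth policy in practice (worked example, numbers chosen for illustration only):
    // shouldGrow() keeps the table at or below a load factor of 0.5. With
    // numBuckets == 256, size >= (256 >> 1) first becomes true once 128 entries are
    // stored, so the next findOrInsert() of a new key rehashes the table before
    // claiming a bucket for that key.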
    template <typename K> Bucket findBucket(const K &key) const noexcept
    {
        static_assert(std::is_same_v<std::remove_cv_t<Key>, K> ||
                      QHashHeterogeneousSearch<std::remove_cv_t<Key>, K>::value);
        Q_ASSERT(numBuckets > 0);
        size_t hash = QHashPrivate::calculateHash(key, seed);
        Bucket bucket(this, GrowthPolicy::bucketForHash(numBuckets, hash));
        // loop over the buckets until we find the entry we are searching for,
        // or an empty slot, in which case we know the entry doesn't exist
        while (true) {
            size_t offset = bucket.offset();
            if (offset == SpanConstants::UnusedEntry) {
                return bucket;
            } else {
                Node &n = bucket.nodeAtOffset(offset);
                if (qHashEquals(n.key, key))
                    return bucket;
            }
            bucket.advanceWrapped(this);
        }
    }
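    // Usage sketch for findBucket(): the returned Bucket references either the node
    // whose key matches, or the first unused slot on the probe chain, so callers
    // only need isUnused() to tell the two cases apart. A minimal, hypothetical
    // lookup therefore reads:
    //
    //     Bucket b = findBucket(key);
    //     bool found = !b.isUnused();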
    template <typename K> Node *findNode(const K &key) const noexcept
    {
        auto bucket = findBucket(key);
        if (bucket.isUnused())
            return nullptr;
        return bucket.node();
    }
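    // Usage sketch for findNode(): read-only lookups reduce to a null check. A
    // hypothetical contains-style helper, assuming 'd' points at this Data object,
    // is simply:
    //
    //     bool contains = d->findNode(key) != nullptr;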
    struct InsertionResult
    {
        iterator it;
        bool initialized;
    };
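    // The 'initialized' flag records whether 'it' refers to a node that already
    // existed (true) or to a freshly claimed, still unconstructed slot (false);
    // in the latter case the caller is responsible for constructing the Node in
    // place before using it.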
    template <typename K> InsertionResult findOrInsert(const K &key) noexcept
    {
        Bucket it(static_cast<Span *>(nullptr), 0);
        if (numBuckets > 0) {
            it = findBucket(key);
            if (!it.isUnused())
                return { it.toIterator(this), true };
        }
        if (shouldGrow()) {
            rehash(size + 1);
            it = findBucket(key); // need to get a new iterator after rehashing
        }
        Q_ASSERT(it.span != nullptr);
        Q_ASSERT(it.isUnused());
        it.insert();
        ++size;
        return { it.toIterator(this), false };
    }
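    // Usage sketch for findOrInsert(): an operator[]-style accessor can be built on
    // top of it. The snippet below is only a hedged illustration for a key/value
    // Node; 'd' and the in-place construction step are assumptions of the sketch:
    //
    //     auto result = d->findOrInsert(key);
    //     if (!result.initialized)
    //         constructNodeInPlace(result.it.node(), key);  // hypothetical helper
    //     return result.it.node()->value;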
    void erase(Bucket bucket) noexcept(std::is_nothrow_destructible<Node>::value)
    {
        Q_ASSERT(bucket.span->hasNode(bucket.index));
        bucket.span->erase(bucket.index);
--size;
// re-insert the following entries to avoid holes
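        // (A lookup probes forward from the key's home bucket until it finds
        // the key or hits an unused bucket. If the erased bucket were simply
        // left empty, that hole could terminate a probe early and make entries
        // stored behind it unreachable. The loop below therefore walks the
        // following occupied buckets and moves an entry back into the hole
        // whenever the hole lies on the probe path between that entry's home
        // bucket and its current position.)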
Bucket next = bucket;
while (true) {
next.advanceWrapped(this);
size_t offset = next.offset();
if (offset == SpanConstants::UnusedEntry)
return;
size_t hash = QHashPrivate::calculateHash(next.nodeAtOffset(offset).key, seed);
Bucket newBucket(this, GrowthPolicy::bucketForHash(numBuckets, hash));
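            // newBucket is the current entry's home bucket. The inner loop
            // probes forward from it: reaching the entry's current position
            // first means the entry is still reachable and stays put; reaching
            // the hole first means the entry is moved into the hole, and the
            // hole advances to the entry's old position.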
while (true) {
if (newBucket == next) {
                    // nothing to do, item is at the right place
break;
} else if (newBucket == bucket) {
// move into the hole we created earlier
if (next.span == bucket.span) {
bucket.span->moveLocal(next.index, bucket.index);
} else {
// move between spans, more expensive
bucket.span->moveFromSpan(*next.span, next.index, bucket.index);
}
bucket = next;
break;
}
newBucket.advanceWrapped(this);
}
}
}
~Data()
{
delete [] spans;
}
};
template <typename Node>
struct iterator {
using Span = QHashPrivate::Span<Node>;
const Data<Node> *d = nullptr;
size_t bucket = 0;
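    // 'bucket' is a flat index over the whole table: the upper bits select the
    // span (each span holds 128 buckets) and the low bits select the entry
    // inside that span, as decoded by span() and index() below.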
size_t span() const noexcept { return bucket >> SpanConstants::SpanShift; }
size_t index() const noexcept { return bucket & SpanConstants::LocalBucketMask; }
inline bool isUnused() const noexcept { return !d->spans[span()].hasNode(index()); }
inline Node *node() const noexcept
{
Q_ASSERT(!isUnused());
return &d->spans[span()].at(index());
}
bool atEnd() const noexcept { return !d; }
iterator operator++() noexcept
{
while (true) {
++bucket;
if (bucket == d->numBuckets) {
d = nullptr;
bucket = 0;
break;
}
if (!isUnused())
break;
}
return *this;
}
bool operator==(iterator other) const noexcept
{ return d == other.d && bucket == other.bucket; }
bool operator!=(iterator other) const noexcept
{ return !(*this == other); }
};
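
// Illustrative sketch (not part of the header): how this internal iterator is
// meant to be driven. It assumes a valid iterator points at a used bucket of
// some Data<Node> table and relies only on the members defined above;
// 'data' and 'firstUsedBucket' are hypothetical names for such a table and
// the index of its first occupied bucket.
//
//     QHashPrivate::iterator<Node> it{ &data, firstUsedBucket };
//     while (!it.atEnd()) {
//         Node *n = it.node();   // asserts the bucket is in use
//         // ... use n->key and n->value ...
//         ++it;                  // skips unused buckets, ends with d == nullptr
//     }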
template <typename HashKey, typename KeyArgument>
using HeterogenousConstructProxy = std::conditional_t<
std::is_same_v<HashKey, q20::remove_cvref_t<KeyArgument>>,
KeyArgument, // HashKey == KeyArg w/ potential modifiers, so we keep modifiers
HashKey
>;
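
// Reading the conditional above: when the caller already passes the key type
// itself (possibly with const/reference qualifiers), the argument type is kept
// as-is, so it can be forwarded without creating a temporary key; for any
// other, heterogeneous argument type the proxy collapses to HashKey, i.e. a
// key gets constructed from the argument. For example, with HashKey = QString
// a 'const QString &' argument stays 'const QString &', while a
// QLatin1StringView argument yields QString as the proxy type.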
} // namespace QHashPrivate
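
// A minimal usage sketch for the public class defined below (illustrative
// only, not part of the header); insert(), contains() and value() are the
// long-standing QHash API:
//
//     QHash<QString, int> ages;
//     ages.insert(QStringLiteral("alice"), 30);
//     if (ages.contains(QStringLiteral("alice")))
//         qDebug() << ages.value(QStringLiteral("alice"));   // prints 30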
template <typename Key, typename T>
class QHash
{
using Node = QHashPrivate::Node<Key, T>;
using Data = QHashPrivate::Data<Node>;
friend class QSet<Key>;
friend class QMultiHash<Key, T>;
friend tst_QHash;
Data *d = nullptr;
|
2011-04-27 10:05:43 +00:00
|
|
|
|
|
|
|
public:
|
2020-01-17 13:33:53 +00:00
|
|
|
using key_type = Key;
|
|
|
|
using mapped_type = T;
|
|
|
|
using value_type = T;
|
|
|
|
using size_type = qsizetype;
|
|
|
|
using difference_type = qsizetype;
|
|
|
|
using reference = T &;
|
|
|
|
using const_reference = const T &;
|
|
|
|
|
|
|
|
inline QHash() noexcept = default;
|
2013-01-10 12:45:18 +00:00
|
|
|
inline QHash(std::initializer_list<std::pair<Key,T> > list)
|
2020-01-17 13:33:53 +00:00
|
|
|
: d(new Data(list.size()))
|
2013-01-10 12:45:18 +00:00
|
|
|
{
|
|
|
|
for (typename std::initializer_list<std::pair<Key,T> >::const_iterator it = list.begin(); it != list.end(); ++it)
|
|
|
|
insert(it->first, it->second);
|
|
|
|
}
|
2020-01-17 13:33:53 +00:00
|
|
|
QHash(const QHash &other) noexcept
|
|
|
|
: d(other.d)
|
|
|
|
{
|
|
|
|
if (d)
|
|
|
|
d->ref.ref();
|
|
|
|
}
|
|
|
|
~QHash()
|
|
|
|
{
|
2020-11-09 16:27:04 +00:00
|
|
|
static_assert(std::is_nothrow_destructible_v<Key>, "Types with throwing destructors are not supported in Qt containers.");
|
|
|
|
static_assert(std::is_nothrow_destructible_v<T>, "Types with throwing destructors are not supported in Qt containers.");
|
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
if (d && !d->ref.deref())
|
|
|
|
delete d;
|
|
|
|
}
|
|
|
|
|
|
|
|
QHash &operator=(const QHash &other) noexcept(std::is_nothrow_destructible<Node>::value)
|
|
|
|
{
|
|
|
|
if (d != other.d) {
|
|
|
|
Data *o = other.d;
|
|
|
|
if (o)
|
|
|
|
o->ref.ref();
|
|
|
|
if (d && !d->ref.deref())
|
|
|
|
delete d;
|
|
|
|
d = o;
|
|
|
|
}
|
|
|
|
return *this;
|
|
|
|
}
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
QHash(QHash &&other) noexcept
|
|
|
|
: d(std::exchange(other.d, nullptr))
|
|
|
|
{
|
|
|
|
}
|
Centralize the implementation of move assignment operators
At the moment we have two main strategies for dealing with move
assignment in Qt:
1) move-and-swap, used by "containers" (in the broad sense): containers,
but also smart pointers and similar classes that can hold user-defined
types;
2) pure swap, used by containers that hold only memory (e.g. QString,
QByteArray, ...) as well as most implicitly shared datatypes.
Given the fact that a move assignment operator's code is just
boilerplate (whether it's move-and-swap or pure swap), provide two
_strictly internal_ macros to help write them, and apply the macros
across corelib and gui, porting away from the hand-rolled
implementations.
The rule of thumb when porting to the new macros is:
* Try to stick to the existing code behavior, unless broken
* if changing, then follow this checklist:
* if the class does not have a move constructor => pure swap
(but consider ADDING a move constructor, if possible!)
* if the class does have a move constructor, try to follow the
criteria above, namely:
* if the class holds only memory, pure swap;
* if the class may hold anything else but memory (file handles,
etc.), then move and swap.
Noteworthy details:
* some operators planned to be removed in Qt 6 were not ported;
* as drive-by, some move constructors were simplified to be using
qExchange(); others were outright broken and got fixed;
* some contained some more interesting code and were not touched.
Change-Id: Idaab3489247dcbabb6df3fa1e5286b69e1d372e9
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
2020-01-03 16:18:02 +00:00
|
|
|
QT_MOVE_ASSIGNMENT_OPERATOR_IMPL_VIA_MOVE_AND_SWAP(QHash)
|
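// Illustrative sketch, not part of the Qt sources: the commit message above
// describes the macro as generating a conventional move-and-swap assignment
// operator. Assuming that, the generated operator is morally equivalent to
// the hand-written one below, shown on a hypothetical stand-in type so as
// not to restate QHash's own members:
struct MoveAndSwapSketch
{
    int *d = nullptr;                               // stand-in for a d-pointer
    MoveAndSwapSketch() = default;
    MoveAndSwapSketch(MoveAndSwapSketch &&other) noexcept
        : d(other.d) { other.d = nullptr; }         // move ctor steals the pointer
    void swap(MoveAndSwapSketch &other) noexcept
    { int *tmp = d; d = other.d; other.d = tmp; }
    MoveAndSwapSketch &operator=(MoveAndSwapSketch &&other) noexcept
    {
        MoveAndSwapSketch moved(std::move(other));  // 1) move-construct a temporary
        swap(moved);                                // 2) swap it into *this
        return *this;                               // 3) the old state is released
    }                                               //    when 'moved' is destroyed
};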
2016-03-03 19:20:42 +00:00
|
|
|
#ifdef Q_QDOC
|
|
|
|
template <typename InputIterator>
|
|
|
|
QHash(InputIterator f, InputIterator l);
|
|
|
|
#else
|
|
|
|
template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasKeyAndValue<InputIterator> = true>
|
|
|
|
QHash(InputIterator f, InputIterator l)
|
|
|
|
: QHash()
|
|
|
|
{
|
|
|
|
QtPrivate::reserveIfForwardIterator(this, f, l);
|
|
|
|
for (; f != l; ++f)
|
|
|
|
insert(f.key(), f.value());
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasFirstAndSecond<InputIterator> = true>
|
|
|
|
QHash(InputIterator f, InputIterator l)
|
|
|
|
: QHash()
|
|
|
|
{
|
|
|
|
QtPrivate::reserveIfForwardIterator(this, f, l);
|
2024-08-16 16:03:50 +00:00
|
|
|
for (; f != l; ++f) {
|
|
|
|
auto &&e = *f;
|
|
|
|
using V = decltype(e);
|
|
|
|
insert(std::forward<V>(e).first, std::forward<V>(e).second);
|
|
|
|
}
|
2016-03-03 19:20:42 +00:00
|
|
|
}
|
2011-04-27 10:05:43 +00:00
|
|
|
#endif
|
QtCore: replace qSwap with std::swap/member-swap where possible
qSwap() is a monster that looks for ADL overloads of swap() and also
detects the noexcept of the wrapped swap() function, so it should only
be used when the argument type is unknown. In the vast majority of
cases, the type is known to be efficiently std::swap()able or to have
a member-swap. Call either of these.
For the common case of pointer types, circumvent the expensive trait
checks on std::swap() by providing a hand-rolled qt_ptr_swap()
template, the advantage being that it can be unconditionally noexcept,
removing all type traits instantiations. Don't document it, otherwise
we'd be unable to pick it to 6.2.
Effects on Clang -ftime-trace of a PCH'ed libQt6Gui.so build:
before:
**** Template sets that took longest to instantiate:
[...]
27766 ms: qSwap<$> (9073 times, avg 3 ms)
[...]
2806 ms: std::swap<$> (1229 times, avg 2 ms)
(30572ms)
after:
**** Template sets that took longest to instantiate:
[...]
5047 ms: qSwap<$> (641 times, avg 7 ms)
[...]
3371 ms: std::swap<$> (1376 times, avg 2 ms)
[qt_ptr_swap<$> does not appear in the top 400, so < 905ms]
(< 9323ms)
As a drive-by, remove superfluous inline keywords and template
ornaments.
Task-number: QTBUG-97601
Pick-to: 6.3 6.2
Change-Id: I88f9b4e3cbece268c4a1238b6d50e5712a1bab5a
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Joerg Bornemann <joerg.bornemann@qt.io>
Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
2022-01-19 00:47:35 +00:00
|
|
|
void swap(QHash &other) noexcept { qt_ptr_swap(d, other.d); }
|
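// Illustrative sketch, not part of the Qt sources: the commit message above
// motivates qt_ptr_swap() as an unconditionally noexcept swap for pointers
// that avoids the type-trait instantiations behind qSwap()/std::swap(). A
// stand-alone helper in that spirit (the name is made up; the real
// qt_ptr_swap is defined elsewhere in QtCore) would look roughly like this:
template <typename P>
static void pointerSwapSketch(P *&lhs, P *&rhs) noexcept
{
    P *tmp = lhs;   // plain pointer assignments only: no ADL lookup,
    lhs = rhs;      // no noexcept detection, and they can never throw
    rhs = tmp;
}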
2011-04-27 10:05:43 +00:00
|
|
|
|
2024-07-31 14:41:02 +00:00
|
|
|
class const_iterator;
|
|
|
|
|
2022-10-11 08:26:28 +00:00
|
|
|
#ifndef Q_QDOC
|
2024-07-31 14:41:02 +00:00
|
|
|
private:
|
|
|
|
static bool compareIterators(const const_iterator &lhs, const const_iterator &rhs)
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
2024-07-31 14:41:02 +00:00
|
|
|
return lhs.i.node()->valuesEqual(rhs.i.node());
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename AKey = Key, typename AT = T,
|
|
|
|
QTypeTraits::compare_eq_result_container<QHash, AKey, AT> = true>
|
|
|
|
friend bool comparesEqual(const QHash &lhs, const QHash &rhs) noexcept
|
|
|
|
{
|
|
|
|
if (lhs.d == rhs.d)
|
2020-01-17 13:33:53 +00:00
|
|
|
return true;
|
2024-07-31 14:41:02 +00:00
|
|
|
if (lhs.size() != rhs.size())
|
2020-01-17 13:33:53 +00:00
|
|
|
return false;
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2024-07-31 14:41:02 +00:00
|
|
|
for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
|
|
|
|
const_iterator i = lhs.find(it.key());
|
|
|
|
if (i == lhs.end() || !compareIterators(i, it))
|
2020-01-17 13:33:53 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// sizes are equal and every key/value pair of rhs was found in lhs, so the hashes are equal
|
|
|
|
return true;
|
|
|
|
}
|
2024-07-31 14:41:02 +00:00
|
|
|
QT_DECLARE_EQUALITY_OPERATORS_HELPER(QHash, QHash, /* non-constexpr */, noexcept,
|
|
|
|
template <typename AKey = Key, typename AT = T,
|
|
|
|
QTypeTraits::compare_eq_result_container<QHash, AKey, AT> = true>)
|
|
|
|
public:
|
2021-10-08 16:06:49 +00:00
|
|
|
#else
|
2024-07-31 14:41:02 +00:00
|
|
|
friend bool operator==(const QHash &lhs, const QHash &rhs) noexcept;
|
|
|
|
friend bool operator!=(const QHash &lhs, const QHash &rhs) noexcept;
|
2022-10-11 08:26:28 +00:00
|
|
|
#endif // Q_QDOC
|
2020-01-17 13:33:53 +00:00
|
|
|
|
|
|
|
inline qsizetype size() const noexcept { return d ? qsizetype(d->size) : 0; }
|
2024-12-03 13:06:17 +00:00
|
|
|
|
|
|
|
[[nodiscard]]
|
2020-01-17 13:33:53 +00:00
|
|
|
inline bool isEmpty() const noexcept { return !d || d->size == 0; }
|
|
|
|
|
|
|
|
inline qsizetype capacity() const noexcept { return d ? qsizetype(d->numBuckets >> 1) : 0; }
|
|
|
|
void reserve(qsizetype size)
|
|
|
|
{
|
2022-03-30 15:09:32 +00:00
|
|
|
// reserve(0) is used in squeeze()
|
|
|
|
if (size && (this->capacity() >= size))
|
|
|
|
return;
|
2020-01-17 13:33:53 +00:00
|
|
|
if (isDetached())
|
|
|
|
d->rehash(size);
|
|
|
|
else
|
|
|
|
d = Data::detached(d, size_t(size));
|
|
|
|
}
|
2021-07-23 12:08:33 +00:00
|
|
|
inline void squeeze()
|
|
|
|
{
|
|
|
|
if (capacity())
|
|
|
|
reserve(0);
|
|
|
|
}
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
inline void detach() { if (!d || d->ref.isShared()) d = Data::detached(d); }
|
|
|
|
inline bool isDetached() const noexcept { return d && !d->ref.isShared(); }
|
|
|
|
bool isSharedWith(const QHash &other) const noexcept { return d == other.d; }
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
void clear() noexcept(std::is_nothrow_destructible<Node>::value)
|
|
|
|
{
|
|
|
|
if (d && !d->ref.deref())
|
|
|
|
delete d;
|
|
|
|
d = nullptr;
|
|
|
|
}
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
    bool remove(const Key &key)
    {
        return removeImpl(key);
    }
private:
    template <typename K> bool removeImpl(const K &key)
    {
        if (isEmpty()) // prevents detaching shared null
            return false;
        auto it = d->findBucket(key);
        size_t bucket = it.toBucketIndex(d);
        detach();
        it = typename Data::Bucket(d, bucket); // reattach in case of detach
        if (it.isUnused())
            return false;
        d->erase(it);
        return true;
    }
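    // Explanatory note (added comment): the bucket is found on the possibly shared
    // table first and only its numeric index is kept across detach(), because
    // detach() may replace d with a fresh copy and would leave the old Bucket
    // pointing at the shared data; the Bucket is rebuilt against the new d before
    // erasing. Caller-side sketch with a hypothetical QHash<QString, int> h:
    //
    //     bool removed = h.remove(QStringLiteral("a"));  // true only if "a" was present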
public:
    template <typename Predicate>
    qsizetype removeIf(Predicate pred)
    {
        return QtPrivate::associative_erase_if(*this, pred);
    }
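    // Hedged usage sketch for removeIf(): erases every entry for which the predicate
    // returns true and returns how many were removed. One of the supported predicate
    // forms (taking the container's iterator) is shown; 'hash' is a hypothetical name.
    //
    //     QHash<QString, int> hash{{QStringLiteral("a"), 1}, {QStringLiteral("b"), -2}};
    //     qsizetype n = hash.removeIf([](QHash<QString, int>::iterator it) {
    //         return it.value() < 0;    // drop all negative values
    //     });
    //     // n == 1; only {"a", 1} remains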
    T take(const Key &key)
    {
        return takeImpl(key);
    }
private:
    template <typename K> T takeImpl(const K &key)
    {
        if (isEmpty()) // prevents detaching shared null
            return T();
        auto it = d->findBucket(key);
        size_t bucket = it.toBucketIndex(d);
        detach();
        it = typename Data::Bucket(d, bucket); // reattach in case of detach
        if (it.isUnused())
            return T();
        T value = it.node()->takeValue();
        d->erase(it);
        return value;
    }
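    // Hedged usage sketch for take(): removes the entry and returns its value, or a
    // default-constructed T when the key is absent. 'cache' is a hypothetical name.
    //
    //     QHash<int, QByteArray> cache;
    //     cache.insert(7, QByteArrayLiteral("payload"));
    //     QByteArray data = cache.take(7);   // "payload"; the entry is erased
    //     QByteArray miss = cache.take(7);   // empty: the key is no longer present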
public:
    bool contains(const Key &key) const noexcept
    {
        if (!d)
            return false;
        return d->findNode(key) != nullptr;
    }
    qsizetype count(const Key &key) const noexcept
    {
        return contains(key) ? 1 : 0;
    }
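    // Since QHash keeps at most one value per key, count(key) can only be 0 or 1;
    // multi-valued counting belongs to the separate QMultiHash class. Illustrative
    // sketch with a hypothetical hash 'h':
    //
    //     QHash<QString, int> h;
    //     h.insert(QStringLiteral("x"), 1);
    //     h.insert(QStringLiteral("x"), 2);                 // replaces the value for "x"
    //     bool present = h.contains(QStringLiteral("x"));   // true
    //     qsizetype n = h.count(QStringLiteral("x"));       // 1, never 2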
private:
    const Key *keyImpl(const T &value) const noexcept
    {
        if (d) {
            const_iterator i = begin();
            while (i != end()) {
                if (i.value() == value)
                    return &i.key();
                ++i;
            }
        }
        return nullptr;
    }
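    // Note (added comment): the table is only indexed by key, so this reverse lookup
    // has to walk every entry; key() is therefore linear in size(), unlike the
    // bucket-based key lookups above.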
public:
    Key key(const T &value) const noexcept
    {
        if (auto *k = keyImpl(value))
            return *k;
        else
            return Key();
    }
    Key key(const T &value, const Key &defaultKey) const noexcept
    {
        if (auto *k = keyImpl(value))
            return *k;
        else
            return defaultKey;
    }
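    // Hedged usage sketch for the reverse lookup key(): returns some key that maps to
    // the given value, or the supplied default when none does. 'ages' is hypothetical.
    //
    //     QHash<QString, int> ages;
    //     ages.insert(QStringLiteral("alice"), 42);
    //     QString who  = ages.key(42);                               // "alice"
    //     QString none = ages.key(7, QStringLiteral("<unknown>"));   // "<unknown>"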
private:
    template <typename K>
    T *valueImpl(const K &key) const noexcept
    {
        if (d) {
            Node *n = d->findNode(key);
            if (n)
                return &n->value;
        }
        return nullptr;
    }
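    // Note (added comment): the private template <typename K> helpers (removeImpl,
    // takeImpl, valueImpl) let the public Key-based overloads share one code path and,
    // presumably, allow lookups with key-compatible types without first constructing a
    // Key where the key type opts in to heterogeneous lookup.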
public:
    T value(const Key &key) const noexcept
    {
        if (T *v = valueImpl(key))
            return *v;
        else
            return T();
    }
    T value(const Key &key, const T &defaultValue) const noexcept
    {
        if (T *v = valueImpl(key))
            return *v;
        else
            return defaultValue;
    }
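    // Hedged usage sketch for value(): the one-argument form falls back to a
    // value-initialized T, the two-argument form to the caller's default. 'ports'
    // is a hypothetical name.
    //
    //     QHash<QString, int> ports;
    //     ports.insert(QStringLiteral("http"), 80);
    //     int p1 = ports.value(QStringLiteral("http"));        // 80
    //     int p2 = ports.value(QStringLiteral("gopher"));      // 0, value-initialized
    //     int p3 = ports.value(QStringLiteral("gopher"), -1);  // -1, explicit default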
T &operator[](const Key &key)
{
return *tryEmplace(key).iterator;
}
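    // Const overload: returns the value for key by value. A missing key yields a
    // default-constructed T and leaves the hash unchanged; it simply forwards to
    // value(key).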
const T operator[](const Key &key) const noexcept
{
return value(key);
}
QList<Key> keys() const { return QList<Key>(keyBegin(), keyEnd()); }
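    // Collects every key whose mapped value compares equal to the given value;
    // this is a linear scan over all entries, as the loop below shows.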
QList<Key> keys(const T &value) const
{
QList<Key> res;
const_iterator i = begin();
while (i != end()) {
if (i.value() == value)
res.append(i.key());
++i;
}
return res;
}
QList<T> values() const { return QList<T>(begin(), end()); }
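    // A minimal usage sketch of the accessors above (illustrative only; the
    // "population" hash and its contents are hypothetical, not part of this header):
    //
    //     QHash<QString, int> population;
    //     population[QStringLiteral("Oslo")] = 700000;          // inserts if absent
    //     population.insert(QStringLiteral("Paris"), 2100000);
    //     QList<QString> cities = population.keys();             // every key
    //     QList<int> counts = population.values();               // every value
    //     QList<QString> big = population.keys(2100000);         // keys mapped to 2100000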
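    // Mutable forward iterator, a thin wrapper around QHashPrivate::iterator<Node>.
    // As the commit-message annotation above notes, entries do not keep stable
    // addresses when the table grows, so iterators must be treated as invalidated
    // by any insertion that can rehash.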
class iterator
{
using piter = typename QHashPrivate::iterator<Node>;
friend class const_iterator;
friend class QHash<Key, T>;
friend class QSet<Key>;
piter i;
explicit inline iterator(piter it) noexcept : i(it) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef qptrdiff difference_type;
typedef T value_type;
typedef T *pointer;
typedef T &reference;
constexpr iterator() noexcept = default;
inline const Key &key() const noexcept { return i.node()->key; }
inline T &value() const noexcept { return i.node()->value; }
inline T &operator*() const noexcept { return i.node()->value; }
inline T *operator->() const noexcept { return &i.node()->value; }
inline bool operator==(const iterator &o) const noexcept { return i == o.i; }
inline bool operator!=(const iterator &o) const noexcept { return i != o.i; }
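        // Only forward traversal is provided (iterator_category is
        // std::forward_iterator_tag): operator++ advances the wrapped
        // QHashPrivate iterator to the next occupied entry; there is no operator--.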
inline iterator &operator++() noexcept
{
++i;
return *this;
}
inline iterator operator++(int) noexcept
{
iterator r = *this;
++i;
return r;
}
inline bool operator==(const const_iterator &o) const noexcept { return i == o.i; }
inline bool operator!=(const const_iterator &o) const noexcept { return i != o.i; }
};
friend class iterator;
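    // Read-only counterpart of iterator, with the same forward-only semantics;
    // implicitly convertible from iterator via the converting constructor below.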
class const_iterator
{
using piter = typename QHashPrivate::iterator<Node>;
friend class iterator;
friend class QHash<Key, T>;
friend class QSet<Key>;
piter i;
explicit inline const_iterator(piter it) : i(it) { }
public:
typedef std::forward_iterator_tag iterator_category;
typedef qptrdiff difference_type;
typedef T value_type;
typedef const T *pointer;
typedef const T &reference;
constexpr const_iterator() noexcept = default;
|
|
|
|
inline const_iterator(const iterator &o) noexcept : i(o.i) { }
|
|
|
|
|
|
|
|
inline const Key &key() const noexcept { return i.node()->key; }
|
|
|
|
inline const T &value() const noexcept { return i.node()->value; }
|
|
|
|
inline const T &operator*() const noexcept { return i.node()->value; }
|
|
|
|
inline const T *operator->() const noexcept { return &i.node()->value; }
|
|
|
|
inline bool operator==(const const_iterator &o) const noexcept { return i == o.i; }
|
|
|
|
inline bool operator!=(const const_iterator &o) const noexcept { return i != o.i; }
|
|
|
|
|
|
|
|
inline const_iterator &operator++() noexcept
|
2018-11-22 11:00:53 +00:00
|
|
|
{
|
2020-01-17 13:33:53 +00:00
|
|
|
++i;
|
2011-04-27 10:05:43 +00:00
|
|
|
return *this;
|
|
|
|
}
|
2020-01-17 13:33:53 +00:00
|
|
|
inline const_iterator operator++(int) noexcept
|
2018-11-22 11:00:53 +00:00
|
|
|
{
|
2011-04-27 10:05:43 +00:00
|
|
|
const_iterator r = *this;
|
2020-01-17 13:33:53 +00:00
|
|
|
++i;
|
2011-04-27 10:05:43 +00:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
};
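A minimal usage sketch for the const_iterator API above (not part of the header; assumes the usual Qt Core includes such as <QHash> and <QDebug>):

    // Read-only traversal: cbegin()/cend() return const_iterators; key() and
    // value() read the current entry without detaching the shared data.
    QHash<QString, int> hash = { { QStringLiteral("a"), 1 }, { QStringLiteral("b"), 2 } };
    for (auto it = hash.cbegin(); it != hash.cend(); ++it)
        qDebug() << it.key() << it.value();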
|
|
|
|
friend class const_iterator;
|
|
|
|
|
2015-06-22 16:21:34 +00:00
|
|
|
class key_iterator
|
|
|
|
{
|
|
|
|
const_iterator i;
|
|
|
|
|
|
|
|
public:
|
|
|
|
typedef typename const_iterator::iterator_category iterator_category;
|
2020-01-17 13:33:53 +00:00
|
|
|
typedef qptrdiff difference_type;
|
2015-06-22 16:21:34 +00:00
|
|
|
typedef Key value_type;
|
|
|
|
typedef const Key *pointer;
|
|
|
|
typedef const Key &reference;
|
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
key_iterator() noexcept = default;
|
|
|
|
explicit key_iterator(const_iterator o) noexcept : i(o) { }
|
2015-06-22 16:21:34 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
const Key &operator*() const noexcept { return i.key(); }
|
|
|
|
const Key *operator->() const noexcept { return &i.key(); }
|
|
|
|
bool operator==(key_iterator o) const noexcept { return i == o.i; }
|
|
|
|
bool operator!=(key_iterator o) const noexcept { return i != o.i; }
|
2015-06-22 16:21:34 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
inline key_iterator &operator++() noexcept { ++i; return *this; }
|
|
|
|
inline key_iterator operator++(int) noexcept { return key_iterator(i++); }
|
|
|
|
const_iterator base() const noexcept { return i; }
|
2015-06-22 16:21:34 +00:00
|
|
|
};
|
|
|
|
|
2016-11-29 10:06:24 +00:00
|
|
|
typedef QKeyValueIterator<const Key&, const T&, const_iterator> const_key_value_iterator;
|
|
|
|
typedef QKeyValueIterator<const Key&, T&, iterator> key_value_iterator;
|
|
|
|
|
2011-04-27 10:05:43 +00:00
|
|
|
// STL style
|
2024-10-17 11:56:36 +00:00
|
|
|
inline iterator begin() { if (!d) return iterator(); detach(); return iterator(d->begin()); }
|
2020-01-17 13:33:53 +00:00
|
|
|
inline const_iterator begin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
|
|
|
|
inline const_iterator cbegin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
|
|
|
|
inline const_iterator constBegin() const noexcept { return d ? const_iterator(d->begin()): const_iterator(); }
|
|
|
|
inline iterator end() noexcept { return iterator(); }
|
|
|
|
inline const_iterator end() const noexcept { return const_iterator(); }
|
|
|
|
inline const_iterator cend() const noexcept { return const_iterator(); }
|
|
|
|
inline const_iterator constEnd() const noexcept { return const_iterator(); }
|
|
|
|
inline key_iterator keyBegin() const noexcept { return key_iterator(begin()); }
|
|
|
|
inline key_iterator keyEnd() const noexcept { return key_iterator(end()); }
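A small sketch of key-only iteration through keyBegin()/keyEnd() (illustrative; uses the QHash<QString, int> named hash from the sketch above):

    // key_iterator visits the same entries as const_iterator but dereferences to the key.
    for (auto it = hash.keyBegin(); it != hash.keyEnd(); ++it)
        qDebug() << *it;   // *it is a const QString &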
|
2016-11-29 10:06:24 +00:00
|
|
|
inline key_value_iterator keyValueBegin() { return key_value_iterator(begin()); }
|
|
|
|
inline key_value_iterator keyValueEnd() { return key_value_iterator(end()); }
|
2020-01-17 13:33:53 +00:00
|
|
|
inline const_key_value_iterator keyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
|
|
|
|
inline const_key_value_iterator constKeyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
|
|
|
|
inline const_key_value_iterator keyValueEnd() const noexcept { return const_key_value_iterator(end()); }
|
|
|
|
inline const_key_value_iterator constKeyValueEnd() const noexcept { return const_key_value_iterator(end()); }
|
Associative containers: add a way to obtain a key/value range
Our associative containers' iterator's value_type isn't a destructurable
type (yielding key/value). This means that something like
for (auto [k, v] : map)
doesn't even compile -- one can only "directly" iterate on the
values. For quite some time we've had QKeyValueIterator to allow
key/value iteration, but then one had to resort to a "traditional" for
loop:
for (auto i = map.keyValueBegin(), e = map.keyValueEnd(); i != e; ++i)
This can be easily packaged in an adaptor class, which is what this
commit does, thereby offering a C++17-compatible way to obtain
key/value iteration over associative containers.
Something possibly peculiar is the fact that the range so obtained is
a range of pairs of references -- not a range of references to pairs.
But that's easily explained by the fact that we have no pairs to build
references to; hence,
for (auto &[k, v] : map.asKeyValueRange())
doesn't compile (lvalue reference doesn't bind to prvalue pair).
Instead, both of these compile:
for (auto [k, v] : map.asKeyValueRange())
for (auto &&[k, v] : map.asKeyValueRange())
and in *both* cases one gets references to the keys/values in the map.
If the map is non-const, the reference to the value is mutable.
Last but not least, implement pinning for rvalue containers.
[ChangeLog][QtCore][QMap] Added asKeyValueRange().
[ChangeLog][QtCore][QMultiMap] Added asKeyValueRange().
[ChangeLog][QtCore][QHash] Added asKeyValueRange().
[ChangeLog][QtCore][QMultiHash] Added asKeyValueRange().
Task-number: QTBUG-4615
Change-Id: Ic8506bff38b2f753494b21ab76f52e05c06ffc8b
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
2022-03-01 03:24:38 +00:00
|
|
|
auto asKeyValueRange() & { return QtPrivate::QKeyValueRange(*this); }
|
|
|
|
auto asKeyValueRange() const & { return QtPrivate::QKeyValueRange(*this); }
|
|
|
|
auto asKeyValueRange() && { return QtPrivate::QKeyValueRange(std::move(*this)); }
|
|
|
|
auto asKeyValueRange() const && { return QtPrivate::QKeyValueRange(std::move(*this)); }
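A short sketch of the key/value range adaptor in use (illustrative; again assumes the QHash<QString, int> named hash from the earlier sketch, plus <utility> for std::as_const):

    // Structured bindings over asKeyValueRange(); k and v are references into the hash.
    for (auto [k, v] : hash.asKeyValueRange())
        v += 1;                                     // v is mutable because hash is non-const
    for (auto &&[k, v] : std::as_const(hash).asKeyValueRange())
        qDebug() << k << v;                         // const overload: read-only references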
|
2015-06-22 16:21:34 +00:00
|
|
|
|
2024-01-31 13:26:27 +00:00
|
|
|
struct TryEmplaceResult
|
|
|
|
{
|
|
|
|
QHash::iterator iterator;
|
|
|
|
bool inserted;
|
2024-11-28 12:08:25 +00:00
|
|
|
|
|
|
|
TryEmplaceResult() = default;
|
|
|
|
// Generated SMFs are fine!
|
|
|
|
TryEmplaceResult(QHash::iterator it, bool b)
|
|
|
|
: iterator(it), inserted(b)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
// Implicit conversion _from_ the return-type of try_emplace:
|
|
|
|
Q_IMPLICIT TryEmplaceResult(const std::pair<key_value_iterator, bool> &p)
|
|
|
|
: iterator(p.first.base()), inserted(p.second)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
// Implicit conversion _to_ the return-type of try_emplace:
|
|
|
|
Q_IMPLICIT operator std::pair<key_value_iterator, bool>()
|
|
|
|
{
|
|
|
|
return { key_value_iterator(iterator), inserted };
|
|
|
|
}
|
2024-01-31 13:26:27 +00:00
|
|
|
};
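A hedged sketch of how this result type is meant to be consumed (illustrative only; the try_emplace call is an assumption based on the comments above, which describe its return type as std::pair<key_value_iterator, bool>):

    // Assumption: a try_emplace(key, value) member exists and returns the pair type
    // named above; the implicit conversion then lets it be stored as TryEmplaceResult.
    QHash<QString, int>::TryEmplaceResult r = hash.try_emplace(QStringLiteral("c"), 3);
    if (r.inserted)                                 // true only if the key was absent
        qDebug() << "inserted" << r.iterator.key();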
|
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
iterator erase(const_iterator it)
|
|
|
|
{
|
|
|
|
Q_ASSERT(it != constEnd());
|
|
|
|
detach();
|
|
|
|
// ensure a valid iterator across the detach:
|
|
|
|
iterator i = iterator{d->detachedIterator(it.i)};
|
2021-03-15 07:39:51 +00:00
|
|
|
typename Data::Bucket bucket(i.i);
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2021-03-15 07:39:51 +00:00
|
|
|
d->erase(bucket);
|
|
|
|
if (bucket.toBucketIndex(d) == d->numBuckets - 1 || bucket.isUnused())
|
|
|
|
++i;
|
|
|
|
return i;
|
2020-01-17 13:33:53 +00:00
|
|
|
}
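A brief usage sketch for erase() (illustrative; assumes the QHash<QString, int> named hash from the earlier sketches):

    // erase() returns an iterator to the element after the erased one, so entries
    // can be removed safely while iterating.
    for (auto it = hash.begin(); it != hash.end(); ) {
        if (it.value() > 1)
            it = hash.erase(it);
        else
            ++it;
    }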
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2023-12-12 08:20:30 +00:00
|
|
|
std::pair<iterator, iterator> equal_range(const Key &key)
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
2024-03-06 00:52:36 +00:00
|
|
|
return equal_range_impl(*this, key);
|
2020-01-17 13:33:53 +00:00
|
|
|
}
|
2023-12-12 08:20:30 +00:00
|
|
|
std::pair<const_iterator, const_iterator> equal_range(const Key &key) const noexcept
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
2024-03-06 00:52:36 +00:00
|
|
|
return equal_range_impl(*this, key);
|
|
|
|
}
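A brief sketch of equal_range() in use (illustrative; assumes the QHash<QString, int> named hash from the earlier sketches):

    // For QHash the returned range holds at most one element; a QMultiHash can yield more.
    auto [first, last] = hash.equal_range(QStringLiteral("a"));
    for (auto it = first; it != last; ++it)
        qDebug() << it.key() << it.value();         // visits the matching entry, if any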
|
|
|
|
private:
|
|
|
|
template <typename Hash, typename K> static auto equal_range_impl(Hash &self, const K &key)
|
|
|
|
{
|
|
|
|
auto first = self.find(key);
|
2020-01-17 13:33:53 +00:00
|
|
|
auto second = first;
|
2024-03-06 00:52:36 +00:00
|
|
|
if (second != decltype(first){})
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
++second;
|
2024-03-06 00:52:36 +00:00
|
|
|
return std::make_pair(first, second);
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
}
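    // Note: findImpl() hands out a mutable iterator, so it must detach() a shared
    // table first. The bucket index is captured before detaching and the bucket is
    // re-resolved afterwards, because detach() allocates a fresh table.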
    template <typename K> iterator findImpl(const K &key)
    {
        if (isEmpty()) // prevents detaching shared null
            return end();
        auto it = d->findBucket(key);
        size_t bucket = it.toBucketIndex(d);
        detach();
        it = typename Data::Bucket(d, bucket); // reattach in case of detach
        if (it.isUnused())
            return end();
        return iterator(it.toIterator(d));
    }
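    // constFindImpl() is the non-detaching counterpart: it only reads the (possibly
    // shared) table, so it can be noexcept and never copies the data.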
    template <typename K> const_iterator constFindImpl(const K &key) const noexcept
    {
        if (isEmpty())
            return end();
        auto it = d->findBucket(key);
        if (it.isUnused())
            return end();
        return const_iterator({d, it.toBucketIndex(d)});
    }
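// Public, STL-compatible lookup interface. The non-const find() goes through
// findImpl() and therefore detaches; the const overloads never copy the table.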
public:
    typedef iterator Iterator;
    typedef const_iterator ConstIterator;
    inline qsizetype count() const noexcept { return d ? qsizetype(d->size) : 0; }

    iterator find(const Key &key)
    {
        return findImpl(key);
    }
    const_iterator find(const Key &key) const noexcept
    {
        return constFindImpl(key);
    }
    const_iterator constFind(const Key &key) const noexcept
    {
        return find(key);
    }
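    // Usage sketch (illustrative only, not part of the header):
    //     QHash<QString, int> h;
    //     h.insert(QStringLiteral("answer"), 42);
    //     if (auto it = h.constFind(QStringLiteral("answer")); it != h.cend())
    //         const int v = it.value(); // found; no detach happened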
    iterator insert(const Key &key, const T &value)
    {
        return emplace(key, value);
    }
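    // Inserting another hash unites the two key sets; for keys present in both,
    // the value from 'hash' is expected to replace the existing value, since
    // emplace() overwrites the value of an already present key.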
    void insert(const QHash &hash)
    {
        if (d == hash.d || !hash.d)
            return;
        if (!d) {
            *this = hash;
            return;
        }

        detach();

        for (auto it = hash.begin(); it != hash.end(); ++it)
            emplace(it.key(), it.value());
    }
    template <typename ...Args>
    iterator emplace(const Key &key, Args &&... args)
    {
        Key copy = key; // Needs to be explicit for MSVC 2019
        return emplace(std::move(copy), std::forward<Args>(args)...);
    }
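    // The rvalue overload below does the real work. When the table is shared or has
    // to grow, 'args' may refer into this very hash, so the value is either
    // constructed up front or a temporary copy of the hash is kept alive until the
    // new element has been created.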
    template <typename ...Args>
    iterator emplace(Key &&key, Args &&... args)
    {
        if (isDetached()) {
            if (d->shouldGrow()) // Construct the value now so that no dangling references are used
                return emplace_helper(std::move(key), T(std::forward<Args>(args)...));
            return emplace_helper(std::move(key), std::forward<Args>(args)...);
        }
        // else: we must detach
        const auto copy = *this; // keep 'args' alive across the detach/growth
        detach();
        return emplace_helper(std::move(key), std::forward<Args>(args)...);
    }
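    // tryEmplace()/tryInsert() insert only if the key is not already present and
    // report, together with an iterator to the (new or pre-existing) element,
    // whether an insertion actually happened; see tryEmplace_impl() below.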
    template <typename... Args>
    TryEmplaceResult tryEmplace(const Key &key, Args &&...args)
    {
        return tryEmplace_impl(key, std::forward<Args>(args)...);
    }
    template <typename... Args>
    TryEmplaceResult tryEmplace(Key &&key, Args &&...args)
    {
        return tryEmplace_impl(std::move(key), std::forward<Args>(args)...);
    }
    TryEmplaceResult tryInsert(const Key &key, const T &value)
    {
        return tryEmplace_impl(key, value);
    }
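    // std::unordered_map-compatible spellings; the hint overloads ignore the hint.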
    template <typename... Args>
    std::pair<key_value_iterator, bool> try_emplace(const Key &key, Args &&...args)
    {
        return tryEmplace_impl(key, std::forward<Args>(args)...);
    }
    template <typename... Args>
    std::pair<key_value_iterator, bool> try_emplace(Key &&key, Args &&...args)
    {
        return tryEmplace_impl(std::move(key), std::forward<Args>(args)...);
    }
    template <typename... Args>
    key_value_iterator try_emplace(const_iterator /*hint*/, const Key &key, Args &&...args)
    {
        return key_value_iterator(tryEmplace_impl(key, std::forward<Args>(args)...).iterator);
    }
    template <typename... Args>
    key_value_iterator try_emplace(const_iterator /*hint*/, Key &&key, Args &&...args)
    {
        return key_value_iterator(tryEmplace_impl(std::move(key), std::forward<Args>(args)...).iterator);
    }
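    // Shared implementation: a single findBucket() decides whether to insert.
    // 'detachGuard' keeps the previous table alive across detach()/growth so that a
    // 'key' or 'args' aliasing existing elements stays valid until the new node has
    // been constructed.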
private:
    template <typename K, typename... Args>
    TryEmplaceResult tryEmplace_impl(K &&key, Args &&...args)
    {
        if (!d)
            detach();
        QHash detachGuard;

        typename Data::Bucket bucket = d->findBucket(key);
        const bool shouldInsert = bucket.isUnused();

        // Even if we don't insert we may have to detach because we are
        // returning a non-const iterator:
        if (!isDetached() || (shouldInsert && d->shouldGrow())) {
            detachGuard = *this;
            const bool resized = shouldInsert && d->shouldGrow();
            const size_t bucketIndex = bucket.toBucketIndex(d);
            // Like reserve(), but unconditionally detaching if no need to grow:
            if (isDetached())
                d->rehash(d->size + 1);
            else
                d = Data::detached(d, d->size + (shouldInsert ? 1 : 0));
            bucket = resized ? d->findBucket(key) : typename Data::Bucket(d, bucketIndex);
        }
        if (shouldInsert) {
            Node *n = bucket.insert();
            using ConstructProxy = typename QHashPrivate::HeterogenousConstructProxy<Key, K>;
            Node::createInPlace(n, ConstructProxy(std::forward<K>(key)),
                                std::forward<Args>(args)...);
            ++d->size;
        }
        return {iterator(bucket.toIterator(d)), shouldInsert};
    }
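// std::unordered_map-style observers follow. max_load_factor() is fixed: the table
// rehashes once the load factor exceeds 0.5, as described in the design notes above.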
public:
    float load_factor() const noexcept { return d ? d->loadFactor() : 0; }
    static float max_load_factor() noexcept { return 0.5; }
    size_t bucket_count() const noexcept { return d ? d->numBuckets : 0; }
    static size_t max_bucket_count() noexcept { return Data::maxNumBuckets(); }

    [[nodiscard]]
|
    inline bool empty() const noexcept { return isEmpty(); }
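    // Usage sketch (illustrative only): the statistics accessors above mirror the
    // std::unordered_map API and are mainly useful for diagnostics.
    //
    //     QHash<int, int> h;
    //     for (int i = 0; i < 1000; ++i)
    //         h.insert(i, i * i);
    //     // h.load_factor() stays at or below max_load_factor() (0.5), and
    //     // h.bucket_count() reports the current number of buckets.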

private:
    template <typename ...Args>
    iterator emplace_helper(Key &&key, Args &&... args)
    {
        auto result = d->findOrInsert(key);
        if (!result.initialized)
            Node::createInPlace(result.it.node(), std::move(key), std::forward<Args>(args)...);
        else
            result.it.node()->emplaceValue(std::forward<Args>(args)...);
        return iterator(result.it);
    }

    template <typename K>
    using if_heterogeneously_searchable = QHashPrivate::if_heterogeneously_searchable_with<Key, K>;

    template <typename K>
    using if_key_constructible_from = std::enable_if_t<std::is_constructible_v<Key, K>, bool>;

public:
    template <typename K, if_heterogeneously_searchable<K> = true>
    bool remove(const K &key)
    {
        return removeImpl(key);
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    T take(const K &key)
    {
        return takeImpl(key);
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    bool contains(const K &key) const
    {
        return d ? d->findNode(key) != nullptr : false;
    }
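    // Heterogeneous-lookup sketch (illustrative; it assumes that, in this Qt
    // version, QStringView is heterogeneously searchable with a QString key, so
    // the templated overloads above participate and no temporary QString is built):
    //
    //     QHash<QString, int> h;
    //     h.insert(QStringLiteral("alpha"), 1);
    //     QStringView needle = ...;                  // e.g. a view into parsed input
    //     if (h.contains(needle))
    //         h.remove(needle);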

    template <typename K, if_heterogeneously_searchable<K> = true>
    qsizetype count(const K &key) const
    {
        return contains(key) ? 1 : 0;
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    T value(const K &key) const noexcept
    {
        if (auto *v = valueImpl(key))
            return *v;
        else
            return T();
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    T value(const K &key, const T &defaultValue) const noexcept
    {
        if (auto *v = valueImpl(key))
            return *v;
        else
            return defaultValue;
    }

    template <typename K, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
    T &operator[](const K &key)
    {
        return *tryEmplace(key).iterator;
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    const T operator[](const K &key) const noexcept
    {
        return value(key);
    }
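    // Sketch of the difference between the lookup overloads above and their
    // non-templated counterparts (illustrative):
    //
    //     QHash<QString, int> h;
    //     int a = h.value(QStringLiteral("missing"));        // returns int(), no insert
    //     int b = h.value(QStringLiteral("missing"), -1);    // returns -1, no insert
    //     h[QStringLiteral("missing")] = 42;                 // non-const operator[] inserts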

    template <typename K, if_heterogeneously_searchable<K> = true>
    std::pair<iterator, iterator>
    equal_range(const K &key)
    {
        return equal_range_impl(*this, key);
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    std::pair<const_iterator, const_iterator>
    equal_range(const K &key) const noexcept
    {
        return equal_range_impl(*this, key);
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    iterator find(const K &key)
    {
        return findImpl(key);
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    const_iterator find(const K &key) const noexcept
    {
        return constFindImpl(key);
    }

    template <typename K, if_heterogeneously_searchable<K> = true>
    const_iterator constFind(const K &key) const noexcept
    {
        return find(key);
    }

    template <typename K, typename... Args, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
    TryEmplaceResult tryEmplace(K &&key, Args &&...args)
    {
        return tryEmplace_impl(std::forward<K>(key), std::forward<Args>(args)...);
    }

    template <typename K, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
    TryEmplaceResult tryInsert(K &&key, const T &value)
    {
        return tryEmplace_impl(std::forward<K>(key), value);
    }

    template <typename K, typename... Args, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
    std::pair<key_value_iterator, bool> try_emplace(K &&key, Args &&...args)
    {
        return tryEmplace_impl(std::forward<K>(key), std::forward<Args>(args)...);
    }

    template <typename K, typename... Args, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
    key_value_iterator try_emplace(const_iterator /*hint*/, K &&key, Args &&...args)
    {
        return key_value_iterator(tryEmplace_impl(std::forward<K>(key), std::forward<Args>(args)...).iterator);
    }
};
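// Sketch of the std-style insertion API above (illustrative): try_emplace only
// constructs a value when the key is not already present.
//
//     QHash<QString, int> counts;
//     auto [it, inserted] = counts.try_emplace(QStringLiteral("clicks"), 0);
//     // 'inserted' is true only if "clicks" was not in the hash before;
//     // an existing value is left untouched, unlike operator[] followed by '='.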

template <typename Key, typename T>
class QMultiHash
{
    using Node = QHashPrivate::MultiNode<Key, T>;
    using Data = QHashPrivate::Data<Node>;
    using Chain = QHashPrivate::MultiNodeChain<T>;

    Data *d = nullptr;
    qsizetype m_size = 0;

public:
    using key_type = Key;
    using mapped_type = T;
    using value_type = T;
    using size_type = qsizetype;
    using difference_type = qsizetype;
    using reference = T &;
    using const_reference = const T &;

    QMultiHash() noexcept = default;
    inline QMultiHash(std::initializer_list<std::pair<Key,T> > list)
        : d(new Data(list.size()))
    {
        for (typename std::initializer_list<std::pair<Key,T> >::const_iterator it = list.begin(); it != list.end(); ++it)
            insert(it->first, it->second);
    }
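    // Usage sketch (illustrative): unlike QHash, QMultiHash keeps every value
    // inserted for a key, so duplicate keys in the initializer list are preserved.
    //
    //     QMultiHash<QString, int> mh {
    //         { QStringLiteral("a"), 1 },
    //         { QStringLiteral("a"), 2 },    // both values remain associated with "a"
    //         { QStringLiteral("b"), 3 },
    //     };
    //     // mh.size() == 3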
#ifdef Q_QDOC
    template <typename InputIterator>
    QMultiHash(InputIterator f, InputIterator l);
#else
    template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasKeyAndValue<InputIterator> = true>
    QMultiHash(InputIterator f, InputIterator l)
    {
        QtPrivate::reserveIfForwardIterator(this, f, l);
        for (; f != l; ++f)
            insert(f.key(), f.value());
    }

    template <typename InputIterator, QtPrivate::IfAssociativeIteratorHasFirstAndSecond<InputIterator> = true>
    QMultiHash(InputIterator f, InputIterator l)
    {
        QtPrivate::reserveIfForwardIterator(this, f, l);
        for (; f != l; ++f) {
            auto &&e = *f;
            using V = decltype(e);
            insert(std::forward<V>(e).first, std::forward<V>(e).second);
        }
    }
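    // Sketch of the range constructor above (illustrative): any iterator range whose
    // elements expose .first/.second members, e.g. a std::vector of pairs, works.
    //
    //     std::vector<std::pair<QString, int>> pairs = {
    //         { QStringLiteral("x"), 1 }, { QStringLiteral("x"), 2 },
    //     };
    //     QMultiHash<QString, int> mh(pairs.begin(), pairs.end());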
#endif

    QMultiHash(const QMultiHash &other) noexcept
        : d(other.d), m_size(other.m_size)
    {
        if (d)
            d->ref.ref();
    }

    ~QMultiHash()
    {
        static_assert(std::is_nothrow_destructible_v<Key>, "Types with throwing destructors are not supported in Qt containers.");
        static_assert(std::is_nothrow_destructible_v<T>, "Types with throwing destructors are not supported in Qt containers.");

        if (d && !d->ref.deref())
            delete d;
    }

    QMultiHash &operator=(const QMultiHash &other) noexcept(std::is_nothrow_destructible<Node>::value)
    {
        if (d != other.d) {
            Data *o = other.d;
            if (o)
                o->ref.ref();
            if (d && !d->ref.deref())
                delete d;
            d = o;
            m_size = other.m_size;
        }
        return *this;
    }

    QMultiHash(QMultiHash &&other) noexcept
        : d(std::exchange(other.d, nullptr)),
          m_size(std::exchange(other.m_size, 0))
    {
    }

    QMultiHash &operator=(QMultiHash &&other) noexcept(std::is_nothrow_destructible<Node>::value)
    {
        QMultiHash moved(std::move(other));
        swap(moved);
        return *this;
    }

    explicit QMultiHash(const QHash<Key, T> &other)
        : QMultiHash(other.begin(), other.end())
    {}

    explicit QMultiHash(QHash<Key, T> &&other)
    {
        unite(std::move(other));
    }
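    // Conversion sketch (illustrative): a QHash can be turned into a QMultiHash
    // explicitly; the rvalue overload above hands its elements to unite().
    //
    //     QHash<QString, int> h;
    //     h.insert(QStringLiteral("k"), 1);
    //     QMultiHash<QString, int> mh(std::move(h));    // treat 'h' as moved-from afterwards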
QtCore: replace qSwap with std::swap/member-swap where possible
qSwap() is a monster that looks for ADL overloads of swap() and also
detects the noexcept of the wrapped swap() function, so it should only
be used when the argument type is unknown. In the vast majority of
cases, the type is known to be efficiently std::swap()able or to have
a member-swap. Call either of these.
For the common case of pointer types, circumvent the expensive trait
checks on std::swap() by providing a hand-rolled qt_ptr_swap()
template, the advantage being that it can be unconditionally noexcept,
removing all type traits instantiations. Don't document it, otherwise
we'd be unable to pick it to 6.2.
Effects on Clang -ftime-trace of a PCH'ed libQt6Gui.so build:
before:
**** Template sets that took longest to instantiate:
[...]
27766 ms: qSwap<$> (9073 times, avg 3 ms)
[...]
2806 ms: std::swap<$> (1229 times, avg 2 ms)
(30572ms)
after:
**** Template sets that took longest to instantiate:
[...]
5047 ms: qSwap<$> (641 times, avg 7 ms)
[...]
3371 ms: std::swap<$> (1376 times, avg 2 ms)
[qt_ptr_swap<$> does not appear in the top 400, so < 905ms]
(< 9323ms)
As a drive-by, remove superfluous inline keywords and template
ornaments.
Task-number: QTBUG-97601
Pick-to: 6.3 6.2
Change-Id: I88f9b4e3cbece268c4a1238b6d50e5712a1bab5a
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Joerg Bornemann <joerg.bornemann@qt.io>
Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>

    void swap(QMultiHash &other) noexcept
    {
        qt_ptr_swap(d, other.d);
        std::swap(m_size, other.m_size);
    }

#ifndef Q_QDOC
private:
    template <typename AKey = Key, typename AT = T,
              QTypeTraits::compare_eq_result_container<QMultiHash, AKey, AT> = true>
    friend bool comparesEqual(const QMultiHash &lhs, const QMultiHash &rhs) noexcept
    {
        if (lhs.d == rhs.d)
            return true;
        if (lhs.m_size != rhs.m_size)
            return false;
        if (lhs.m_size == 0)
            return true;

        // equal size, and both non-zero size => d pointers allocated for both
        Q_ASSERT(lhs.d);
        Q_ASSERT(rhs.d);
        if (lhs.d->size != rhs.d->size)
            return false;
        for (auto it = rhs.d->begin(); it != rhs.d->end(); ++it) {
            auto *n = lhs.d->findNode(it.node()->key);
            if (!n)
                return false;
            Chain *e = it.node()->value;
            while (e) {
                Chain *oe = n->value;
                while (oe) {
                    if (oe->value == e->value)
                        break;
                    oe = oe->next;
                }
                if (!oe)
                    return false;
                e = e->next;
            }
        }
        // all values must be the same as size is the same
        return true;
    }
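    // Illustrative note: the loop above only requires every value in rhs' chain
    // for a key to appear somewhere in lhs' chain for that key (plus equal
    // sizes), so equality does not depend on the insertion order of values that
    // share a key:
    //     QMultiHash<int, QString> a, b;
    //     a.insert(1, "x"); a.insert(1, "y");
    //     b.insert(1, "y"); b.insert(1, "x");
    //     Q_ASSERT(a == b);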
    QT_DECLARE_EQUALITY_OPERATORS_HELPER(QMultiHash, QMultiHash, /* non-constexpr */, noexcept,
                                         template <typename AKey = Key, typename AT = T,
                                                   QTypeTraits::compare_eq_result_container<QMultiHash, AKey, AT> = true>)
public:
#else
    friend bool operator==(const QMultiHash &lhs, const QMultiHash &rhs) noexcept;
    friend bool operator!=(const QMultiHash &lhs, const QMultiHash &rhs) noexcept;
#endif // Q_QDOC

    inline qsizetype size() const noexcept { return m_size; }

    [[nodiscard]]
    inline bool isEmpty() const noexcept { return !m_size; }

    inline qsizetype capacity() const noexcept { return d ? qsizetype(d->numBuckets >> 1) : 0; }
    void reserve(qsizetype size)
    {
        // reserve(0) is used in squeeze()
        if (size && (this->capacity() >= size))
            return;
        if (isDetached())
            d->rehash(size);
        else
            d = Data::detached(d, size_t(size));
    }
    inline void squeeze() { reserve(0); }

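    // Illustrative usage note: capacity() is half the bucket count, so after
    // reserve(n) at least n elements fit before the table grows again, and
    // squeeze() (i.e. reserve(0)) rehashes down to the smallest table that
    // still holds the current contents. A typical bulk-fill pattern:
    //     QMultiHash<QString, int> h;
    //     h.reserve(1000);   // grow once up front instead of rehashing repeatedly
    //     for (int i = 0; i < 1000; ++i)
    //         h.insert(QStringLiteral("key%1").arg(i % 10), i);
    //     h.squeeze();       // trim unused buckets once filling is done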
    inline void detach() { if (!d || d->ref.isShared()) d = Data::detached(d); }
    inline bool isDetached() const noexcept { return d && !d->ref.isShared(); }
    bool isSharedWith(const QMultiHash &other) const noexcept { return d == other.d; }

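    // Illustrative note: QMultiHash is implicitly shared like the other Qt
    // containers. Copying only bumps a reference count; detach() performs the
    // deep copy lazily, on the first modifying call:
    //     QMultiHash<int, int> a;
    //     a.insert(1, 2);
    //     QMultiHash<int, int> b = a;   // shallow copy, b.isSharedWith(a) is true
    //     b.insert(3, 4);               // detaches b; a still holds only (1, 2)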
    void clear() noexcept(std::is_nothrow_destructible<Node>::value)
    {
        if (d && !d->ref.deref())
            delete d;
        d = nullptr;
        m_size = 0;
    }

    qsizetype remove(const Key &key)
    {
        return removeImpl(key);
    }
private:
    template <typename K> qsizetype removeImpl(const K &key)
    {
        if (isEmpty()) // prevents detaching shared null
            return 0;
        auto it = d->findBucket(key);
        size_t bucket = it.toBucketIndex(d);
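        // The numeric bucket index is kept because the detach() below may copy
        // the table, which would leave the Bucket iterator pointing into the
        // old, shared data.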
        detach();
        it = typename Data::Bucket(d, bucket); // reattach in case of detach
        if (it.isUnused())
            return 0;
        qsizetype n = Node::freeChain(it.node());
        m_size -= n;
        Q_ASSERT(m_size >= 0);
        d->erase(it);
        return n;
    }

public:
|
2020-10-16 18:52:48 +00:00
|
|
|
template <typename Predicate>
|
|
|
|
qsizetype removeIf(Predicate pred)
|
|
|
|
{
|
|
|
|
return QtPrivate::associative_erase_if(*this, pred);
|
|
|
|
}
|
2024-02-03 01:55:03 +00:00
|
|
|
|
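A minimal usage sketch for removeIf() (illustrative only; the container, its contents and the threshold are made up). It assumes the Qt 6 convention that the predicate may be invoked with an iterator:

#include <QMultiHash>
#include <QString>

void removeSmallValues(QMultiHash<QString, int> &hash)
{
    // Drops every (key, value) entry whose value is below 10.
    hash.removeIf([](const QMultiHash<QString, int>::iterator &it) {
        return it.value() < 10;
    });
}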
    T take(const Key &key)
    {
        return takeImpl(key);
    }

private:
    template <typename K> T takeImpl(const K &key)
    {
        if (isEmpty()) // prevents detaching shared null
            return T();
        auto it = d->findBucket(key);
        size_t bucket = it.toBucketIndex(d);
        detach();
        it = typename Data::Bucket(d, bucket); // reattach in case of detach
        if (it.isUnused())
            return T();
        Chain *e = it.node()->value;
        Q_ASSERT(e);
        T t = std::move(e->value);
        if (e->next) {
            it.node()->value = e->next;
            delete e;
        } else {
            // erase() deletes the values.
            d->erase(it);
        }
        --m_size;
        Q_ASSERT(m_size >= 0);
        return t;
    }
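A minimal usage sketch for take() (illustrative only; the container and values are made up). As the implementation above shows, a single call removes and returns one value for the key, taken from the head of that key's chain:

#include <QMultiHash>
#include <QString>

void takeExample()
{
    QMultiHash<QString, int> hash;
    hash.insert(QStringLiteral("a"), 1);
    hash.insert(QStringLiteral("a"), 2);

    int v    = hash.take(QStringLiteral("a")); // removes one of the values stored for "a" and returns it
    int none = hash.take(QStringLiteral("b")); // key absent: returns a default-constructed value, here 0
    Q_UNUSED(v);
    Q_UNUSED(none);
}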

public:
    bool contains(const Key &key) const noexcept
    {
        if (!d)
            return false;
        return d->findNode(key) != nullptr;
    }

private:
    const Key *keyImpl(const T &value) const noexcept
    {
        // Linear scan: walks every chain in the table until a matching value is found.
        if (d) {
            auto i = d->begin();
            while (i != d->end()) {
                Chain *e = i.node()->value;
                if (e->contains(value))
                    return &i.node()->key;
                ++i;
            }
        }
        return nullptr;
    }

public:
    Key key(const T &value) const noexcept
    {
        if (auto *k = keyImpl(value))
            return *k;
        else
            return Key();
    }
    Key key(const T &value, const Key &defaultKey) const noexcept
    {
        if (auto *k = keyImpl(value))
            return *k;
        else
            return defaultKey;
    }
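A minimal usage sketch for the key() overloads (illustrative only; names and values are made up). Since keyImpl() above walks every chain, this reverse lookup is linear in the number of entries:

#include <QMultiHash>
#include <QString>

void reverseLookup(const QMultiHash<QString, int> &hash)
{
    QString k1 = hash.key(42);                        // some key mapping to 42, or QString() if there is none
    QString k2 = hash.key(42, QStringLiteral("n/a")); // same lookup, but with an explicit fallback key
    Q_UNUSED(k1);
    Q_UNUSED(k2);
}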

private:
    template <typename K>
    T *valueImpl(const K &key) const noexcept
    {
        if (d) {
            Node *n = d->findNode(key);
            if (n) {
                Q_ASSERT(n->value);
                return &n->value->value;
            }
        }
        return nullptr;
    }

public:
    T value(const Key &key) const noexcept
    {
        if (auto *v = valueImpl(key))
            return *v;
        else
            return T();
    }
    T value(const Key &key, const T &defaultValue) const noexcept
    {
        if (auto *v = valueImpl(key))
            return *v;
        else
            return defaultValue;
    }
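A minimal usage sketch for the value() overloads (illustrative only; the keys and fallback values are made up). For a key with several values, one of them is returned:

#include <QMultiHash>
#include <QString>

void lookup(const QMultiHash<QString, int> &hash)
{
    int a = hash.value(QStringLiteral("a"));     // 0 (a default-constructed int) if "a" is absent
    int b = hash.value(QStringLiteral("b"), -1); // -1 if "b" is absent
    Q_UNUSED(a);
    Q_UNUSED(b);
}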
    T &operator[](const Key &key)
    {
        return operatorIndexImpl(key);
    }

private:
    template <typename K> T &operatorIndexImpl(const K &key)
    {
        const auto copy = isDetached() ? QMultiHash() : *this; // keep 'key' alive across the detach
|
|
|
detach();
|
2020-02-07 11:29:59 +00:00
|
|
|
auto result = d->findOrInsert(key);
|
|
|
|
Q_ASSERT(!result.it.atEnd());
|
2023-04-03 18:03:04 +00:00
|
|
|
if (!result.initialized) {
|
2024-02-09 22:52:35 +00:00
|
|
|
Node::createInPlace(result.it.node(), Key(key), T());
|
2023-04-03 18:03:04 +00:00
|
|
|
++m_size;
|
|
|
|
}
|
2020-02-07 11:29:59 +00:00
|
|
|
return result.it.node()->value->value;
|
New QHash implementation
A brand new QHash implementation using a faster and more memory efficient data
structure than the old QHash.
A new implementation for QHash. Instead of a node based approach as the old
QHash, this implementation now uses a two stage lookup table. The total
amount of buckets in the table are divided into spans of 128 entries.
Inside each span, we use an array of chars to index into a storage area
for the span.
The storage area for each span is a simple array, that gets (re-)allocated
with size increments of 16 items. This gives an average memory overhead of
8*sizeof(struct{ Key; Value; }) + 128*sizeof(char) + 16 for each span.
To give good performance and avoid too many collisions, the array keeps its
load factor between .25 and .5 (and grows and rehashes if the load factor goes
above .5).
This design allows us to keep the memory overhead of the Hash very small, while
at the same time giving very good performance. The calculated overhead for a
QHash<int, int> comes to 1.7-3.3 bytes per entry and to 2.2-4.3 bytes for
a QHash<ptr, ptr>.
The new implementation also completely splits the QHash and QMultiHash classes.
One behavioral change to note is that the new QHash implementation will not
provide stable references to nodes in the hash when the table needs to grow.
Benchmarking using https://github.com/Tessil/hash-table-shootout shows
very nice performance compared to many different hash table implementation.
Numbers shown below are for a hash<int64, int64> with 1 million entries. These
numbers scale nicely (mostly in a linear fashion with some variation due to
varying load factors) to smaller and larger tables. All numbers are in seconds,
measured with gcc on Linux:
Hash table random random random random reads full
insertion insertion full full after iteration
(reserved) deletes reads deletes
------------------------------------------------------------------------------
std::unordered_map 0,3842 0,1969 0,4511 0,1300 0,1169 0,0708
google::dense_hash_map 0,1091 0,0846 0,0550 0,0452 0,0754 0,0160
google::sparse_hash_map 0,2888 0,1582 0,0948 0,1020 0,1348 0,0112
tsl::sparse_map 0,1487 0,1013 0,0735 0,0448 0,0505 0,0042
old QHash 0,2886 0,1798 0,5065 0,0840 0,0717 0,1387
new QHash 0,0940 0,0714 0,1494 0,0579 0,0449 0,0146
Numbers for hash<std::string, int64>, with the string having 15 characters:
Hash table random random random random reads
insertion insertion full full after
(reserved) deletes reads deletes
--------------------------------------------------------------------
std::unordered_map 0,4993 0,2563 0,5515 0,2950 0,2153
google::dense_hash_map 0,2691 0,1870 0,1547 0,1125 0,1622
google::sparse_hash_map 0,6979 0,3304 0,1884 0,1822 0,2122
tsl::sparse_map 0,4066 0,2586 0,1929 0,1146 0,1095
old QHash 0,3236 0,2064 0,5986 0,2115 0,1666
new QHash 0,2119 0,1652 0,2390 0,1378 0,0965
Memory usage numbers (in MB for a table with 1M entries) also look very nice:
Hash table Key int64 std::string (15 chars)
Value int64 int64
---------------------------------------------------------
std::unordered_map 44.63 75.35
google::dense_hash_map 32.32 80,60
google::sparse_hash_map 18.08 44.21
tsl::sparse_map 20.44 45,93
old QHash 53.95 69,16
new QHash 23.23 51,32
Fixes: QTBUG-80311
Change-Id: I5679734144bc9bca2102acbe725fcc2fa89f0dff
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2020-01-17 13:33:53 +00:00
|
|
|
}
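
The block above appears to be the tail of QMultiHash's non-const operator[]: it detaches the shared data, looks the key up with findOrInsert(), default-constructs a value when the key is absent, and returns a reference to the value stored in the node's chain. A minimal usage sketch under that reading (Qt 6 assumed; the key and value types are hypothetical):

    #include <QMultiHash>
    #include <QString>
    #include <QDebug>

    int main()
    {
        QMultiHash<QString, int> hash;

        // operator[] inserts a default-constructed value (0 for int) when the
        // key is missing, then returns a modifiable reference to it.
        hash["answer"] += 42;

        // insert() adds a second value under the same key; operator[] refers
        // to a single value in that key's chain, not to all of them.
        hash.insert("answer", 7);

        qDebug() << hash.count("answer"); // 2
        return 0;
    }
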
public:
    const T operator[](const Key &key) const noexcept
    {
        return value(key);
    }

    QList<Key> uniqueKeys() const
    {
        QList<Key> res;
        if (d) {
            auto i = d->begin();
            while (i != d->end()) {
                res.append(i.node()->key);
                ++i;
            }
        }
        return res;
    }

    QList<Key> keys() const { return QList<Key>(keyBegin(), keyEnd()); }
    QList<Key> keys(const T &value) const
    {
        QList<Key> res;
        const_iterator i = begin();
        while (i != end()) {
            if (i.value() == value)
                res.append(i.key());
            ++i;
        }
        return res;
    }
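
The three key accessors above enumerate different things: uniqueKeys() walks the nodes (one per distinct key), keys() repeats a key once per stored value, and keys(value) is a linear scan over all entries comparing values. A small sketch of the difference (Qt 6 assumed; the keys and values are made up):

    #include <QMultiHash>
    #include <QString>
    #include <QDebug>

    int main()
    {
        QMultiHash<QString, int> hash;
        hash.insert("a", 1);
        hash.insert("a", 2);
        hash.insert("b", 2);

        qDebug() << hash.uniqueKeys().size(); // 2: "a" and "b" once each
        qDebug() << hash.keys().size();       // 3: "a" once per stored value
        qDebug() << hash.keys(2);             // keys holding a value equal to 2: "a" and "b"
        return 0;
    }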

    QList<T> values() const { return QList<T>(begin(), end()); }
    QList<T> values(const Key &key) const
    {
        return valuesImpl(key);
    }

private:
    template <typename K> QList<T> valuesImpl(const K &key) const
    {
        QList<T> values;
        if (d) {
            Node *n = d->findNode(key);
            if (n) {
                Chain *e = n->value;
                while (e) {
                    values.append(e->value);
                    e = e->next;
                }
            }
        }
        return values;
    }
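
values(const Key &) forwards to the private valuesImpl(), which finds the key's node and collects every value chained to it; values() with no argument simply copies the whole range. A brief usage sketch (Qt 6 assumed; no ordering of the values within one key is implied here):

    #include <QMultiHash>
    #include <QString>
    #include <QDebug>

    int main()
    {
        QMultiHash<QString, int> hash;
        hash.insert("port", 80);
        hash.insert("port", 443);
        hash.insert("host", 1);

        qDebug() << hash.values("port").size(); // 2: both values stored under "port"
        qDebug() << hash.values().size();       // 3: every value in the hash
        return 0;
    }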

public:
    class const_iterator;

    class iterator
    {
        using piter = typename QHashPrivate::iterator<Node>;
        friend class const_iterator;
        friend class QMultiHash<Key, T>;
        piter i;
        Chain **e = nullptr;
        explicit inline iterator(piter it, Chain **entry = nullptr) noexcept : i(it), e(entry)
        {
            if (!it.atEnd() && !e) {
                e = &it.node()->value;
                Q_ASSERT(e && *e);
            }
        }

    public:
        typedef std::forward_iterator_tag iterator_category;
        typedef qptrdiff difference_type;
        typedef T value_type;
        typedef T *pointer;
        typedef T &reference;

        constexpr iterator() noexcept = default;

        inline const Key &key() const noexcept { return i.node()->key; }
        inline T &value() const noexcept { return (*e)->value; }
        inline T &operator*() const noexcept { return (*e)->value; }
        inline T *operator->() const noexcept { return &(*e)->value; }
        inline bool operator==(const iterator &o) const noexcept { return e == o.e; }
        inline bool operator!=(const iterator &o) const noexcept { return e != o.e; }

        inline iterator &operator++() noexcept {
            Q_ASSERT(e && *e);
            e = &(*e)->next;
            Q_ASSERT(e);
            if (!*e) {
                ++i;
                e = i.atEnd() ? nullptr : &i.node()->value;
            }
            return *this;
        }
        inline iterator operator++(int) noexcept {
            iterator r = *this;
            ++(*this);
            return r;
        }
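
operator++() above first steps along the chain of values stored under the current key (e = &(*e)->next) and only advances the underlying node iterator once that chain is exhausted, so a full traversal visits every (key, value) pair and equal keys come out back to back. A minimal traversal sketch (Qt 6 assumed):

    #include <QMultiHash>
    #include <QString>
    #include <QDebug>

    int main()
    {
        QMultiHash<QString, int> hash;
        hash.insert("a", 1);
        hash.insert("a", 2);
        hash.insert("b", 3);

        // Both values of "a" are visited consecutively before "b".
        for (auto it = hash.constBegin(); it != hash.constEnd(); ++it)
            qDebug() << it.key() << it.value();
        return 0;
    }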

        inline bool operator==(const const_iterator &o) const noexcept { return e == o.e; }
        inline bool operator!=(const const_iterator &o) const noexcept { return e != o.e; }
    };
    friend class iterator;

    class const_iterator
    {
        using piter = typename QHashPrivate::iterator<Node>;
        friend class iterator;
        friend class QMultiHash<Key, T>;
        piter i;
        Chain **e = nullptr;
        explicit inline const_iterator(piter it, Chain **entry = nullptr) noexcept : i(it), e(entry)
        {
            if (!it.atEnd() && !e) {
                e = &it.node()->value;
                Q_ASSERT(e && *e);
            }
        }
|
    public:
        typedef std::forward_iterator_tag iterator_category;
        typedef qptrdiff difference_type;
        typedef T value_type;
        typedef const T *pointer;
        typedef const T &reference;

        constexpr const_iterator() noexcept = default;
        inline const_iterator(const iterator &o) noexcept : i(o.i), e(o.e) { }

        inline const Key &key() const noexcept { return i.node()->key; }
        inline T &value() const noexcept { return (*e)->value; }
        inline T &operator*() const noexcept { return (*e)->value; }
        inline T *operator->() const noexcept { return &(*e)->value; }
        inline bool operator==(const const_iterator &o) const noexcept { return e == o.e; }
        inline bool operator!=(const const_iterator &o) const noexcept { return e != o.e; }

        inline const_iterator &operator++() noexcept {
            Q_ASSERT(e && *e);
            e = &(*e)->next;
            Q_ASSERT(e);
            if (!*e) {
                ++i;
                e = i.atEnd() ? nullptr : &i.node()->value;
            }
            return *this;
        }
        inline const_iterator operator++(int) noexcept
        {
            const_iterator r = *this;
            ++(*this);
            return r;
        }
    };
    friend class const_iterator;

    class key_iterator
    {
        const_iterator i;

    public:
        typedef typename const_iterator::iterator_category iterator_category;
        typedef qptrdiff difference_type;
        typedef Key value_type;
        typedef const Key *pointer;
        typedef const Key &reference;

        key_iterator() noexcept = default;
        explicit key_iterator(const_iterator o) noexcept : i(o) { }

        const Key &operator*() const noexcept { return i.key(); }
        const Key *operator->() const noexcept { return &i.key(); }
        bool operator==(key_iterator o) const noexcept { return i == o.i; }
        bool operator!=(key_iterator o) const noexcept { return i != o.i; }

        inline key_iterator &operator++() noexcept { ++i; return *this; }
        inline key_iterator operator++(int) noexcept { return key_iterator(i++); }
        const_iterator base() const noexcept { return i; }
    };

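    // Usage sketch (illustrative only; assumes a QMultiHash<QString, int> named
    // 'hash' and #include <algorithm>): key_iterator makes the keys usable with
    // standard algorithms without first copying them into a list.
    //
    //     const auto n = std::count(hash.keyBegin(), hash.keyEnd(), QStringLiteral("a"));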
    typedef QKeyValueIterator<const Key&, const T&, const_iterator> const_key_value_iterator;
    typedef QKeyValueIterator<const Key&, T&, iterator> key_value_iterator;

    // STL style
    inline iterator begin() { if (!d) return iterator(); detach(); return iterator(d->begin()); }
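    // The mutable begin() above detaches shared (copy-on-write) data before
    // handing out an iterator; the const overloads below never detach.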
    inline const_iterator begin() const noexcept { return d ? const_iterator(d->begin()) : const_iterator(); }
    inline const_iterator cbegin() const noexcept { return d ? const_iterator(d->begin()) : const_iterator(); }
    inline const_iterator constBegin() const noexcept { return d ? const_iterator(d->begin()) : const_iterator(); }
    inline iterator end() noexcept { return iterator(); }
    inline const_iterator end() const noexcept { return const_iterator(); }
    inline const_iterator cend() const noexcept { return const_iterator(); }
    inline const_iterator constEnd() const noexcept { return const_iterator(); }
    inline key_iterator keyBegin() const noexcept { return key_iterator(begin()); }
    inline key_iterator keyEnd() const noexcept { return key_iterator(end()); }
    inline key_value_iterator keyValueBegin() noexcept { return key_value_iterator(begin()); }
    inline key_value_iterator keyValueEnd() noexcept { return key_value_iterator(end()); }
    inline const_key_value_iterator keyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
    inline const_key_value_iterator constKeyValueBegin() const noexcept { return const_key_value_iterator(begin()); }
    inline const_key_value_iterator keyValueEnd() const noexcept { return const_key_value_iterator(end()); }
    inline const_key_value_iterator constKeyValueEnd() const noexcept { return const_key_value_iterator(end()); }

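    // Usage sketch (illustrative only; assumes a QMultiHash<QString, int> named 'hash'):
    //
    //     for (auto it = hash.keyBegin(); it != hash.keyEnd(); ++it)
    //         qDebug() << *it;                          // keys only
    //     for (auto it = hash.constKeyValueBegin(); it != hash.constKeyValueEnd(); ++it)
    //         qDebug() << (*it).first << (*it).second;  // key/value pairs, no detach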
    // Associative containers: add a way to obtain a key/value range
    //
    // Our associative containers' iterator's value_type isn't a destructurable
    // type (yielding key/value). This means that something like
    //     for (auto [k, v] : map)
    // doesn't even compile -- one can only "directly" iterate on the
    // values. For quite some time we've had QKeyValueIterator to allow
    // key/value iteration, but then one had to resort to a "traditional" for
    // loop:
    //     for (auto i = map.keyValueBegin(), e = map.keyValueEnd(); i != e; ++i)
    // This can be easily packaged in an adaptor class, which is what this
    // commit does, thereby offering a C++17-compatible way to obtain
    // key/value iteration over associative containers.
    //
    // Something possibly peculiar is the fact that the range so obtained is
    // a range of pairs of references -- not a range of references to pairs.
    // But that's easily explained by the fact that we have no pairs to build
    // references to; hence,
    //     for (auto &[k, v] : map.asKeyValueRange())
    // doesn't compile (an lvalue reference doesn't bind to a prvalue pair).
    // Instead, both of these compile:
    //     for (auto [k, v] : map.asKeyValueRange())
    //     for (auto &&[k, v] : map.asKeyValueRange())
    // and in *both* cases one gets references to the keys/values in the map.
    // If the map is non-const, the reference to the value is mutable.
    //
    // Last but not least, implement pinning for rvalue containers.
    //
    // [ChangeLog][QtCore][QMap] Added asKeyValueRange().
    // [ChangeLog][QtCore][QMultiMap] Added asKeyValueRange().
    // [ChangeLog][QtCore][QHash] Added asKeyValueRange().
    // [ChangeLog][QtCore][QMultiHash] Added asKeyValueRange().
    //
    // Task-number: QTBUG-4615
    // Change-Id: Ic8506bff38b2f753494b21ab76f52e05c06ffc8b
    // Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
    auto asKeyValueRange() & { return QtPrivate::QKeyValueRange(*this); }
    auto asKeyValueRange() const & { return QtPrivate::QKeyValueRange(*this); }
    auto asKeyValueRange() && { return QtPrivate::QKeyValueRange(std::move(*this)); }
    auto asKeyValueRange() const && { return QtPrivate::QKeyValueRange(std::move(*this)); }
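    // Usage sketch (illustrative only; assumes a QMultiHash<QString, int> named 'hash'):
    //
    //     for (auto [k, v] : hash.asKeyValueRange())      // ok: k and v are references
    //         qDebug() << k << v;
    //     for (auto &&[k, v] : hash.asKeyValueRange())    // also ok; v is mutable because
    //         ++v;                                        // 'hash' is non-const
    //     // for (auto &[k, v] : hash.asKeyValueRange())  // does not compile: an lvalue
    //     //                                              // reference can't bind to the
    //     //                                              // prvalue pair of references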
    iterator detach(const_iterator it)
    {
        auto i = it.i;
        Chain **e = it.e;
        if (d->ref.isShared()) {
            // need to store iterator position before detaching
            qsizetype n = 0;
            Chain *entry = i.node()->value;
            while (entry != *it.e) {
                ++n;
                entry = entry->next;
            }
            Q_ASSERT(entry);
            detach_helper();
            i = d->detachedIterator(i);
            e = &i.node()->value;
            while (n) {
                e = &(*e)->next;
                --n;
            }
            Q_ASSERT(e && *e);
        }
        return iterator(i, e);
    }

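    // Why detach(const_iterator) above re-derives the position: when the data is
    // shared, the incoming iterator points into the shared table, so the chain
    // offset 'n' is recorded first, the data is copied (detach_helper()), and the
    // iterator is rebuilt against the private copy. A minimal sketch (illustrative
    // only; 'a' and 'b' are hypothetical):
    //
    //     QMultiHash<int, int> a;
    //     a.insert(1, 10);
    //     a.insert(1, 11);
    //     QMultiHash<int, int> b = a;      // 'b' now shares a's data
    //     a.erase(a.constFind(1));         // erase() detaches 'a' first; the iterator is
    //                                      // remapped onto the copy, so 'b' still holds
    //                                      // both values for key 1
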
    iterator erase(const_iterator it)
    {
        Q_ASSERT(d);
        iterator iter = detach(it);
        iterator i = iter;
        Chain *e = *i.e;
        Chain *next = e->next;
        *i.e = next;
        delete e;
        if (!next) {
            if (i.e == &i.i.node()->value) {
                // last remaining entry, erase
                typename Data::Bucket bucket(i.i);
                d->erase(bucket);
                if (bucket.toBucketIndex(d) == d->numBuckets - 1 || bucket.isUnused())
                    i = iterator(++iter.i);
                else // 'i' currently has a nullptr chain. So, we must recreate it
                    i = iterator(bucket.toIterator(d));
            } else {
                i = iterator(++iter.i);
            }
        }
        --m_size;
        Q_ASSERT(m_size >= 0);
        return i;
    }
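
    // Explanatory note (added commentary, not original Qt documentation):
    // erase() unlinks a single Chain node from the per-key list; only when the
    // last node for a key disappears is the bucket itself erased, which can
    // invalidate the cached bucket position and forces the returned iterator to
    // be rebuilt. A minimal usage sketch, assuming a hypothetical predicate
    // shouldDrop():
    //     for (auto it = hash.begin(); it != hash.end(); ) {
    //         if (shouldDrop(it.key()))
    //             it = hash.erase(it); // erase() returns the next position
    //         else
    //             ++it;
    //     }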

    // more Qt
    typedef iterator Iterator;
    typedef const_iterator ConstIterator;
    inline qsizetype count() const noexcept { return size(); }

private:
    template <typename K> iterator findImpl(const K &key)
    {
        if (isEmpty())
            return end();
        auto it = d->findBucket(key);
        size_t bucket = it.toBucketIndex(d);
        detach();
        it = typename Data::Bucket(d, bucket); // reattach in case of detach
        if (it.isUnused())
            return end();
        return iterator(it.toIterator(d));
    }
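    // Explanatory note (added commentary): findImpl() hands out a mutable
    // iterator, so it must detach() first; since detaching reallocates the
    // table, the bucket position is kept as a plain index and re-bound to the
    // (possibly new) Data afterwards -- see "reattach in case of detach" above.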

    template <typename K> const_iterator constFindImpl(const K &key) const noexcept
    {
        if (isEmpty())
            return end();
        auto it = d->findBucket(key);
        if (it.isUnused())
            return constEnd();
        return const_iterator(it.toIterator(d));
    }

public:
    iterator find(const Key &key)
    {
        return findImpl(key);
    }
    const_iterator constFind(const Key &key) const noexcept
    {
        return constFindImpl(key);
    }
    const_iterator find(const Key &key) const noexcept
    {
        return constFindImpl(key);
    }
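
    // Usage sketch (illustration only; use() is a hypothetical consumer): on
    // shared data, prefer constFind() or the const find() overload, which never
    // detach, over the mutable find():
    //     if (auto it = hash.constFind(key); it != hash.constEnd())
    //         use(it.value());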

    iterator insert(const Key &key, const T &value)
    {
        return emplace(key, value);
    }

    template <typename ...Args>
    iterator emplace(const Key &key, Args &&... args)
    {
        return emplace(Key(key), std::forward<Args>(args)...);
    }

    template <typename ...Args>
    iterator emplace(Key &&key, Args &&... args)
    {
        if (isDetached()) {
            if (d->shouldGrow()) // Construct the value now so that no dangling references are used
                return emplace_helper(std::move(key), T(std::forward<Args>(args)...));
            return emplace_helper(std::move(key), std::forward<Args>(args)...);
        }
        // else: we must detach
        const auto copy = *this; // keep 'args' alive across the detach/growth
        detach();
        return emplace_helper(std::move(key), std::forward<Args>(args)...);
    }
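
    // Explanatory note (added commentary): when the hash is already detached
    // but about to grow, the value is materialised into a temporary T first so
    // that 'args' referring into this very hash cannot dangle across the
    // rehash; when the data is shared, 'copy' keeps the old table (and thus
    // 'args') alive until emplace_helper() has finished. emplaceReplace() below
    // follows the same pattern.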

    float load_factor() const noexcept { return d ? d->loadFactor() : 0; }
    static float max_load_factor() noexcept { return 0.5; }
    size_t bucket_count() const noexcept { return d ? d->numBuckets : 0; }
    static size_t max_bucket_count() noexcept { return Data::maxNumBuckets(); }
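
    // Usage sketch (illustration only): these STL-compatible accessors expose
    // the table geometry; max_load_factor() is a fixed 0.5 and load_factor()
    // reports 0 for a hash without allocated data.
    //     QMultiHash<int, QString> h;
    //     h.reserve(1000); // pre-size to limit rehashing while filling
    //     Q_ASSERT(h.load_factor() <= QMultiHash<int, QString>::max_load_factor());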

    [[nodiscard]]
    inline bool empty() const noexcept { return isEmpty(); }

    inline iterator replace(const Key &key, const T &value)
    {
        return emplaceReplace(key, value);
    }

    template <typename ...Args>
    iterator emplaceReplace(const Key &key, Args &&... args)
    {
        return emplaceReplace(Key(key), std::forward<Args>(args)...);
    }

    template <typename ...Args>
    iterator emplaceReplace(Key &&key, Args &&... args)
    {
        if (isDetached()) {
            if (d->shouldGrow()) // Construct the value now so that no dangling references are used
                return emplaceReplace_helper(std::move(key), T(std::forward<Args>(args)...));
            return emplaceReplace_helper(std::move(key), std::forward<Args>(args)...);
        }
        // else: we must detach
        const auto copy = *this; // keep 'args' alive across the detach/growth
        detach();
        return emplaceReplace_helper(std::move(key), std::forward<Args>(args)...);
    }

    inline QMultiHash &operator+=(const QMultiHash &other)
    { this->unite(other); return *this; }
    inline QMultiHash operator+(const QMultiHash &other) const
    { QMultiHash result = *this; result += other; return result; }
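
    // Explanatory note (added commentary): operator+=() and operator+() are
    // thin wrappers over unite(), so for a QMultiHash the right-hand side's
    // entries are appended as additional (key, value) pairs rather than
    // overwriting values for keys that already exist.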
bool contains(const Key &key, const T &value) const noexcept
|
2024-02-03 01:55:03 +00:00
|
|
|
{
|
|
|
|
return containsImpl(key, value);
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
template <typename K> bool containsImpl(const K &key, const T &value) const noexcept
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
|
|
|
if (isEmpty())
|
|
|
|
return false;
|
|
|
|
auto n = d->findNode(key);
|
|
|
|
if (n == nullptr)
|
|
|
|
return false;
|
|
|
|
return n->value->contains(value);
|
|
|
|
}
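A hedged usage sketch (illustration only, not part of the header) of the two-argument contains(): the key is looked up once, and the chain of values stored under it is searched for the given value.

    QMultiHash<QString, int> h;
    h.insert("k", 1);
    h.insert("k", 2);
    bool a = h.contains("k", 2);  // true: the value chain for "k" holds 2
    bool b = h.contains("k", 3);  // false: "k" exists, but not with value 3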
|
2018-11-26 09:37:10 +00:00
|
|
|
|
2024-02-03 01:55:03 +00:00
|
|
|
public:
|
2020-01-17 13:33:53 +00:00
|
|
|
qsizetype remove(const Key &key, const T &value)
|
2024-02-03 01:55:03 +00:00
|
|
|
{
|
|
|
|
return removeImpl(key, value);
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
template <typename K> qsizetype removeImpl(const K &key, const T &value)
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
|
|
|
if (isEmpty()) // prevents detaching shared null
|
2021-07-07 12:51:36 +00:00
|
|
|
return 0;
|
2021-03-15 07:39:51 +00:00
|
|
|
auto it = d->findBucket(key);
|
|
|
|
size_t bucket = it.toBucketIndex(d);
|
2020-01-17 13:33:53 +00:00
|
|
|
detach();
|
2021-03-15 07:39:51 +00:00
|
|
|
it = typename Data::Bucket(d, bucket); // reattach in case of detach
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
if (it.isUnused())
|
|
|
|
return 0;
|
|
|
|
qsizetype n = 0;
|
|
|
|
Chain **e = &it.node()->value;
|
|
|
|
while (*e) {
|
|
|
|
Chain *entry = *e;
|
|
|
|
if (entry->value == value) {
|
|
|
|
*e = entry->next;
|
|
|
|
delete entry;
|
|
|
|
++n;
|
|
|
|
} else {
|
|
|
|
e = &entry->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!it.node()->value)
|
|
|
|
d->erase(it);
|
|
|
|
m_size -= n;
|
|
|
|
Q_ASSERT(m_size >= 0);
|
|
|
|
return n;
|
|
|
|
}
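As a sketch of the observable behaviour (illustration only): the (key, value) overload of remove() unlinks every chain entry whose value matches, erases the bucket once its chain becomes empty, and returns how many entries were removed.

    QMultiHash<QString, int> h;
    h.insert("k", 1);
    h.insert("k", 2);
    h.insert("k", 2);
    qsizetype removed = h.remove("k", 2);  // removed == 2; only ("k", 1) is left
    h.remove("k", 1);                      // last value gone, so the bucket for "k" is erased
    // h.contains("k") == false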
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2024-02-03 01:55:03 +00:00
|
|
|
public:
|
2020-01-17 13:33:53 +00:00
|
|
|
qsizetype count(const Key &key) const noexcept
|
2024-02-03 01:55:03 +00:00
|
|
|
{
|
|
|
|
return countImpl(key);
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
template <typename K> qsizetype countImpl(const K &key) const noexcept
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
2021-03-11 09:01:04 +00:00
|
|
|
if (!d)
|
|
|
|
return 0;
|
2021-03-15 07:39:51 +00:00
|
|
|
auto it = d->findBucket(key);
|
2020-01-17 13:33:53 +00:00
|
|
|
if (it.isUnused())
|
|
|
|
return 0;
|
|
|
|
qsizetype n = 0;
|
|
|
|
Chain *e = it.node()->value;
|
|
|
|
while (e) {
|
|
|
|
++n;
|
|
|
|
e = e->next;
|
|
|
|
}
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
return n;
|
|
|
|
}
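A small usage sketch (not part of the header): count(key) walks the chain stored under the key and returns its length.

    QMultiHash<QString, int> h;
    h.insert("k", 1);
    h.insert("k", 2);
    qsizetype n = h.count("k");   // 2: length of the value chain for "k"
    qsizetype m = h.count("z");   // 0: unknown keys contribute nothing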
|
2011-04-27 10:05:43 +00:00
|
|
|
|
2024-02-03 01:55:03 +00:00
|
|
|
public:
|
2020-01-17 13:33:53 +00:00
|
|
|
qsizetype count(const Key &key, const T &value) const noexcept
|
2024-02-03 01:55:03 +00:00
|
|
|
{
|
|
|
|
return countImpl(key, value);
|
|
|
|
}
|
|
|
|
private:
|
|
|
|
template <typename K> qsizetype countImpl(const K &key, const T &value) const noexcept
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
2021-03-11 09:01:04 +00:00
|
|
|
if (!d)
|
|
|
|
return 0;
|
2021-03-15 07:39:51 +00:00
|
|
|
auto it = d->findBucket(key);
|
2020-01-17 13:33:53 +00:00
|
|
|
if (it.isUnused())
|
|
|
|
return 0;
|
|
|
|
qsizetype n = 0;
|
|
|
|
Chain *e = it.node()->value;
|
|
|
|
while (e) {
|
|
|
|
if (e->value == value)
|
|
|
|
++n;
|
|
|
|
e = e->next;
|
|
|
|
}
|
2018-11-26 09:37:10 +00:00
|
|
|
|
2020-01-17 13:33:53 +00:00
|
|
|
return n;
|
|
|
|
}
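And the (key, value) overload, sketched the same way (illustration only): the chain for the key is walked, but only entries whose value compares equal are counted.

    QMultiHash<QString, int> h;
    h.insert("k", 7);
    h.insert("k", 7);
    h.insert("k", 8);
    qsizetype n = h.count("k", 7);  // 2: only the entries whose value equals 7 are counted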
|
2018-11-26 09:37:10 +00:00
|
|
|
|
2024-02-03 01:55:03 +00:00
|
|
|
template <typename K> iterator findImpl(const K &key, const T &value)
|
2020-01-17 13:33:53 +00:00
|
|
|
{
|
2021-07-07 12:51:36 +00:00
|
|
|
if (isEmpty())
|
|
|
|
return end();
|
2021-10-26 13:04:19 +00:00
|
|
|
const auto copy = isDetached() ? QMultiHash() : *this; // keep 'key'/'value' alive across the detach
|
2020-01-17 13:33:53 +00:00
|
|
|
detach();
|
|
|
|
auto it = constFind(key, value);
|
|
|
|
return iterator(it.i, it.e);
|
2011-04-27 10:05:43 +00:00
|
|
|
}
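A minimal sketch of find(key, value) usage (illustration only): it returns a mutable iterator to one matching entry, or end() if there is none, detaching a shared table before handing out the iterator.

    QMultiHash<QString, int> h;
    h.insert("k", 1);
    h.insert("k", 2);
    auto it = h.find("k", 2);   // iterator to the ("k", 2) entry, or h.end() if absent
    if (it != h.end())
        *it = 20;               // mutable access is why the non-const overload detaches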
|
2024-02-03 01:55:03 +00:00
|
|
|
template <typename K> const_iterator constFindImpl(const K &key, const T &value) const noexcept
|
2020-01-17 13:33:53 +00:00
    {
        const_iterator i(constFind(key));
        const_iterator end(constEnd());
        while (i != end && i.key() == key) {
            if (i.value() == value)
                return i;
            ++i;
        }
        return end;
    }

public:
    iterator find(const Key &key, const T &value)
    {
        return findImpl(key, value);
    }

    const_iterator constFind(const Key &key, const T &value) const noexcept
    {
        return constFindImpl(key, value);
    }

    const_iterator find(const Key &key, const T &value) const noexcept
    {
        return constFind(key, value);
    }
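    // Minimal usage sketch (illustrative only, not part of this header): the value-aware
    // overloads above only match an entry whose mapped value compares equal as well, e.g.
    //     QMultiHash<QString, int> h;
    //     h.insert(QStringLiteral("answer"), 42);
    //     if (h.constFind(QStringLiteral("answer"), 42) != h.constEnd()) { /* pair present */ }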
    QMultiHash &unite(const QMultiHash &other)
    {
        if (isEmpty()) {
            *this = other;
        } else if (other.isEmpty()) {
            ;
        } else {
            QMultiHash copy(other);
            detach();
            for (auto cit = copy.cbegin(); cit != copy.cend(); ++cit)
                insert(cit.key(), *cit);
        }
        return *this;
    }

    QMultiHash &unite(const QHash<Key, T> &other)
    {
        for (auto cit = other.cbegin(); cit != other.cend(); ++cit)
            insert(cit.key(), *cit);
        return *this;
    }

    QMultiHash &unite(QHash<Key, T> &&other)
    {
        if (!other.isDetached()) {
            unite(other);
            return *this;
        }
        auto it = other.d->begin();
        for (const auto end = other.d->end(); it != end; ++it)
            emplace(std::move(it.node()->key), std::move(it.node()->takeValue()));
        other.clear();
        return *this;
    }
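    // Illustrative sketch (not part of this header): unite() appends all (key, value) pairs of
    // the argument to this multi-hash, keeping duplicates, e.g.
    //     QMultiHash<int, QString> a;
    //     QHash<int, QString> b;
    //     a.insert(1, QStringLiteral("one"));
    //     b.insert(1, QStringLiteral("uno"));
    //     a.unite(b);            // a now holds two values for key 1
    //     a.unite(std::move(b)); // the rvalue overload may move the nodes out and clears b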

    std::pair<iterator, iterator> equal_range(const Key &key)
    {
        return equal_range_impl(key);
    }
private:
    template <typename K> std::pair<iterator, iterator> equal_range_impl(const K &key)
    {
        const auto copy = isDetached() ? QMultiHash() : *this; // keep 'key' alive across the detach
        detach();
        auto pair = std::as_const(*this).equal_range(key);
        return {iterator(pair.first.i), iterator(pair.second.i)};
    }

public:
    std::pair<const_iterator, const_iterator> equal_range(const Key &key) const noexcept
    {
        return equal_range_impl(key);
    }
private:
    template <typename K> std::pair<const_iterator, const_iterator> equal_range_impl(const K &key) const noexcept
    {
        if (!d)
            return {end(), end()};

        auto bucket = d->findBucket(key);
        if (bucket.isUnused())
            return {end(), end()};
        auto it = bucket.toIterator(d);
        auto end = it;
        ++end;
        return {const_iterator(it), const_iterator(end)};
    }
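    // Illustrative sketch (not part of this header): equal_range() yields the half-open range of
    // entries stored for one key, e.g.
    //     QMultiHash<QString, int> h;
    //     auto [first, last] = h.equal_range(QStringLiteral("key"));
    //     for (auto it = first; it != last; ++it)
    //         doSomethingWith(*it); // 'doSomethingWith' is a placeholder for the caller's code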
    void detach_helper()
    {
        if (!d) {
            d = new Data;
            return;
        }
        Data *dd = new Data(*d);
        if (!d->ref.deref())
            delete d;
        d = dd;
    }

    template<typename... Args>
    iterator emplace_helper(Key &&key, Args &&...args)
    {
        auto result = d->findOrInsert(key);
        if (!result.initialized)
            Node::createInPlace(result.it.node(), std::move(key), std::forward<Args>(args)...);
        else
            result.it.node()->insertMulti(std::forward<Args>(args)...);
        ++m_size;
        return iterator(result.it);
    }

    template<typename... Args>
    iterator emplaceReplace_helper(Key &&key, Args &&...args)
    {
        auto result = d->findOrInsert(key);
        if (!result.initialized) {
            Node::createInPlace(result.it.node(), std::move(key), std::forward<Args>(args)...);
            ++m_size;
        } else {
            result.it.node()->emplaceValue(std::forward<Args>(args)...);
        }
        return iterator(result.it);
    }
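    // Note: both helpers dereference 'd' directly, so callers are expected to have allocated and
    // detached the data first. emplace_helper() appends an additional value when the key already
    // exists (multi-hash semantics), whereas emplaceReplace_helper() overwrites the existing
    // value for that key.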

    template <typename K>
    using if_heterogeneously_searchable = QHashPrivate::if_heterogeneously_searchable_with<Key, K>;

    template <typename K>
    using if_key_constructible_from = std::enable_if_t<std::is_constructible_v<Key, K>, bool>;

public:
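    // The overloads below participate in overload resolution only when K can be used to search a
    // hash keyed on Key without first converting it to Key (heterogeneous lookup), as decided by
    // the if_heterogeneously_searchable alias above.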
    template <typename K, if_heterogeneously_searchable<K> = true>
    qsizetype remove(const K &key)
    {
        return removeImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    T take(const K &key)
    {
        return takeImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    bool contains(const K &key) const noexcept
    {
        if (!d)
            return false;
        return d->findNode(key) != nullptr;
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    T value(const K &key) const noexcept
    {
        if (auto *v = valueImpl(key))
            return *v;
        else
            return T();
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    T value(const K &key, const T &defaultValue) const noexcept
    {
        if (auto *v = valueImpl(key))
            return *v;
        else
            return defaultValue;
    }
    template <typename K, if_heterogeneously_searchable<K> = true, if_key_constructible_from<K> = true>
    T &operator[](const K &key)
    {
        return operatorIndexImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    const T operator[](const K &key) const noexcept
    {
        return value(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    QList<T> values(const K &key)
    {
        return valuesImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    iterator find(const K &key)
    {
        return findImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    const_iterator constFind(const K &key) const noexcept
    {
        return constFindImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    const_iterator find(const K &key) const noexcept
    {
        return constFindImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    bool contains(const K &key, const T &value) const noexcept
    {
        return containsImpl(key, value);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    qsizetype remove(const K &key, const T &value)
    {
        return removeImpl(key, value);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    qsizetype count(const K &key) const noexcept
    {
        return countImpl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    qsizetype count(const K &key, const T &value) const noexcept
    {
        return countImpl(key, value);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    iterator find(const K &key, const T &value)
    {
        return findImpl(key, value);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    const_iterator constFind(const K &key, const T &value) const noexcept
    {
        return constFindImpl(key, value);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    const_iterator find(const K &key, const T &value) const noexcept
    {
        return constFind(key, value);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    std::pair<iterator, iterator>
    equal_range(const K &key)
    {
        return equal_range_impl(key);
    }
    template <typename K, if_heterogeneously_searchable<K> = true>
    std::pair<const_iterator, const_iterator>
    equal_range(const K &key) const noexcept
    {
        return equal_range_impl(key);
    }
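    // Illustrative sketch (assumes a Qt version in which heterogeneous lookup between QString
    // keys and string views is enabled): a QStringView probe can be used without constructing a
    // temporary QString, e.g.
    //     QMultiHash<QString, int> h;
    //     h.insert(QStringLiteral("answer"), 42);
    //     QStringView probe = u"answer";
    //     bool present = h.contains(probe); // no QString allocation for the lookup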
};

Q_DECLARE_ASSOCIATIVE_FORWARD_ITERATOR(Hash)
Q_DECLARE_MUTABLE_ASSOCIATIVE_FORWARD_ITERATOR(Hash)
Q_DECLARE_ASSOCIATIVE_FORWARD_ITERATOR(MultiHash)
Q_DECLARE_MUTABLE_ASSOCIATIVE_FORWARD_ITERATOR(MultiHash)

template <class Key, class T>
size_t qHash(const QHash<Key, T> &key, size_t seed = 0)
    noexcept(noexcept(qHash(std::declval<Key&>())) && noexcept(qHash(std::declval<T&>())))
{
    size_t hash = 0;
    for (auto it = key.begin(), end = key.end(); it != end; ++it) {
        QtPrivate::QHashCombine combine;
        size_t h = combine(seed, it.key());
        // use + to keep the result independent of the ordering of the keys
        hash += combine(h, it.value());
    }
    return hash;
}

template <class Key, class T>
inline size_t qHash(const QMultiHash<Key, T> &key, size_t seed = 0)
    noexcept(noexcept(qHash(std::declval<Key&>())) && noexcept(qHash(std::declval<T&>())))
{
    size_t hash = 0;
    for (auto it = key.begin(), end = key.end(); it != end; ++it) {
        QtPrivate::QHashCombine combine;
        size_t h = combine(seed, it.key());
        // use + to keep the result independent of the ordering of the keys
        hash += combine(h, it.value());
    }
    return hash;
}
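// Illustrative note: because each (key, value) pair is folded into the result with '+', the hash
// of a whole container does not depend on iteration order, so two hashes with equal contents
// hash equally even if their internal layouts differ. A rough sketch:
//     QHash<int, int> a, b;
//     a.insert(1, 2); a.insert(3, 4);
//     b.insert(3, 4); b.insert(1, 2);
//     Q_ASSERT(a == b && qHash(a) == qHash(b));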

template <typename Key, typename T, typename Predicate>
qsizetype erase_if(QHash<Key, T> &hash, Predicate pred)
{
    return QtPrivate::associative_erase_if(hash, pred);
}

template <typename Key, typename T, typename Predicate>
qsizetype erase_if(QMultiHash<Key, T> &hash, Predicate pred)
{
    return QtPrivate::associative_erase_if(hash, pred);
}
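// Illustrative sketch (not part of this header): erase_if() removes every entry for which the
// predicate returns true and returns how many were removed; the predicate may take an iterator
// exposing key() and value(), e.g.
//     QHash<QString, int> h;
//     h.insert(QStringLiteral("a"), 1);
//     h.insert(QStringLiteral("b"), 2);
//     qsizetype n = erase_if(h, [](const auto &it) { return it.value() > 1; }); // n == 1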

QT_END_NAMESPACE

#endif // QHASH_H