const List = require("../linear").List
/**
* A `HashMap` is composed of two things:
* - a *hash function*, and
* - a collection (typically an array) to store values
*
* The hash function transforms a key (e.g. `length`, `width`, etc.) into a number that
* is used as an array index. The values are placed in a **buckets** array at the hashed
* index. A perfect hash function is one that assigns a unique array index to each
* unique key, but a perfect hash function is impractical and wastes memory.
* This `HashMap` implements a cost-effective hash function instead.
*
* What happens if two different keys map to the same index? Then we have a **collision**.
* In that case, the bucket at that index holds a list of entries instead of a single entry
* (separate chaining). Naturally, this degrades the lookup complexity from `O(1)` to `O(n)`.
*
* Collisions can be reduced by using a larger buckets array. This `HashMap` implementation
* re-sizes itself when it is getting full, which keeps collisions rare while keeping memory
* usage to a minimum.
*
* This structure uses a buckets array where each bucket is a {@link List}.
*
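* A quick usage sketch (illustrative only; the require path is an assumption, and the
* results follow from the methods documented below):
*
* @example
* const HashMap = require("./hash-map") // path is illustrative
* const map = new HashMap()
* map.add("width", 10).add("height", 20) // `add` returns the map, so calls chain
* map.get("width") // => 10
* map.has("depth") // => false
* map.delete("width") // => 10 (the removed value)
* map.count // => 1
*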
* @class
*/
class HashMap {
/**
* @constructor
* @param {number} [capacity=19] initial size of the HashMap data array (preferably a
* prime number)
* @param {number} [rehashThreshold=0.75] a rehash is triggered when the occupancy
* factor exceeds this threshold
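*
* A custom-sized map (illustrative; 31 is just an example prime):
* @example
* const sparse = new HashMap(31, 0.5) // 31 buckets, rehashes once more than half full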
*/
constructor(capacity = 19, rehashThreshold = 0.75) {
this._initialCapacity = capacity
this._rehashThreshold = rehashThreshold
this._buckets = new Array(capacity)
this._keys = []
this._size = 0
this._collisions = 0
}
/**
* Returns the number of items in the hash map
* @returns {number}
*/
get count() {
return this._size
}
/**
* Returns a measure of how full the hash map is, i.e. the ratio between the number
* of items in the map and the total number of buckets.
*
* @returns {number} A number ∈ [0, 1]
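*
* Illustrative calculation with the default 19 buckets:
* @example
* const map = new HashMap() // 19 buckets by default
* map.add("a", 1).add("b", 2).add("c", 3)
* map.occupancyFactor // => 3 / 19 ≈ 0.16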
*/
get occupancyFactor() {
return this._size / this._buckets.length
}
/**
* Indicates whether the map should be rehashed, i.e. whether the occupancy factor
* exceeds the `rehashThreshold` set at the time of instantiation.
*
* @private
* @returns {boolean}
*/
get shouldRehash() {
return this.occupancyFactor > this._rehashThreshold
}
/**
* Returns an array of all keys, in insertion order.
*
* @returns {array}
*/
get keys() {
return this._keys
}
/**
* Yields the value of each element in the map, in the same order as `keys`.
*
* @returns {Iterator} values
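*
* Illustrative iteration (spreading works because this is a generator):
* @example
* const map = new HashMap().add("a", 1).add("b", 2)
* const values = [...map.values()] // => [1, 2]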
*/
* values() {
for (const key of this._keys) {
yield this.get(key)
}
}
/**
* Yields the underlying {@link List} node of each element in the map, in the same
* order as `keys`.
*
* @private
* @returns {Iterator} nodes
*/
* nodes() {
for (const key of this._keys) {
yield this.getNodeFor(key)
}
}
/**
* Get the value for the given `key`. Runtime is usually `O(1)` but if there are
* collisions it could be `O(n)`.
*
* Returns `null` if no such key exists in the map.
*
* @param {any} key
* @returns {?any}
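*
* Illustrative lookups:
* @example
* const map = new HashMap().add("color", "teal")
* map.get("color") // => "teal"
* map.get("missing") // => null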
*/
get(key) {
const node = this.getNodeFor(key)
return node ? node.data.value : null
}
/**
* Returns a boolean indicating whether an element with the given `key` exists.
* Runtime is usually `O(1)`, or `O(n)` when there are collisions.
*
* @param {any} key
* @returns {boolean}
*/
has(key) {
// Hash-based lookup instead of scanning the key list keeps this O(1) on average
return Boolean(this.getNodeFor(key))
}
/**
* Removes the specified key from the map. Runtime is usually `O(1)` for the bucket
* removal, plus `O(n)` to drop the key from the key list.
* Returns the value removed, or `null` if the key wasn't found in the map.
*
* @param {any} key
* @returns {?any}
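*
* Illustrative removal (returns the value that was stored):
* @example
* const map = new HashMap().add("a", 1)
* map.delete("a") // => 1
* map.delete("a") // => null (already removed)
* map.has("a") // => false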
*/
delete(key) {
const entry = this.getNodeFor(key)
if (!entry) {
return null
}
// Capture the value before the entry is removed from its bucket
const value = entry.data.value
const index = this.hash(key)
const bucket = this._buckets[index]
const indexToRemove = bucket.indexOf(entry.data)
bucket.remove(indexToRemove)
// Keep the key list in sync so `keys`, `has` and iteration stay accurate
this._keys.splice(this._keys.indexOf(key), 1)
this._size -= 1
return value
}
/**
* Returns the {@link Node} for the given `key` or `null` if no such key exists.
* Runtime is usually `O(1)` but if there are collisions it could be `O(n)`.
*
* @private
* @param {any} key key to search for
* @returns {?any}
*/
getNodeFor(key) {
const bucket = this.getBucketFor(key)
if (!bucket) {
return null
}
const listNode = bucket.find((node, position) => {
if (key === node.data.key) {
return node
}
return null
})
return listNode
}
/**
* Returns the bucket for the given `key`, or `null` if no bucket exists at that index yet.
* @private
* @param {?any} key
* @returns {?List}
*/
getBucketFor(key) {
const index = this.hash(key)
return this._buckets[index] || null
}
/**
* Inserts a key/value pair into the map. If the key is already present, replaces
* its value. Runtime: `O(1)`, or `O(n)` when a rehash is needed.
*
* Returns the map instance to allow method chaining.
*
* @param {any} key
* @param {any} value
* @returns {HashMap}
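*
* Illustrative inserts (re-adding an existing key replaces its value):
* @example
* const map = new HashMap()
* map.add("a", 1).add("b", 2) // chaining works because `add` returns the map
* map.add("a", 99)
* map.get("a") // => 99
* map.count // => 2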
*/
add(key, value) {
let bucket = this.getBucketFor(key)
if (!bucket) {
bucket = new List()
this._buckets[this.hash(key)] = bucket
}
const entry = this.getNodeFor(key)
if (!entry) {
bucket.add({ key, value })
this._keys.push(key)
this._size += 1
if (bucket.count > 1) {
this._collisions += 1
}
if (this.shouldRehash) {
this.rehash()
}
}
else {
entry.data.value = value
}
return this
}
/**
* Rehashing minimises collisions once the map passes its rehash threshold.
* It roughly doubles the number of buckets, recomputes every hash code, and then
* redistributes the entries among the new buckets.
*
* Ideally the new capacity would be rounded up to the next prime number, since primes
* are good at minimising collisions; this implementation simply doubles the larger of
* the current item count and bucket count.
*
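* Worked example with the defaults (19 buckets, threshold 0.75) and no deletions:
* the 15th insertion pushes the occupancy factor to 15 / 19 ≈ 0.79 > 0.75, so the
* map rehashes to `max(15, 19) * 2 = 38` buckets.
*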
* @private
*/
rehash() {
const newCapacity = Math.max(this._size, this._buckets.length) * 2
// Preserve the configured rehash threshold when building the bigger map
const newMap = new HashMap(newCapacity, this._rehashThreshold)
for (const key of this._keys) {
newMap.add(key, this.get(key))
}
this._initialCapacity = newMap._initialCapacity
this._rehashThreshold = newMap._rehashThreshold
this._buckets = newMap._buckets
this._keys = newMap._keys
this._size = newMap._size
this._collisions = newMap._collisions
}
/**
* Hashes the given `key` into a bucket index. The key is first converted to a
* string via `String(key)`, then hashed with the 32-bit FNV-1a algorithm, and the
* result is reduced modulo the number of buckets.
*
* @private
* @see http://bit.ly/fvn-1a
* @param {any} key
* @returns {number} an index into the buckets array
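*
* Illustrative call (the exact index depends on the current bucket count):
* @example
* const map = new HashMap() // 19 buckets by default
* map.hash("width") // => an integer in [0, 18]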
*/
hash(key) {
const str = String(key)
let hash = 2166136261 // FNV_offset_basis (32 bit)
for (let i = 0; i < str.length; i++) {
hash ^= str.codePointAt(i) // XOR in the character's code point
hash = Math.imul(hash, 16777619) // multiply by the 32-bit FNV_prime without overflowing 32 bits
}
return (hash >>> 0) % this._buckets.length // coerce to unsigned 32-bit, then map to a bucket index
}
}
module.exports = HashMap