lua源码笔记-table
结构
lua 中的数据结构只有一种,那就是 table。
typedef struct Table {
CommonHeader;
lu_byte flags; /* 1<<p means tagmethod(p) is not present */
lu_byte lsizenode; /* log2 of size of 'node' array */
unsigned int alimit; /* "limit" of 'array' array */
TValue *array; /* array part */
Node *node;
Node *lastfree; /* any free position is before this position */
struct Table *metatable;
GCObject *gclist;
} Table;
在 Table
结构中可以看到,其中有一个 TValue *array
可以存储数据,但是还有另外一部分存储数据的地方,就是由 Node *node
指向的哈希表。对于属于 [1, alimit]
的 key
,会使用 array
存储数据,其余数据则存储在哈希表中。
哈希表数据则存储在 Node *node
这个数组中,数组的长度为 2的 lsizenode
次方,数组中 Node
又通过 NodeKey
中的 next
代表的偏移组成链表。
/*
** Nodes for Hash tables: A pack of two TValue's (key-value pairs)
** plus a 'next' field to link colliding entries. The distribution
** of the key's fields ('key_tt' and 'key_val') not forming a proper
** 'TValue' allows for a smaller size for 'Node' both in 4-byte
** and 8-byte alignments.
*/
typedef union Node {
struct NodeKey {
TValuefields; /* fields for value */
lu_byte key_tt; /* key type */
int next; /* for chaining */
Value key_val; /* key value */
} u;
TValue i_val; /* direct access to node's value as a proper 'TValue' */
} Node;
读取table
(ltable.c)
#define gnode(t,i) (&(t)->node[i])
#define gval(n) (&(n)->i_val)
#define gnext(n) ((n)->u.next)
#define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t))))
/*
** "Generic" get version. (Not that generic: not valid for integers,
** which may be in array part, nor for floats with integral values.)
*/
static const TValue *getgeneric (Table *t, const TValue *key) {
Node *n = mainpositionTV(t, key);
for (;;) { /* check whether 'key' is somewhere in the chain */
if (equalkey(key, n))
return gval(n); /* that's it */
else {
int nx = gnext(n);
if (nx == 0)
return &absentkey; /* not found */
n += nx;
}
}
}
可以看到在从哈希表中查数据时,先通过 key
的哈希值获取数组中对应的 Node* n
,如果发生了哈希碰撞,则通过NodeKey
中的 next
代表的偏移寻找下一个节点。
写入table
写入哈希表时如果发生哈希碰撞,需要寻找一个空节点存储数据。lua 使用 Table
中的 Node *lastfree
指向最后一个空节点,然后往前查找。
(ltable.c)
/* true when 't' is using 'dummynode' as its hash part */
#define isdummy(t) ((t)->lastfree == NULL)
static Node *getfreepos (Table *t) {
if (!isdummy(t)) {
while (t->lastfree > t->node) {
t->lastfree--;
if (keyisnil(t->lastfree))
return t->lastfree;
}
}
return NULL; /* could not find a free place */
}
写入哈希表时先通过哈希值获取数组中对应的节点,如果节点已被占用则调用 getfreepos
获取一个新节点,然后将新节点插入 next
组成的链表中。这里需要特别注意到,当占用位置的节点也是自己的位置被别人占用的节点时,会把占用位置的节点移到新节点,将新数据写入当前位置,这样避免 next
组成的链表过长。
(ltable.c)
/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
Node *mp;
TValue aux;
// ...
mp = mainpositionTV(t, key);
if (!isempty(gval(mp)) || isdummy(t)) { /* main position is taken? */
Node *othern;
Node *f = getfreepos(t); /* get a free place */
if (f == NULL) { /* cannot find a free place? */
rehash(L, t, key); /* grow table */
/* whatever called 'newkey' takes care of TM cache */
return luaH_set(L, t, key); /* insert key into grown table */
}
lua_assert(!isdummy(t));
othern = mainposition(t, keytt(mp), &keyval(mp));
if (othern != mp) { /* is colliding node out of its main position? */
/* yes; move colliding node into free position */
while (othern + gnext(othern) != mp) /* find previous */
othern += gnext(othern);
gnext(othern) = cast_int(f - othern); /* rechain to point to 'f' */
*f = *mp; /* copy colliding node into free pos. (mp->next also goes) */
if (gnext(mp) != 0) {
gnext(f) += cast_int(mp - f); /* correct 'next' */
gnext(mp) = 0; /* now 'mp' is free */
}
setempty(gval(mp));
}
else { /* colliding node is in its own main position */
/* new node will go into free position */
if (gnext(mp) != 0)
gnext(f) = cast_int((mp + gnext(mp)) - f); /* chain new position */
else lua_assert(gnext(f) == 0);
gnext(mp) = cast_int(f - mp);
mp = f;
}
}
setnodekey(L, mp, key);
luaC_barrierback(L, obj2gco(t), key);
lua_assert(isempty(gval(mp)));
return gval(mp);
}
空间划分
Table
分成两部分最重要的问题就是每个部分时多大,如果数组部分过小,那么就失去了使用数组部分的好处,如果数组部分过大,空位置太多,则浪费了太多空间,lua使用的是使数组部分使用一半以上的最大的二次方。
(ltable.c)
/*
** Compute the optimal size for the array part of table 't'. 'nums' is a
** "count array" where 'nums[i]' is the number of integers in the table
** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of
** integer keys in the table and leaves with the number of keys that
** will go to the array part; return the optimal size. (The condition
** 'twotoi > 0' in the for loop stops the loop if 'twotoi' overflows.)
*/
static unsigned int computesizes (unsigned int nums[], unsigned int *pna) {
int i;
unsigned int twotoi; /* 2^i (candidate for optimal size) */
unsigned int a = 0; /* number of elements smaller than 2^i */
unsigned int na = 0; /* number of elements to go to array part */
unsigned int optimal = 0; /* optimal size for array part */
/* loop while keys can fill more than half of total size */
for (i = 0, twotoi = 1;
twotoi > 0 && *pna > twotoi / 2;
i++, twotoi *= 2) {
a += nums[i];
if (a > twotoi/2) { /* more than half elements present? */
optimal = twotoi; /* optimal size (till now) */
na = a; /* all elements up to 'optimal' will go to array part */
}
}
lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
*pna = na;
return optimal;
}
获取长度
lua 中数组并不一定是满的,而且可能部分项放在哈希表中,所以获取数组长度的算法比较复杂。首先 alimit
不一定等于已分配数组的实际长度,数组的已分配长度叫做 realsize。
/*
** About 'alimit': if 'isrealasize(t)' is true, then 'alimit' is the
** real size of 'array'. Otherwise, the real size of 'array' is the
** smallest power of two not smaller than 'alimit' (or zero iff 'alimit'
** is zero); 'alimit' is then used as a hint for #t.
*/
#define BITRAS (1 << 7)
#define isrealasize(t) (!((t)->marked & BITRAS))
#define setrealasize(t) ((t)->marked &= cast_byte(~BITRAS))
#define setnorealasize(t) ((t)->marked |= BITRAS)
lua会先检查当前 limit
是否符合条件,不符合则会通过二分法在数组中寻找实际长度,如果找到将长度赋值给 alimit
这样下次简单判断之后就能使用了。如果数组部分全满则会在哈希表中通过二分法寻找长度。
(ltable.c)
lua_Unsigned luaH_getn (Table *t) {
unsigned int limit = t->alimit;
if (limit > 0 && isempty(&t->array[limit - 1])) { /* (1)? */
/* there must be a boundary before 'limit' */
if (limit >= 2 && !isempty(&t->array[limit - 2])) {
/* 'limit - 1' is a boundary; can it be a new limit? */
if (ispow2realasize(t) && !ispow2(limit - 1)) {
t->alimit = limit - 1;
setnorealasize(t); /* now 'alimit' is not the real size */
}
return limit - 1;
}
else { /* must search for a boundary in [0, limit] */
unsigned int boundary = binsearch(t->array, 0, limit);
/* can this boundary represent the real size of the array? */
if (ispow2realasize(t) && boundary > luaH_realasize(t) / 2) {
t->alimit = boundary; /* use it as the new limit */
setnorealasize(t);
}
return boundary;
}
}
/* 'limit' is zero or present in table */
if (!limitequalsasize(t)) { /* (2)? */
/* 'limit' > 0 and array has more elements after 'limit' */
if (isempty(&t->array[limit])) /* 'limit + 1' is empty? */
return limit; /* this is the boundary */
/* else, try last element in the array */
limit = luaH_realasize(t);
if (isempty(&t->array[limit - 1])) { /* empty? */
/* there must be a boundary in the array after old limit,
and it must be a valid new limit */
unsigned int boundary = binsearch(t->array, t->alimit, limit);
t->alimit = boundary;
return boundary;
}
/* else, new limit is present in the table; check the hash part */
}
/* (3) 'limit' is the last element and either is zero or present in table */
lua_assert(limit == luaH_realasize(t) &&
(limit == 0 || !isempty(&t->array[limit - 1])));
if (isdummy(t) || isempty(luaH_getint(t, cast(lua_Integer, limit + 1))))
return limit; /* 'limit + 1' is absent */
else /* 'limit + 1' is also present */
return hash_search(t, limit);
}