在上一篇文章我们已经理解了实现红黑树所需的理论知识。在本篇文章中,我将给出C语言版本的通用红黑树的具体实现思路。
你也可以在我的GitHub上下载到完整的代码:github.com/WU-SUNFLOWE…
实现基本的红黑树
编写头文件rb-tree.h
首先我们需要定义红黑树节点的结构体:
C 代码解读复制代码typedef enum { kBlack, kRed } Color;
typedef struct RbNode {
Color color;
struct RbNode* parent;
struct RbNode* left;
struct RbNode* right;
} RbNode;
你可能会注意到非常奇怪的一个地方:与一般的数据结构教材不同,为什么我们这里定义的红黑树节点只有指针域和颜色域,却没有数据域?
这是因为当我们在实际项目中应用红黑树时,具体的数据域可能是五花八门的。如果在这里将数据域的结构写死,将会大大打击我们的红黑树实现的可复用性。这里我们先暂且搁置这个问题,等到后面我将会介绍一种Linux内核源码中常用的技巧,利用它我们就可以将我们的红黑树实现运用到任何你想运用的地方。
接下来,为了在代码中能够更加清晰地对红黑树的根节点进行表示和管理,我们定义一个RbRoot
结构体,其中仅有一个RbNode*
指针,指向实际的根节点。
C 代码解读复制代码typedef struct RbRoot {
RbNode* rb_node;
} RbRoot;
#define InitializedRbRoot { NULL, }
// 当我们希望在程序中创建一棵新的红黑树时,我们可以这么做:
// RbRoot root = InitializedRbRoot;
然后我们定义一系列用于管理红黑树的函数。如下所示,大部分函数的功能都是非常好理解的。唯独我需要说明的有这几点。
首先是一个C/C++开发常识。代码中的extern "C"
用于处理C和C++代码混合编译时,链接器无法链接函数符号的问题。具体可参考这个视频。
此外,你可能会注意到InsertIntoRbTree
函数的定义十分奇怪。这是因为红黑树的本质仍然是一种二分查找树,当我们在实际项目中应用红黑树时,具体的二分规则同样也是五花八门的,因此插入新节点(以及搜索某个节点)的实现不适合写死在我们的通用红黑树的代码当中。
取而代之的是,我们希望程序员在使用通用红黑树时,能够自己编写代码,以确定将新的节点插入为哪个(原先的)叶子节点(parent
)的左孩子(parent_link = &parent->left
)亦或是右孩子(parent_link = &parent->right
),并调用通用红黑树的InsertIntoRbTree
函数实现真正的插入和平衡性调整。
如果你听了我的解释还不是很明白,也不用着急。后面我会给出具体的例子。
C 代码解读复制代码#ifdef __cplusplus
extern "C" {
#endif
/* Small utils */
inline void SetColor(RbNode* node, Color color) {
node->color = color;
}
inline bool IsRed(RbNode* node) {
return node != NULL && node->color == kRed;
}
inline bool IsBlack(RbNode* node) {
return node == NULL || node->color == kBlack;
}
inline bool IsEmptyRbRoot(RbRoot* root) {
return root->rb_node == NULL;
}
inline void Transplant(RbNode* old_node, RbNode* new_node, RbRoot* root); // Same with CLRS
/* Rotation function. */
inline void RotateLeft(RbNode* x, RbRoot* root);
inline void RotateRight(RbNode* x, RbRoot* root);
/* Insert implementation */
void FixupAfterInsert(RbNode* node, RbRoot* root);
void InsertIntoRbTree(RbNode* node, RbNode* parent, RbNode** parent_link, RbRoot* root);
/* Remove implementation */
void FixupAfterRemove(RbNode* node, RbNode* node_parent, RbRoot* root);
void RemoveFromRbTree(RbNode* node, RbRoot* root);
#ifdef __cplusplus
}
#endif
编写代码文件rb-tree.c
完成了头文件的编写,编写各个函数实际实现代码的工作也就水到渠成了。我的代码中已经给出了较为详细的注释,这里就不再多做解释了。
注意到我在代码中使用了大量的assert
断言语句,这体现了防御性编程的思想,有助于在开发和调试早期尽早发现潜在的错误。(尤其是C/C++这种直接与指针打交道的语言!)
C 代码解读复制代码#include "include/rb-tree.h"
#include
#include
inline void Transplant(RbNode* old_node, RbNode* new_node, RbRoot* root) {
assert(old_node != NULL);
if (old_node == root->rb_node) {
assert(old_node->parent == NULL);
root->rb_node = new_node;
} else if (old_node->parent->left == old_node) {
old_node->parent->left = new_node;
} else if (old_node->parent->right == old_node) {
old_node->parent->right = new_node;
}
if (new_node) {
new_node->parent = old_node->parent;
}
}
inline void RotateLeft(RbNode* x, RbRoot* root) {
/*
Before rotate.
p
|
x
/ \
/ \
a y
/ \
/ \
(b) c
After rotate. Besides x and y, we only need to adjust b's parent.
p
|
y
/ \
/ \
x c
/ \
/ \
a (b)
*/
assert(x != NULL);
RbNode* y = x->right;
assert(y != NULL);
RbNode* b = y->left;
// Reset y's parent.
Transplant(x, y, root);
// Reconnect x with y.
y->left = x;
x->parent = y;
// Reconnect b with x.
x->right = b;
if (b) b->parent = x;
}
inline void RotateRight(RbNode* x, RbRoot* root) {
/*
Before rotate.
p
|
x
/ \
/ \
y c
/ \
/ \
a (b)
After rotate. Besides x and y, we only need to adjust b's parent.
p
|
y
/ \
/ \
a x
/ \
/ \
(b) c
*/
assert(x != NULL);
RbNode* y = x->left;
assert(y != NULL);
RbNode* b = y->right;
// Reset y's parent.
Transplant(x, y, root);
// Reconnect x with y.
y->right = x;
x->parent = y;
// Connect b with x;
x->left = b;
if (b) b->parent = x;
}
void FixupAfterInsert(RbNode* node, RbRoot* root) {
assert(node != NULL);
RbNode* uncle = NULL;
RbNode* parent = NULL;
RbNode* gparent = NULL;
while (IsRed(parent = node->parent)) {
assert(node->color == kRed);
gparent = parent->parent;
assert(gparent != NULL);
if (parent == gparent->left) {
uncle = gparent->right;
/*
Case 1: Uncle node is red.
In this case, we don't care whether `node` is a left child or a right child.
| |
[gparent] gparent
/ \ / \
/ \ / \
/ \ ====> / \
parent uncle [parent] [uncle]
/
/
node
*/
if (IsRed(uncle)) {
SetColor(gparent, kRed);
SetColor(parent, kBlack);
SetColor(uncle, kBlack);
node = gparent;
continue;
}
/*
Case 2: Uncle is black, and `node` is the right child of its parent.
Let's covert this case into Case 3.
| |
[gparent] [gparent]
/ \ / \
/ \ / \
/ \ ====> / \
parent [uncle] `parent` pointer ---> node [uncle]
\ /
\ /
\ /
node `node` pointer ---> parent
*/
if (node == parent->right) {
RotateLeft(parent, root);
RbNode* tmp = parent;
parent = node;
node = tmp;
}
/*
Case 3: Uncle is black, and `node` is the left child of its parent.
After rotating and recoloring, the fixup algorithm is finished.
| |
[gparent] [parent]
/ \ / \
/ \ / \
/ \ ====> / \
parent [uncle] node gparent
/ \
/ \
/ \
node [uncle]
*/
RotateRight(gparent, root);
SetColor(parent, kBlack);
SetColor(gparent, kRed);
break;
}
else {
uncle = gparent->left;
/* Case 1 */
if (IsRed(uncle)) {
SetColor(gparent, kRed);
SetColor(parent, kBlack);
SetColor(uncle, kBlack);
node = gparent;
continue;
}
/* Case 2 */
if (node == parent->left) {
RotateRight(parent, root);
RbNode* tmp = parent;
parent = node;
node = tmp;
}
/* Case 3 */
RotateLeft(gparent, root);
SetColor(parent, kBlack);
SetColor(gparent, kRed);
break;
}
}
// Don't forget to force to set root node to black.
SetColor(root->rb_node, kBlack);
}
void InsertIntoRbTree(RbNode* node, RbNode* parent, RbNode** parent_link, RbRoot* root) {
assert(node != NULL && parent_link != NULL);
assert(*parent_link == NULL);
assert(!parent || (&parent->left == parent_link || &parent->right == parent_link));
node->left = node->right = NULL;
SetColor(node, kRed);
node->parent = parent;
*parent_link = node;
FixupAfterInsert(node, root);
}
void FixupAfterRemove(RbNode* node, RbNode* node_parent, RbRoot* root) {
while ((node == NULL || IsBlack(node)) && node != root->rb_node) {
assert(node_parent != NULL);
assert(node_parent->left == node || node_parent->right == node);
assert(node == NULL || node->parent == node_parent);
RbNode* sibling = NULL;
if (node == node_parent->left) {
sibling = node_parent->right;
assert(sibling != NULL);
/*
Case 1: The `node` has a red sibling.
Let's call `RotateLeft` so that we can enter case 2, 3 or 4.
| |
[parent] [sibling]
/ \ / \
/ \ / \
/ \ / \
/ \ / \
[[node]] sibling ====> parent [R]
/\ / \ / \ / \
/ \ / \ / \ / \
a b / \ / \ e f
[L] [R] [[node]] [L]
/ \ / \ / \ / \
/ \ / \ / \ / \
c d e f a b c d
*/
if (IsRed(sibling)) {
assert(IsBlack(node_parent));
RotateLeft(node_parent, root);
SetColor(node_parent, kRed);
SetColor(sibling, kBlack);
}
/*
Case 2: The `node` has a black sibling, and the sibling hasn't any red child.
| |
[] <----`node` pointer
/ \ / \
/ \ / \
/ \ / \
/ \ / \
[[node]] [sibling] ====> [node] sibling
/ \ / \ / \ / \
/ \ / \ / \ / \
a b / \ a b / \
[L] [R] [L] [R]
/ \ / \ / \ / \
/ \ / \ / \ / \
c d e f c d e f
*/
else if (!IsRed(sibling->left) && !IsRed(sibling->right)) {
SetColor(sibling, kRed);
node = node_parent;
node_parent = node->parent;
}
/*
Case 3: The `node` has a black sibling, and the sibling has a red child in left but not right.
Let's call `RotateRight` so that we can convert it into Case 4.
| |
/ \ / \
/ \ / \
/ \ / \
/ \ / \
[[node]] [sibling] ====> [[node]] [L] <------ `sibling` pointer in next loop
/ \ / \ / \ / \
/ \ / \ / \ / \
a b / \ a b / \
L [R] c sibling
/ \ / \ / \
/ \ / \ / \
c d e f / \
d [R]
/ \
/ \
e f
*/
else if (IsBlack(sibling->right)) {
assert(IsRed(sibling->left));
SetColor(sibling, kRed);
SetColor(sibling->left, kBlack);
RotateRight(sibling, root);
}
/*
Case 4: The `node` has a black sibling, and the sibling's right child is red.
Let's call `RotateLeft` so that we can convert it into Case 4.
| |
/ \ / \
/ \ / \
/ \ / \
/ \ / \
[[node]] [sibling] ====> [parent] [R]
/ \ / \ / \ / \
/ \ / \ / \ / \
a b / \ / \ e f
{L} R [node] {L}
/ \ / \ / \ / \
/ \ / \ / \ / \
c d e f a b c d
*/
else {
SetColor(sibling, node_parent->color);
SetColor(node_parent, kBlack);
SetColor(sibling->right, kBlack);
RotateLeft(node_parent, root);
// Ok, now the rb-tree is rebalanced.
// Let's exit the loop simply.
break;
}
}
else {
sibling = node_parent->left;
assert(sibling != NULL);
/* Case 1: The `node` has a red sibling. */
if (IsRed(sibling)) {
assert(IsBlack(node_parent));
RotateRight(node_parent, root);
SetColor(node_parent, kRed);
SetColor(sibling, kBlack);
}
/* Case 2 */
else if (!IsRed(sibling->left) && !IsRed(sibling->right)) {
SetColor(sibling, kRed);
node = node_parent;
node_parent = node->parent;
}
/* Case 3 */
else if (IsBlack(sibling->left)) {
assert(IsRed(sibling->right));
SetColor(sibling, kRed);
SetColor(sibling->right, kBlack);
RotateLeft(sibling, root);
}
/* Case 4 */
else {
SetColor(sibling, node_parent->color);
SetColor(node_parent, kBlack);
SetColor(sibling->left, kBlack);
RotateRight(node_parent, root);
// Ok, now the rb-tree is rebalanced.
// Let's exit the loop simply.
break;
}
}
}
// If `node` is tree root, it will "absorb" additional black color.
// If `node` is red, it will be recolored to black simply.
if (node != NULL) {
SetColor(node, kBlack);
}
}
void RemoveFromRbTree(RbNode* node, RbRoot* root) {
RbNode* replacement = NULL;
RbNode* replacement_parent = node->parent;
Color removed_color = node->color;
if (node->left == NULL) {
replacement = node->right;
Transplant(node, replacement, root);
}
else if (node->right == NULL) {
replacement = node->left;
Transplant(node, replacement, root);
}
else {
// Search the successor of node.
RbNode* successor = node->right;
while (successor->left != NULL) {
successor = successor->left;
}
// Record the replacement of `successor`,
// which will replace `successor` in logical.
replacement = successor->right;
if (successor != node->right) {
// Reset the parent of successor's right child
replacement_parent = successor->parent;
Transplant(successor, replacement, root);
// Reconnect node's right child with successor.
successor->right = node->right;
node->right->parent = successor;
} else {
replacement_parent = successor;
}
// Replace node with successor.
Transplant(node, successor, root);
// Reconnect node's left child with successor.
successor->left = node->left;
node->left->parent = successor;
// Record the original color of successor,
// which is the real color the rb-tree lost.
removed_color = successor->color;
// Recolor successor with node's color.
SetColor(successor, node->color);
}
// Don't forget to rebalance the rb-tree.
if (removed_color == kBlack) {
FixupAfterRemove(replacement, replacement_parent, root);
}
}
实现通用红黑树
在前文中我们已经简单地提到,我们希望我们实现的红黑树具备一定的通用性,这意味着我们可以将实际项目中更加复杂的数据结构通过红黑树的形式组织起来,并通过统一的通用红黑树函数进行增删改查操作。
为了实现这个需求,一个容易想到的策略就是组合(Composition),即在我们自己的更复杂的数据结构中内联一个通用红黑树的节点结构体。这也是在Linux内核中被广泛使用的一个技巧。
比如,下面就是一个最简单的例子:
C 代码解读复制代码struct MyData {
int value;
struct RbNode rb_node;
};
当我们编写代码调用通用红黑树提供的函数时,就需要从我们自己的数据结构MyData
中访问红黑树节点rb_node
。这是非常简单的。
但如果反过来,当我们访问到通用红黑树上的某个节点后,若希望对该节点对应的MyData
结构体进行操作,则应该怎么做呢?或者更直白地说,已知某个红黑树节点首地址,则要如何获取到内联该节点的结构体的首地址呢?
为了解决该问题,我们引入如下的两个工具宏:
C 代码解读复制代码// OffsetOf用于计算结构体type中member成员变量相对于结构体首地址的偏移量
#define OffsetOf(type, member) (uintptr_t)(&((type*)0)->member)
// 三个参数分别为:红黑树节点的首地址,内联红黑树节点的结构体类型,该结构体类型中内联红黑树节点的成员变量名
#define ContainerOf(ptr, type, member) (type*)((uintptr_t)(ptr) - OffsetOf(type, member));
以下是用法举例:
C 代码解读复制代码MyData* data = (MyData*)malloc(sizeof(MyData));
RbNode* rb_node_ptr = &data->rb_node;
MyData* data_ptr = ContainerOf(rb_node_ptr, struct RbNode, rb_node);
if (data == data_ptr) {
puts("Passed!"); // 这句代码会执行
} else {
puts("Failed!");
}
解决了这个问题之后,我们就可以编写使用红黑树来管理MyData
结构体的工具函数了:
C 代码解读复制代码void MyInsertIntoRbTree(MyData* new_data, RbRoot* root) {
RbNode* parent = nullptr;
RbNode** link_ptr = &root->rb_node;
while (*link_ptr) {
parent = *link_ptr;
MyData* parent_data = ContainerOf(parent, struct MyData, rb_node);
if (parent_data->value > new_data->value) {
link_ptr = &parent->left;
}
else {
link_ptr = &parent->right;
}
}
assert(link_ptr != nullptr);
InsertIntoRbTree(&new_data->rb_node, parent, link_ptr, root);
}
void MyRemoveFromRbTree(MyData* data, RbRoot* root) {
RemoveFromRbTree(&data->rb_node, root);
free(data); // 将节点从通用红黑树摘除后,申请的堆内存需要我们自己手工释放!!!
}
void MyPrintRbTree(RbNode* node) {
assert(node != nullptr);
if (node->left) MyPrintRbTree(node->left);
MyData* data = ContainerOf(node, struct MyData, rb_node);
printf("color=%s value=%d ", node->color == kBlack ? "black" : "red", data->value);
if (node->left) {
data = ContainerOf(node->left, struct MyData, rb_node);
printf("left_value=%d ", data->value);
}
if (node->right) {
data = ContainerOf(node->right, struct MyData, rb_node);
printf("right_value=%d ", data->value);
}
putchar('\n');
if (node->right) MyPrintRbTree(node->right);
}
实现红黑树测试工具(基于C++)
红黑树作为一种比较复杂的二叉平衡树,在实际写代码实现时很容易写错掉,所以这里我还写了一些测试函数,用于验证我们实现的红黑树能否正确工作。
其中
bool IsLegalRbTree(RbRoot* root_node)
:进一步调用InorderTraversalChecker
函数,扫描指定的二叉搜索树,检查其是否符合红黑树的五条性质。bool RbTreeTesterAuto(int node_num, bool print_log)
:随机生成node_num
个数据域取值不同的节点插入红黑树,并检查最终的红黑树的五条性质仍然成立;再将这些节点逐个从红黑树中移除,每移除一个节点后,检查一次红黑树是否仍然合法。bool RbTreeTesterWithValues(const std::vector
:类似& values, bool print_log) RbTreeTesterAuto
函数,不过插入红黑树的节点数据域取值由用户指定。
C++ 代码解读复制代码#include
#include
#include
#include
#include
#include
static void InorderTraversalChecker(
RbNode* node, std::vector<int>* record,
int cur_black_count, std::vector<int>* black_count_record
) {
// 2. Are all nodes either red or black in color?
if (!IsRed(node) && !IsBlack(node)) {
throw "Failed: All nodes are either red or black in color.";
}
// 3. Are all red nodes' child and parent node black in color?
if (IsRed(node) && (IsRed(node->parent) || IsRed(node->left) || IsRed(node->right))) {
throw "Failed: All red nodes' child and parent node are black in color.";
}
// Update black count
if (IsBlack(node)) {
cur_black_count += 1;
}
// Inorder traversal left
if (node->left != nullptr) {
InorderTraversalChecker(node->left, record, cur_black_count, black_count_record);
}
// Record node value
MyData* my_data_struct = ContainerOf(node, struct MyData, rb_node);
record->push_back(my_data_struct->value);
// Inorder traversal right
if (node->right != nullptr) {
InorderTraversalChecker(node->right, record, cur_black_count, black_count_record);
}
// 4. Are the numbers of black nodes in the simple paths from root node to any leaf node same?
if (!node->left && !node->right) {
if (black_count_record->size() > 0 && cur_black_count != black_count_record->back()) {
throw "Failed: The numbers of black nodes in the"
" simple paths from root node to any leaf node are same";
}
black_count_record->push_back(cur_black_count);
}
}
bool IsLegalRbTree(RbRoot* root_node) {
// 5. Is the root node black in color?
if (!IsBlack(root_node->rb_node)) {
std::cerr << "Failed: The root node is black in color" << std::endl;
return false;
}
std::vector<int> record;
std::vector<int> black_count_record;
try {
InorderTraversalChecker(root_node->rb_node, &record, 0, &black_count_record);
}
catch (const char* msg) {
std::cerr << msg << std::endl;
return false;
}
// 1. Is this tree a legal BST?
std::vector<int> sorted_record = record;
std::sort(sorted_record.begin(), sorted_record.end());
if (!std::equal(record.begin(), record.end(), sorted_record.begin())) {
std::cerr << "Failed: This tree is a legal BST." << std::endl;
return false;
}
return true;
}
bool RbTreeTesterAuto(int node_num, bool print_log) {
std::random_device rd; // Random seed generator
std::mt19937 gen(rd()); // Random number generator
std::uniform_int_distribution<int> dis(-node_num * 2, node_num * 2); // The range of random number is [-1000, 1000]
RbRoot root = InitializedRbRoot;
std::queue datas;
// Test the insert function.
std::unordered_set<int> set;
for (int i = 0; i < node_num; ++i) {
MyData* my_data_struct = reinterpret_cast(malloc(sizeof(MyData)));
assert(my_data_struct != nullptr);
do {
my_data_struct->value = dis(gen);
} while (set.count(my_data_struct->value) > 0);
set.insert(my_data_struct->value);
// Record rb-tree node.
datas.push(my_data_struct);
// Insert the new node to the rb-tree.
MyInsertIntoRbTree(my_data_struct, &root);
}
if (print_log) std::cout << "Inserted all nodes." << std::endl;
// Check whether the rb-tree is legal.
if (!IsLegalRbTree(&root)) {
return false;
}
if (print_log) std::cout << "Passed check after inserting." << std::endl;
if (print_log) MyPrintRbTree(root.rb_node);
// Test the remove function.
while (!datas.empty()) {
MyData* data = datas.front();
int node_value = data->value;
if (print_log) std::cout << "Removing node " << node_value << std::endl;
// Rmove current node from the rb-tree.
MyRemoveFromRbTree(data, &root);
// Check
if (!IsEmptyRbRoot(&root) && !IsLegalRbTree(&root)) {
return false;
}
datas.pop();
if (print_log) {
std::cout << "Successfully remove node " << node_value << std::endl;
}
if (!IsEmptyRbRoot(&root) && print_log) {
MyPrintRbTree(root.rb_node);
}
}
return true;
}
bool RbTreeTesterWithValues(const std::vector<int>& values, bool print_log) {
RbRoot root = InitializedRbRoot;
std::queue datas;
// Test the insert function.
for (int value : values) {
MyData* my_data_struct = reinterpret_cast(malloc(sizeof(MyData)));
assert(my_data_struct != nullptr);
my_data_struct->value = value;
// Record rb-tree node.
datas.push(my_data_struct);
// Insert the new node to the rb-tree.
MyInsertIntoRbTree(my_data_struct, &root);
}
std::cout << "Inserted all nodes." << std::endl;
// Check whether the rb-tree is legal.
if (!IsEmptyRbRoot(&root) && !IsLegalRbTree(&root)) {
return false;
}
std::cout << "Passed check after inserting." << std::endl;
if (print_log) MyPrintRbTree(root.rb_node);
// Test the remove function.
while (!datas.empty()) {
MyData* data = datas.front();
int node_value = data->value;
if (print_log) std::cout << "Removing node " << node_value << std::endl;
// Rmove current node from the rb-tree.
MyRemoveFromRbTree(data, &root);
// Check
if (!IsEmptyRbRoot(&root) && !IsLegalRbTree(&root)) {
return false;
}
datas.pop();
if (print_log) {
std::cout << "Successfully remove node " << node_value << std::endl;
}
if (!IsEmptyRbRoot(&root) && print_log) {
MyPrintRbTree(root.rb_node);
}
}
return true;
}
评论记录:
回复评论: