Golang中Bit数组的实现方式-巨人网络通讯

Golang中Bit数组的实现方式

Go语言里的集合一般会用map[T]bool这种形式来表示，T代表元素类型。集合用map类型来表示虽然非常灵活，但我们可以以一种更好的形式来表示它。

例如在数据流分析领域，集合元素通常是一个非负整数，集合会包含很多元素，并且集合会经常进行并集、交集操作，这种情况下，bit数组会比map表现更加理想。

一个bit数组通常会用一个无符号数或者称之为“字”的slice来表示，每一个元素的每一位都表示集合里的一个值。当集合的第i位被设置时，我们才说这个集合包含元素i。

下面的这个程序展示了一个简单的bit数组类型，并且实现了三个函数来对这个bit数组来进行操作：

package main
import (
	"bytes"
	"fmt"
)
// An IntSet is a set of small non-negative integers.
// Its zero value represents the empty set.
type IntSet struct {
	words []uint
}
const (
	bitNum = (32  (^uint(0) >> 63)) //根据平台自动判断决定是32还是64
)
// Has reports whether the set contains the non-negative value x.
func (s *IntSet) Has(x int) bool {
	word, bit := x/bitNum, uint(x%bitNum)
	return word  len(s.words)  s.words[word](1bit) != 0
}
// Add adds the non-negative value x to the set.
func (s *IntSet) Add(x int) {
	word, bit := x/bitNum, uint(x%bitNum)
	for word >= len(s.words) {
		s.words = append(s.words, 0)
	}
	s.words[word] |= 1  bit
}
//A与B的交集，合并A与B
// UnionWith sets s to the union of s and t.
func (s *IntSet) UnionWith(t *IntSet) {
	for i, tword := range t.words {
		if i  len(s.words) {
			s.words[i] |= tword
		} else {
			s.words = append(s.words, tword)
		}
	}
}

因为每一个字都有64个二进制位，所以为了定位x的bit位，我们用了x/64的商作为字的下标，并且用x%64得到的值作为这个字内的bit的所在位置。

例如，对于数字1，将其加入比特数组：

func (s *IntSet) Add(x int) {
 word, bit := x/bitNum, uint(x%bitNum) //0, 1 := 1/64, uint(1%64)
 for word >= len(s.words) { // 条件不满足
  s.words = append(s.words, 0)
 }
 s.words[word] |= 1  bit // s.words[0] |= 1  1
}
// 把1存入后，words数组变为了[]uint64{2}

同理，假如我们再将66加入比特数组：

func (s *IntSet) Add(x int) {
 word, bit := x/bitNum, uint(x%bitNum) //1, 2 := 66/64, uint(66%64)
 for word >= len(s.words) { // 条件满足
  s.words = append(s.words, 0) // 此时s.words = []uint64{2, 0}
 }
 s.words[word] |= 1  bit // s.words[1] |= 1  2
}
// 继续把66存入后，words数组变为了[]uint64{2, 4}

所以，对于words，每个元素可存储的值有64个，每超过64个则进位，即添加一个元素。（注意，0也占了一位，所以64才要进位，第一个元素可存储0-63）。

所以，对于words中的一个元素，要转换为具体的值时：首先取到其位置i，用 64 * i 作为已进位数（类似于每10位要进位），然后将这个元素转换为二进制数，从右往左数，第多少位为1则表示相应的有这个值，用这个位数 x+64 *i 即为我们存入的值。

相应的，可有如下String()函数

// String returns the set as a string of the form "{1 2 3}".
func (s *IntSet) String() string {
 var buf bytes.Buffer
 buf.WriteByte('{')
 for i, word := range s.words {
  if word == 0 {
   continue
  }
  for j := 0; j  bitNum; j++ {
   if word(1uint(j)) != 0 {
    if buf.Len() > len("{") {
     buf.WriteByte(' ')
    }
    fmt.Fprintf(buf, "%d", bitNum*i+j)
   }
  }
 }
 buf.WriteByte('}')
 return buf.String()
}

例如，前面存入了1和66后，转换过程为：

// []uint64{2 4}
// 对于2: 1  1 = 2; 所以 x = 0 * 64 + 1 
// 对于4: 1  2 = 4; 所以 x = 1 * 64 + 2
// 所以转换为String为{1 66}

实现比特数组的其他方法函数

func (s *IntSet) Len() int {
	var len int
	for _, word := range s.words {
		for j := 0; j  bitNum; j++ {
			if word(1uint(j)) != 0 {
				len++
			}
		}
	}
	return len
}
func (s *IntSet) Remove(x int) {
	word, bit := x/bitNum, uint(x%bitNum)
	if s.Has(x) {
		s.words[word] ^= 1  bit
	}
}
func (s *IntSet) Clear() {
	s.words = append([]uint{})
}
func (s *IntSet) Copy() *IntSet {
	intSet := IntSet{
		words: []uint{},
	}
	for _, value := range s.words {
		intSet.words = append(intSet.words, value)
	}
	return intSet
}
func (s *IntSet) AddAll(args ...int) {
	for _, x := range args {
		s.Add(x)
	}
}
//A与B的并集，A与B中均出现
func (s *IntSet) IntersectWith(t *IntSet) {
	for i, tword := range t.words {
		if i >= len(s.words) {
			continue
		}
		s.words[i] = tword
	}
}
//A与B的差集，元素出现在A未出现在B
func (s *IntSet) DifferenceWith(t *IntSet) {
	t1 := t.Copy() //为了不改变传参t，拷贝一份
	t1.IntersectWith(s)
	for i, tword := range t1.words {
		if i  len(s.words) {
			s.words[i] ^= tword
		}
	}
}
//A与B的并差集，元素出现在A没有出现在B，或出现在B没有出现在A
func (s *IntSet) SymmetricDifference(t *IntSet) {
	for i, tword := range t.words {
		if i  len(s.words) {
			s.words[i] ^= tword
		} else {
			s.words = append(s.words, tword)
		}
	}
}
//获取比特数组中的所有元素的slice集合
func (s *IntSet) Elems() []int {
	var elems []int
	for i, word := range s.words {
		for j := 0; j  bitNum; j++ {
			if word(1uint(j)) != 0 {
				elems = append(elems, bitNum*i+j)
			}
		}
	}
	return elems
}

至此，比特数组的常用方法函数都已实现，现在可以来使用它。

func main() {
	var x, y IntSet
	x.Add(1)
	x.Add(144)
	x.Add(9)
	fmt.Println("x:", x.String()) // "{1 9 144}"
	y.Add(9)
	y.Add(42)
	fmt.Println("y:", y.String()) // "{9 42}"
	x.UnionWith(y)
	fmt.Println("x unionWith y:", x.String())         // "{1 9 42 144}"
	fmt.Println("x has 9,123:", x.Has(9), x.Has(123)) // "true false"
	fmt.Println("x len:", x.Len())                    //4
	fmt.Println("y len:", y.Len())                    //2
	x.Remove(42)
	fmt.Println("x after Remove 42:", x.String()) //{1 9 144}
	z := x.Copy()
	fmt.Println("z copy from x:", z.String()) //{1 9 144}
	x.Clear()
	fmt.Println("clear x:", x.String()) //{}
	x.AddAll(1, 2, 9)
	fmt.Println("x addAll 1,2,9:", x.String()) //{1 2 9}
	x.IntersectWith(y)
	fmt.Println("x intersectWith y:", x.String()) //{9}
	x.AddAll(1, 2)
	fmt.Println("x addAll 1,2:", x.String()) //{1 2 9}
	x.DifferenceWith(y)
	fmt.Println("x differenceWith y:", x.String()) //{1 2}
	x.AddAll(9, 144)
	fmt.Println("x addAll 9,144:", x.String()) //{1 2 9 144}
	x.SymmetricDifference(y)
	fmt.Println("x symmetricDifference y:", x.String()) //{1 2 42 144}
	for _, value := range x.Elems() {
		fmt.Print(value, " ") //1 2 42 144
	}
}

以上为个人经验，希望能给大家一个参考，也希望大家多多支持脚本之家。如有错误或未考虑完全的地方，望不吝赐教。

您可能感兴趣的文章: