From 119e26b529688f45cfe3be82719b8006e18e5308 Mon Sep 17 00:00:00 2001 From: q191201771 <191201771@qq.com> Date: Fri, 15 Nov 2019 18:59:13 +0800 Subject: [PATCH] pkg ic patch --- README.md | 1 + pkg/ic/ic.go | 11 ++++ pkg/ic/ic_test.go | 103 +++++++++++++++++++----------------- pkg/ic/lf_compressor.go | 21 ++++++-- pkg/ic/origin_compressor.go | 12 ++++- pkg/ic/util.go | 32 ++--------- 6 files changed, 98 insertions(+), 82 deletions(-) create mode 100644 pkg/ic/ic.go diff --git a/README.md b/README.md index 4dc4149..eff7567 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ pkg/ ...... 源码包 |-- filebatch/ ...... 文件批处理操作 |-- unique/ ...... 对象唯一 ID |-- nazamd5/ ...... md5 操作 + |-- ic/ ...... 将整型切片压缩成二进制字节切片 |-- mockserver/ ...... 模拟一些服务端,用于快速测试其它代码 |-- mockwriter/ ...... 模拟 Writer 接口,用于快速测试其它代码 demo/ ...... 示例相关的代码 diff --git a/pkg/ic/ic.go b/pkg/ic/ic.go new file mode 100644 index 0000000..1c30812 --- /dev/null +++ b/pkg/ic/ic.go @@ -0,0 +1,11 @@ +// package ic 将整型切片压缩成二进制字节切片 +package ic + +// 具体使用见 LFCompressor 和 OriginCompressor +type Compressor interface { + // 将整型切片压缩成二进制字节切片 + Marshal(ids []uint32) (ret []byte) + // 将二进制字节切片反序列化为整型切片 + // 反序列化后得到的整型切片,切片中整型的顺序和序列化之前保持不变 + Unmarshal(b []byte) (ids []uint32) +} diff --git a/pkg/ic/ic_test.go b/pkg/ic/ic_test.go index 4dea754..ab14f86 100644 --- a/pkg/ic/ic_test.go +++ b/pkg/ic/ic_test.go @@ -1,43 +1,39 @@ package ic import ( - "bytes" - "io/ioutil" "log" - "os" - "strconv" "testing" ) -// 从文件中读取 uid 列表 -func obtainUIDList(filename string) (uids IDSlice) { - fp, err := os.Open(filename) - if err != nil { - panic(err) - } - buf, err := ioutil.ReadAll(fp) - if err != nil { - panic(err) - } - lines := bytes.Split(buf, []byte("\n")) - for _, line := range lines { - if len(line) == 0 { - continue - } - item, err := strconv.ParseUint(string(line), 10, 32) - if err != nil { - panic(err) - } - uids = append(uids, uint32(item)) - } - return uids -} +//// 从文件中读取 uid 列表 +//func obtainUIDList(filename string) (uids IDSlice) { +// fp, err := os.Open(filename) +// if err != nil { +// panic(err) +// } +// buf, err := ioutil.ReadAll(fp) +// if err != nil { +// panic(err) +// } +// lines := bytes.Split(buf, []byte("\n")) +// for _, line := range lines { +// if len(line) == 0 { +// continue +// } +// item, err := strconv.ParseUint(string(line), 10, 32) +// if err != nil { +// panic(err) +// } +// uids = append(uids, uint32(item)) +// } +// return uids +//} //var FILENAME = "uid.txt" -func marshalWrap(ids IDSlice) (ret []byte) { +func marshalWrap(ids []uint32) (ret []byte) { log.Println("> sort.") - sortIDSlice(ids) + Sort(ids) log.Println("< sort.") log.Println("> marshal.") @@ -55,7 +51,7 @@ func marshalWrap(ids IDSlice) (ret []byte) { return } -func unmarshalWrap(b []byte) (ret IDSlice) { +func unmarshalWrap(b []byte) (ret []uint32) { b = zlibRead(b) //var oc OriginCompressor @@ -71,7 +67,7 @@ func TestIC(t *testing.T) { log.SetFlags(log.Lmicroseconds) // 单元测试 case - uidss := []IDSlice{ + uidss := [][]uint32{ {1, 2, 3, 18, 32, 100}, {1, 2, 3, 18, 32}, {1, 2, 3, 18}, @@ -86,30 +82,39 @@ func TestIC(t *testing.T) { {1}, } - // 从文件加载 uid 白名单 - //uids := obtainUIDList(FILENAME) - //var uidss []IDSlice - //uidss = append(uidss, uids) + var compressors []Compressor - for _, uids := range uidss { - log.Println("-----") - log.Println("in uid len:", len(uids)) + compressors = append(compressors, &OriginCompressor{}) + compressors = append(compressors, &OriginCompressor{ZlibExt: true}) - b := marshalWrap(uids) - log.Println("len(b):", len(b)) + compressors = append(compressors, &LFCompressor{FB: 0}) + compressors = append(compressors, &LFCompressor{FB: 0, ZlibExt: true}) + compressors = append(compressors, &LFCompressor{FB: 2}) + compressors = append(compressors, &LFCompressor{FB: 4}) + compressors = append(compressors, &LFCompressor{FB: 4, ZlibExt: true}) - uids2 := unmarshalWrap(b) - log.Println("out uid len:", len(uids2)) + for _, c := range compressors { + for _, uids := range uidss { + log.Println("-----") + log.Println("in uid len:", len(uids)) - // assert check - if len(uids) != len(uids2) { - panic(0) - } - for i := range uids { - if uids[i] != uids2[i] { + Sort(uids) + b := c.Marshal(uids) + log.Println("len(b):", len(b)) + + uids2 := c.Unmarshal(b) + log.Println("out uid len:", len(uids2)) + + // assert check + if len(uids) != len(uids2) { panic(0) } + for i := range uids { + if uids[i] != uids2[i] { + panic(0) + } + } + log.Println("-----") } - log.Println("-----") } } diff --git a/pkg/ic/lf_compressor.go b/pkg/ic/lf_compressor.go index d1ffcc1..63a95f4 100644 --- a/pkg/ic/lf_compressor.go +++ b/pkg/ic/lf_compressor.go @@ -5,13 +5,20 @@ import ( ) type LFCompressor struct { - FB uint32 // 用几个字节的 bit 表示跟随的数据 + FB uint32 // 用几个字节的 bit 表示跟随的数据 + ZlibExt bool // 压缩之后,是否再用 zlib 进一步压缩 + oc OriginCompressor // FB 为0时,退化成使用 OriginCompressor } -func (lfc *LFCompressor) Marshal(ids IDSlice) (ret []byte) { +// 传入的整型切片必须是从小到大有序排列 +func (lfc *LFCompressor) Marshal(ids []uint32) (ret []byte) { if lfc.FB == 0 { - return lfc.oc.Marshal(ids) + ret = lfc.oc.Marshal(ids) + if lfc.ZlibExt { + ret = zlibWrite(ret) + } + return ret } lBuf := make([]byte, 4) @@ -62,10 +69,16 @@ func (lfc *LFCompressor) Marshal(ids IDSlice) (ret []byte) { ret = append(ret, lBuf...) ret = append(ret, fBuf...) } + if lfc.ZlibExt { + ret = zlibWrite(ret) + } return } -func (lfc *LFCompressor) Unmarshal(b []byte) (ids IDSlice) { +func (lfc *LFCompressor) Unmarshal(b []byte) (ids []uint32) { + if lfc.ZlibExt { + b = zlibRead(b) + } if lfc.FB == 0 { return lfc.oc.Unmarshal(b) } diff --git a/pkg/ic/origin_compressor.go b/pkg/ic/origin_compressor.go index 011bb49..977c6d0 100644 --- a/pkg/ic/origin_compressor.go +++ b/pkg/ic/origin_compressor.go @@ -3,17 +3,25 @@ package ic import "encoding/binary" type OriginCompressor struct { + ZlibExt bool // 压缩之后,是否再用 zlib 进一步压缩 } -func (oc *OriginCompressor) Marshal(ids IDSlice) (ret []byte) { +// 并不强制要求整型切片有序 +func (oc *OriginCompressor) Marshal(ids []uint32) (ret []byte) { ret = make([]byte, len(ids)*4) for i, id := range ids { binary.LittleEndian.PutUint32(ret[i*4:], id) } + if oc.ZlibExt { + ret = zlibWrite(ret) + } return } -func (oc *OriginCompressor) Unmarshal(b []byte) (ids IDSlice) { +func (oc *OriginCompressor) Unmarshal(b []byte) (ids []uint32) { + if oc.ZlibExt { + b = zlibRead(b) + } n := len(b) / 4 for i := 0; i < n; i++ { id := binary.LittleEndian.Uint32(b[i*4:]) diff --git a/pkg/ic/util.go b/pkg/ic/util.go index 5e53b0d..8229566 100644 --- a/pkg/ic/util.go +++ b/pkg/ic/util.go @@ -7,11 +7,11 @@ import ( "sort" ) -type IDSlice []uint32 - -func (a IDSlice) Len() int { return len(a) } -func (a IDSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a IDSlice) Less(i, j int) bool { return a[i] < a[j] } +func Sort(ids []uint32) { + sort.Slice(ids, func(i, j int) bool { + return ids[i] < ids[j] + }) +} func resetBuf(b []byte) []byte { for i := 0; i < len(b); i++ { @@ -20,10 +20,6 @@ func resetBuf(b []byte) []byte { return b } -func sortIDSlice(ids IDSlice) { - sort.Sort(ids) -} - func zlibWrite(in []byte) []byte { var b bytes.Buffer w := zlib.NewWriter(&b) @@ -38,21 +34,3 @@ func zlibRead(in []byte) (ret []byte) { ret, _ = ioutil.ReadAll(r) return } - -//func isBufEmpty(b []byte) bool { -// for i := 0; i < len(b); i++ { -// if b[i] != 0 { -// return false -// } -// } -// return true -//} -// -//func dumpIDSlice(ids IDSlice, filename string) { -// fp, _ := os.Create(filename) -// for _, id := range ids { -// _, _ = fp.WriteString(fmt.Sprintf("%d", id)) -// _, _ = fp.WriteString("\n") -// } -// _ = fp.Close() -//}