├── .github
│   └── workflows
│       ├── codeql-analysis.yml
│       ├── go.yml
│       └── reviewdog.yml
├── .gitignore
├── LICENSE
├── go.mod
├── go.sum
├── readme-cn.md
├── readme.md
├── ring.go
├── ring_test.go
├── routinegroup.go
├── routinegroup_test.go
├── stream.go
└── stream_test.go
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ main ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ main ]
20 | schedule:
21 | - cron: '18 19 * * 6'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | runs-on: ubuntu-latest
27 |
28 | strategy:
29 | fail-fast: false
30 | matrix:
31 | language: [ 'go' ]
32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
33 | # Learn more:
34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
35 |
36 | steps:
37 | - name: Checkout repository
38 | uses: actions/checkout@v2
39 |
40 | # Initializes the CodeQL tools for scanning.
41 | - name: Initialize CodeQL
42 | uses: github/codeql-action/init@v1
43 | with:
44 | languages: ${{ matrix.language }}
45 | # If you wish to specify custom queries, you can do so here or in a config file.
46 | # By default, queries listed here will override any specified in a config file.
47 | # Prefix the list here with "+" to use these queries and those in the config file.
48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main
49 |
50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
51 | # If this step fails, then you should remove it and run the build manually (see below)
52 | - name: Autobuild
53 | uses: github/codeql-action/autobuild@v1
54 |
55 | # ℹ️ Command-line programs to run using the OS shell.
56 | # 📚 https://git.io/JvXDl
57 |
58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
59 | # and modify them (or add more) to build your code if your project
60 | # uses a compiled language
61 |
62 | #- run: |
63 | # make bootstrap
64 | # make release
65 |
66 | - name: Perform CodeQL Analysis
67 | uses: github/codeql-action/analyze@v1
68 |
--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 | branches: [ main ]
8 |
9 | jobs:
10 | build:
11 | name: Build
12 | runs-on: ubuntu-latest
13 | steps:
14 |
15 | - name: Set up Go 1.x
16 | uses: actions/setup-go@v2
17 | with:
18 | go-version: ^1.14
19 | id: go
20 |
21 | - name: Check out code into the Go module directory
22 | uses: actions/checkout@v2
23 |
24 | - name: Get dependencies
25 | run: |
26 | go get -v -t -d ./...
27 |
28 | - name: Lint
29 | run: |
30 | go vet -stdmethods=false $(go list ./...)
31 | go install mvdan.cc/gofumpt@latest
32 | test -z "$(gofumpt -s -l -extra .)" || echo "Please run 'gofumpt -l -w -extra .'"
33 |
34 | - name: Test
35 | run: go test -race -coverprofile=coverage.txt -covermode=atomic ./...
36 |
37 | - name: Codecov
38 | uses: codecov/codecov-action@v2
39 |
--------------------------------------------------------------------------------
/.github/workflows/reviewdog.yml:
--------------------------------------------------------------------------------
1 | name: reviewdog
2 | on: [pull_request]
3 | jobs:
4 | staticcheck:
5 | name: runner / staticcheck
6 | runs-on: ubuntu-latest
7 | steps:
8 | - uses: actions/checkout@v2
9 | - uses: reviewdog/action-staticcheck@v1
10 | with:
11 | github_token: ${{ secrets.github_token }}
12 | # Change reviewdog reporter if you need [github-pr-check,github-check,github-pr-review].
13 | reporter: github-pr-review
14 | # Report all results.
15 | filter_mode: nofilter
16 | # Exit with 1 when it finds at least one finding.
17 | fail_on_error: true
18 | # Set staticcheck flags
19 | staticcheck_flags: -checks=inherit,-SA1019,-SA1029,-SA5008
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 |
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 |
14 | # dev files
15 | .idea
16 |
17 | # Dependency directories (remove the comment below to include it)
18 | # vendor/
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Kevin Wan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/kevwan/stream
2 |
3 | go 1.17
4 |
5 | require github.com/stretchr/testify v1.7.0
6 |
7 | require (
8 | github.com/davecgh/go-spew v1.1.0 // indirect
9 | github.com/pmezard/go-difflib v1.0.0 // indirect
10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
11 | )
12 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
5 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
6 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
7 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
11 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
12 |
--------------------------------------------------------------------------------
/readme-cn.md:
--------------------------------------------------------------------------------
1 | # stream
2 |
3 | [English](readme.md) | 简体中文
4 |
5 | [](https://github.com/kevwan/stream/actions)
6 | [](https://codecov.io/gh/kevwan/stream)
7 | [](https://goreportcard.com/report/github.com/kevwan/stream)
8 | [](https://github.com/kevwan/stream)
9 | [](https://opensource.org/licenses/MIT)
10 |
11 | ## 为什么会有这个项目
12 |
13 | `stream` 其实是 [go-zero](https://github.com/zeromicro/go-zero) 的一部分,但是一些用户问我是不是可以单独使用 `fx`(go-zero里叫 fx)而不用引入 `go-zero` 的依赖,所以我考虑再三,还是单独提供一个吧。但是,我强烈推荐你使用 `go-zero`,因为 `go-zero` 真的提供了很多很好的功能。
14 |
15 |
16 |
17 | ## 什么是流处理
18 |
19 | 如果有 java 使用经验的同学一定会对 java8 的 Stream 赞不绝口,极大的提高了我们对于集合类型数据的处理能力。
20 |
21 | ```java
22 | int sum = widgets.stream()
23 | .filter(w -> w.getColor() == RED)
24 | .mapToInt(w -> w.getWeight())
25 | .sum();
26 | ```
27 |
28 | Stream 能让我们支持链式调用和函数编程的风格来实现数据的处理,看起来数据像是在流水线一样不断的实时流转加工,最终被汇总。Stream 的实现思想就是将数据处理流程抽象成了一个数据流,每次加工后返回一个新的流供使用。
29 |
30 | ## Stream 功能定义
31 |
32 | 动手写代码之前,先想清楚,把需求理清楚是最重要的一步,我们尝试代入作者的视角来思考整个组件的实现流程。首先把底层实现的逻辑放一下,先尝试从零开始定义 stream 的功能。
33 |
34 | Stream 的工作流程其实也属于生产消费者模型,整个流程跟工厂中的生产流程非常相似,尝试先定义一下 Stream 的生命周期:
35 |
36 | 1. 创建阶段/数据获取(原料)
37 | 2. 加工阶段/中间处理(流水线加工)
38 | 3. 汇总阶段/终结操作(最终产品)
39 |
40 | 下面围绕 stream 的三个生命周期开始定义 API:
41 |
42 | #### 创建阶段
43 |
44 | 为了创建出数据流 stream 这一抽象对象,可以理解为构造器。
45 |
46 | 我们支持三种方式构造 stream,分别是:切片转换,channel 转换,函数式转换。
47 |
48 | 注意这个阶段的方法都是普通的公开方法,并不绑定 Stream 对象。
49 |
50 | ```Go
51 | // 通过可变参数模式创建 stream
52 | func Just(items ...interface{}) Stream
53 |
54 | // 通过 channel 创建 stream
55 | func Range(source <-chan interface{}) Stream
56 |
57 | // 通过函数创建 stream
58 | func From(generate GenerateFunc) Stream
59 |
60 | // 拼接 stream
61 | func Concat(s Stream, others ...Stream) Stream
62 | ```
63 |
64 | #### 加工阶段
65 |
66 | 加工阶段需要进行的操作往往对应了我们的业务逻辑,比如:转换,过滤,去重,排序等等。
67 |
68 | 这个阶段的 API 属于 method 需要绑定到 Stream 对象上。
69 |
70 | 结合常用的业务场景进行如下定义:
71 |
72 | ```Go
73 | // 去除重复item
74 | Distinct(keyFunc KeyFunc) Stream
75 | // 按条件过滤item
76 | Filter(filterFunc FilterFunc, opts ...Option) Stream
77 | // 分组
78 | Group(fn KeyFunc) Stream
79 | // 返回前n个元素
80 | Head(n int64) Stream
81 | // 返回后n个元素
82 | Tail(n int64) Stream
83 | // 转换对象
84 | Map(fn MapFunc, opts ...Option) Stream
85 | // 合并item到slice生成新的stream
86 | Merge() Stream
87 | // 反转
88 | Reverse() Stream
89 | // 排序
90 | Sort(fn LessFunc) Stream
91 | // 作用在每个item上
92 | Walk(fn WalkFunc, opts ...Option) Stream
93 | // 聚合其他Stream
94 | Concat(streams ...Stream) Stream
95 | ```
96 |
97 | 加工阶段的处理逻辑都会返回一个新的 Stream 对象,这里有个基本的实现范式
98 |
99 |
100 |
101 | #### 汇总阶段
102 |
103 | 汇总阶段其实就是我们想要的处理结果,比如:是否匹配,统计数量,遍历等等。
104 |
105 | ```Go
106 | // 检查是否全部匹配
107 | AllMatch(fn PredicateFunc) bool
108 | // 检查是否存在至少一项匹配
109 | AnyMatch(fn PredicateFunc) bool
110 | // 检查全部不匹配
111 | NoneMatch(fn PredicateFunc) bool
112 | // 统计数量
113 | Count() int
114 | // 清空stream
115 | Done()
116 | // 对所有元素执行操作
117 | ForAll(fn ForAllFunc)
118 | // 对每个元素执行操作
119 | ForEach(fn ForEachFunc)
120 | ```
121 |
122 | 梳理完组件的需求边界后,我们对于即将要实现的 Stream 有了更清晰的认识。在我的认知里面真正的架构师对于需求的把握以及后续演化能达到极其精准的地步,做到这一点离不开对需求的深入思考以及洞穿需求背后的本质。通过代入作者的视角来模拟复盘整个项目的构建流程,学习作者的思维方法论这正是我们学习开源项目最大的价值所在。
123 |
124 | 好了,我们尝试定义出完整的 Stream 接口全貌以及函数。
125 |
126 | > 接口的作用不仅仅是模版作用,还在于利用其抽象能力搭建项目整体的框架而不至于一开始就陷入细节,能快速的将我们的思考过程通过接口简洁的表达出来,学会养成自顶向下的思维方法从宏观的角度来观察整个系统,一开始就陷入细节则很容易拔剑四顾心茫然。。。
127 |
128 | ```Go
129 | rxOptions struct {
130 | unlimitedWorkers bool
131 | workers int
132 | }
133 | Option func(opts *rxOptions)
134 | // key生成器
135 | //item - stream中的元素
136 | KeyFunc func(item interface{}) interface{}
137 | // 过滤函数
138 | FilterFunc func(item interface{}) bool
139 | // 对象转换函数
140 | MapFunc func(item interface{}) interface{}
141 | // 对象比较
142 | LessFunc func(a, b interface{}) bool
143 | // 遍历函数
144 | WalkFunc func(item interface{}, pip chan<- interface{})
145 | // 匹配函数
146 | PredicateFunc func(item interface{}) bool
147 | // 对所有元素执行操作
148 | ForAllFunc func(pip <-chan interface{})
149 | // 对每个item执行操作
150 | ForEachFunc func(item interface{})
151 | // 对每个元素并发执行操作
152 | ParallelFunc func(item interface{})
153 | // 对所有元素执行聚合操作
154 | ReduceFunc func(pip <-chan interface{}) (interface{}, error)
155 | // item生成函数
156 | GenerateFunc func(source <-chan interface{})
157 |
158 | Stream interface {
159 | // 去除重复item
160 | Distinct(keyFunc KeyFunc) Stream
161 | // 按条件过滤item
162 | Filter(filterFunc FilterFunc, opts ...Option) Stream
163 | // 分组
164 | Group(fn KeyFunc) Stream
165 | // 返回前n个元素
166 | Head(n int64) Stream
167 | // 返回后n个元素
168 | Tail(n int64) Stream
169 | // 获取第一个元素
170 | First() interface{}
171 | // 获取最后一个元素
172 | Last() interface{}
173 | // 转换对象
174 | Map(fn MapFunc, opts ...Option) Stream
175 | // 合并item到slice生成新的stream
176 | Merge() Stream
177 | // 反转
178 | Reverse() Stream
179 | // 排序
180 | Sort(fn LessFunc) Stream
181 | // 作用在每个item上
182 | Walk(fn WalkFunc, opts ...Option) Stream
183 | // 聚合其他Stream
184 | Concat(streams ...Stream) Stream
185 | // 检查是否全部匹配
186 | AllMatch(fn PredicateFunc) bool
187 | // 检查是否存在至少一项匹配
188 | AnyMatch(fn PredicateFunc) bool
189 | // 检查全部不匹配
190 | NoneMatch(fn PredicateFunc) bool
191 | // 统计数量
192 | Count() int
193 | // 清空stream
194 | Done()
195 | // 对所有元素执行操作
196 | ForAll(fn ForAllFunc)
197 | // 对每个元素执行操作
198 | ForEach(fn ForEachFunc)
199 | }
200 | ```
201 |
202 | channel() 方法用于获取 Stream 管道属性,因为在具体实现时我们面向的是接口对象所以暴露一个私有方法 read 出来。
203 |
204 | ```Go
205 | // 获取内部的数据容器channel,内部方法
206 | channel() chan interface{}
207 | ```
208 |
209 | ## 实现思路
210 |
211 | 功能定义梳理清楚了,接下来考虑几个工程实现的问题。
212 |
213 | ### 如何实现链式调用
214 |
215 | 链式调用,创建对象用到的 builder 模式可以达到链式调用效果。实际上 Stream 实现类似链式的效果原理也是一样的,每次调用完后都创建一个新的 Stream 返回给用户。
216 |
217 | ```Go
218 | // 去除重复item
219 | Distinct(keyFunc KeyFunc) Stream
220 | // 按条件过滤item
221 | Filter(filterFunc FilterFunc, opts ...Option) Stream
222 | ```
223 |
224 | ### 如何实现流水线的处理效果
225 |
226 | 所谓的流水线可以理解为数据在 Stream 中的存储容器,在 go 中我们可以使用 channel 作为数据的管道,达到 Stream 链式调用执行多个操作时**异步非阻塞**效果。
227 |
228 | ### 如何支持并行处理
229 |
230 | 数据加工本质上是在处理 channel 中的数据,那么要实现并行处理无非是并行消费 channel 而已,利用 goroutine 协程、WaitGroup 机制可以非常方便的实现并行处理。
231 |
232 | ## go-zero 实现
233 |
234 | `core/fx/stream.go`
235 |
236 | go-zero 中关于 Stream 的实现并没有定义接口,不过没关系底层实现时逻辑是一样的。
237 |
238 | 为了实现 Stream 接口我们定义一个内部的实现类,其中 source 为 channel 类型,模拟流水线功能。
239 |
240 | ```Go
241 | Stream struct {
242 | source <-chan interface{}
243 | }
244 | ```
245 |
246 | ### 创建 API
247 |
248 | #### channel 创建 Range
249 |
250 | 通过 channel 创建 stream
251 |
252 | ```Go
253 | func Range(source <-chan interface{}) Stream {
254 | return Stream{
255 | source: source,
256 | }
257 | }
258 | ```
259 |
260 | #### 可变参数模式创建 Just
261 |
262 | 通过可变参数模式创建 stream,channel 写完后及时 close 是个好习惯。
263 |
264 | ```Go
265 | func Just(items ...interface{}) Stream {
266 | source := make(chan interface{}, len(items))
267 | for _, item := range items {
268 | source <- item
269 | }
270 | close(source)
271 | return Range(source)
272 | }
273 | ```
274 |
275 | #### 函数创建 From
276 |
277 | 通过函数创建 Stream
278 |
279 | ```Go
280 | func From(generate GenerateFunc) Stream {
281 | source := make(chan interface{})
282 | threading.GoSafe(func() {
283 | defer close(source)
284 | generate(source)
285 | })
286 | return Range(source)
287 | }
288 | ```
289 |
290 | 因为涉及外部传入的函数参数调用,执行过程并不可控,因此需要捕捉运行时异常防止 panic 错误传导到上层导致应用崩溃。
291 |
292 | ```Go
293 | func Recover(cleanups ...func()) {
294 | for _, cleanup := range cleanups {
295 | cleanup()
296 | }
297 | if r := recover(); r != nil {
298 | logx.ErrorStack(r)
299 | }
300 | }
301 |
302 | func RunSafe(fn func()) {
303 | defer rescue.Recover()
304 | fn()
305 | }
306 |
307 | func GoSafe(fn func()) {
308 | go RunSafe(fn)
309 | }
310 | ```
311 |
312 | #### 拼接 Concat
313 |
314 | 拼接其他 Stream 创建一个新的 Stream,调用内部 Concat method 方法,后文将会分析 Concat 的源码实现。
315 |
316 | ```Go
317 | func Concat(s Stream, others ...Stream) Stream {
318 | return s.Concat(others...)
319 | }
320 | ```
321 |
322 | ### 加工 API
323 |
324 | #### 去重 Distinct
325 |
326 | 因为传入的是函数参数`KeyFunc func(item interface{}) interface{}`意味着也同时支持按照业务场景自定义去重,本质上是利用 KeyFunc 返回的结果基于 map 实现去重。
327 |
328 | 函数参数非常强大,能极大的提升灵活性。
329 |
330 | ```Go
331 | func (s Stream) Distinct(keyFunc KeyFunc) Stream {
332 | source := make(chan interface{})
333 | threading.GoSafe(func() {
334 | // channel记得关闭是个好习惯
335 | defer close(source)
336 | keys := make(map[interface{}]lang.PlaceholderType)
337 | for item := range s.source {
338 | // 自定义去重逻辑
339 | key := keyFunc(item)
340 | // 如果key不存在,则将数据写入新的channel
341 | if _, ok := keys[key]; !ok {
342 | source <- item
343 | keys[key] = lang.Placeholder
344 | }
345 | }
346 | })
347 | return Range(source)
348 | }
349 | ```
350 |
351 | 使用案例:
352 |
353 | ```Go
354 | // 1 2 3 4 5
355 | Just(1, 2, 3, 3, 4, 5, 5).Distinct(func(item interface{}) interface{} {
356 | return item
357 | }).ForEach(func(item interface{}) {
358 | t.Log(item)
359 | })
360 |
361 | // 1 2 3 4
362 | Just(1, 2, 3, 3, 4, 5, 5).Distinct(func(item interface{}) interface{} {
363 | uid := item.(int)
364 | // 对大于3的item进行特殊去重逻辑,最终只保留一个>3的item
365 | if uid > 3 {
366 | return 4
367 | }
368 | return item
369 | }).ForEach(func(item interface{}) {
370 | t.Log(item)
371 | })
372 | ```
373 |
374 | #### 过滤 Filter
375 |
376 | 通过将过滤逻辑抽象成 FilterFunc,然后分别作用在 item 上根据 FilterFunc 返回的布尔值决定是否写回新的 channel 中实现过滤功能,实际的过滤逻辑委托给了 Walk method。
377 |
378 | Option 参数包含两个选项:
379 |
380 | 1. unlimitedWorkers 不限制协程数量
381 | 2. workers 限制协程数量
382 |
383 | ```Go
384 | FilterFunc func(item interface{}) bool
385 |
386 | func (s Stream) Filter(filterFunc FilterFunc, opts ...Option) Stream {
387 | return s.Walk(func(item interface{}, pip chan<- interface{}) {
388 | if filterFunc(item) {
389 | pip <- item
390 | }
391 | }, opts...)
392 | }
393 | ```
394 |
395 | 使用示例:
396 |
397 | ```Go
398 | func TestInternalStream_Filter(t *testing.T) {
399 | // 保留偶数 2,4
400 | channel := Just(1, 2, 3, 4, 5).Filter(func(item interface{}) bool {
401 | return item.(int)%2 == 0
402 | }).channel()
403 | for item := range channel {
404 | t.Log(item)
405 | }
406 | }
407 | ```
408 |
409 | #### 遍历执行 Walk
410 |
411 | walk 英文意思是步行,这里的意思是对每个 item 都执行一次 WalkFunc 操作并将结果写入到新的 Stream 中。
412 |
413 | 这里注意一下因为内部采用了协程机制异步执行读取和写入数据所以新的 Stream 中 channel 里面的数据顺序是随机的。
414 |
415 | ```Go
416 | // item-stream中的item元素
417 | // pipe-item符合条件则写入pipe
418 | WalkFunc func(item interface{}, pipe chan<- interface{})
419 |
420 | func (s Stream) Walk(fn WalkFunc, opts ...Option) Stream {
421 | option := buildOptions(opts...)
422 | if option.unlimitedWorkers {
423 | return s.walkUnLimited(fn, option)
424 | }
425 | return s.walkLimited(fn, option)
426 | }
427 |
428 | func (s Stream) walkUnLimited(fn WalkFunc, option *rxOptions) Stream {
429 | // 创建带缓冲区的channel
430 | // 默认为16,channel中元素超过16将会被阻塞
431 | pipe := make(chan interface{}, defaultWorkers)
432 | go func() {
433 | var wg sync.WaitGroup
434 |
435 | for item := range s.source {
436 | // 需要读取s.source的所有元素
437 | // 这里也说明了为什么channel最后写完记得关闭
438 | // 如果不关闭可能导致协程一直阻塞导致泄漏
439 | // 重要, 不赋值给val是个典型的并发陷阱,后面在另一个goroutine里使用了
440 | val := item
441 | wg.Add(1)
442 | // 安全模式下执行函数
443 | threading.GoSafe(func() {
444 | defer wg.Done()
445 | fn(val, pipe)
446 | })
447 | }
448 | wg.Wait()
449 | close(pipe)
450 | }()
451 |
452 | // 返回新的Stream
453 | return Range(pipe)
454 | }
455 |
456 | func (s Stream) walkLimited(fn WalkFunc, option *rxOptions) Stream {
457 | pipe := make(chan interface{}, option.workers)
458 | go func() {
459 | var wg sync.WaitGroup
460 | // 控制协程数量
461 | pool := make(chan lang.PlaceholderType, option.workers)
462 |
463 | for item := range s.source {
464 | // 重要, 不赋值给val是个典型的并发陷阱,后面在另一个goroutine里使用了
465 | val := item
466 | // 超过协程限制时将会被阻塞
467 | pool <- lang.Placeholder
468 | // 这里也说明了为什么channel最后写完记得关闭
469 | // 如果不关闭可能导致协程一直阻塞导致泄漏
470 | wg.Add(1)
471 |
472 | // 安全模式下执行函数
473 | threading.GoSafe(func() {
474 | defer func() {
475 | wg.Done()
476 | //执行完成后读取一次pool释放一个协程位置
477 | <-pool
478 | }()
479 | fn(val, pipe)
480 | })
481 | }
482 | wg.Wait()
483 | close(pipe)
484 | }()
485 | return Range(pipe)
486 | }
487 | ```
488 |
489 | 使用案例:
490 |
491 | 返回的顺序是随机的。
492 |
493 | ```Go
494 | func Test_Stream_Walk(t *testing.T) {
495 | // 返回 300,100,200
496 | Just(1, 2, 3).Walk(func(item interface{}, pip chan<- interface{}) {
497 | pip <- item.(int) * 100
498 | }, WithWorkers(3)).ForEach(func(item interface{}) {
499 | t.Log(item)
500 | })
501 | }
502 | ```
503 |
504 | #### 分组 Group
505 |
506 | 通过对 item 匹配放入 map 中。
507 |
508 | ```Go
509 | KeyFunc func(item interface{}) interface{}
510 |
511 | func (s Stream) Group(fn KeyFunc) Stream {
512 | groups := make(map[interface{}][]interface{})
513 | for item := range s.source {
514 | key := fn(item)
515 | groups[key] = append(groups[key], item)
516 | }
517 | source := make(chan interface{})
518 | go func() {
519 | for _, group := range groups {
520 | source <- group
521 | }
522 | close(source)
523 | }()
524 | return Range(source)
525 | }
526 | ```
527 |
528 | #### 获取前 n 个元素 Head
529 |
530 | n 大于实际数据集长度的话将会返回全部元素
531 |
532 | ```Go
533 | func (s Stream) Head(n int64) Stream {
534 | if n < 1 {
535 | panic("n must be greather than 1")
536 | }
537 | source := make(chan interface{})
538 | go func() {
539 | for item := range s.source {
540 | n--
541 | // n值可能大于s.source长度,需要判断是否>=0
542 | if n >= 0 {
543 | source <- item
544 | }
545 | // let successive method go ASAP even we have more items to skip
546 | // why we don't just break the loop, because if break,
547 | // this former goroutine will block forever, which will cause goroutine leak.
548 | // n==0说明source已经写满可以进行关闭了
549 | // 既然source已经满足条件了为什么不直接进行break跳出循环呢?
550 | // 作者提到了防止协程泄漏
551 | // 因为每次操作最终都会产生一个新的Stream,旧的Stream永远也不会被调用了
552 | if n == 0 {
553 | close(source)
554 | // 这里不能 break:需要继续读完 s.source,否则上游协程会一直阻塞导致泄漏
555 | }
556 | }
557 | // 上面的循环跳出来了说明n大于s.source实际长度
558 | // 依旧需要显式关闭新的source
559 | if n > 0 {
560 | close(source)
561 | }
562 | }()
563 | return Range(source)
564 | }
565 | ```
566 |
567 | 使用示例:
568 |
569 | ```Go
570 | // 返回1,2
571 | func TestInternalStream_Head(t *testing.T) {
572 | channel := Just(1, 2, 3, 4, 5).Head(2).channel()
573 | for item := range channel {
574 | t.Log(item)
575 | }
576 | }
577 | ```
578 |
579 | #### 获取后 n 个元素 Tail
580 |
581 | 这里很有意思,为了确保拿到最后 n 个元素使用环形切片 Ring 这个数据结构,先了解一下 Ring 的实现。
582 |
583 | ```Go
584 | // 环形切片
585 | type Ring struct {
586 | elements []interface{}
587 | index int
588 | lock sync.Mutex
589 | }
590 |
591 | func NewRing(n int) *Ring {
592 | if n < 1 {
593 | panic("n should be greather than 0")
594 | }
595 | return &Ring{
596 | elements: make([]interface{}, n),
597 | }
598 | }
599 |
600 | // 添加元素
601 | func (r *Ring) Add(v interface{}) {
602 | r.lock.Lock()
603 | defer r.lock.Unlock()
604 | // 将元素写入切片指定位置
605 | // 这里的取余实现了循环写效果
606 | r.elements[r.index%len(r.elements)] = v
607 | // 更新下次写入位置
608 | r.index++
609 | }
610 |
611 | // 获取全部元素
612 | // 读取顺序保持与写入顺序一致
613 | func (r *Ring) Take() []interface{} {
614 | r.lock.Lock()
615 | defer r.lock.Unlock()
616 |
617 | var size int
618 | var start int
619 | // 当出现循环写的情况时
620 | // 开始读取位置需要通过取余实现,因为我们希望读取出来的顺序与写入顺序一致
621 | if r.index > len(r.elements) {
622 | size = len(r.elements)
623 | // 因为出现循环写情况,当前写入位置index开始为最旧的数据
624 | start = r.index % len(r.elements)
625 | } else {
626 | size = r.index
627 | }
628 | elements := make([]interface{}, size)
629 | for i := 0; i < size; i++ {
630 | // 取余实现环形读取,读取顺序保持与写入顺序一致
631 | elements[i] = r.elements[(start+i)%len(r.elements)]
632 | }
633 |
634 | return elements
635 | }
636 | ```
637 |
638 | 总结一下环形切片的优点:
639 |
640 | - 支持自动滚动更新
641 | - 节省内存
642 |
643 | 环形切片能实现固定容量满的情况下旧数据不断被新数据覆盖,由于这个特性可以用于读取 channel 后 n 个元素。
644 |
645 | ```Go
646 | func (s Stream) Tail(n int64) Stream {
647 | if n < 1 {
648 | panic("n must be greather than 1")
649 | }
650 | source := make(chan interface{})
651 | go func() {
652 | ring := collection.NewRing(int(n))
653 | // 读取全部元素,如果数量>n环形切片能实现新数据覆盖旧数据
654 | // 保证获取到的一定是最后n个元素
655 | for item := range s.source {
656 | ring.Add(item)
657 | }
658 | for _, item := range ring.Take() {
659 | source <- item
660 | }
661 | close(source)
662 | }()
663 | return Range(source)
664 | }
665 | ```
666 |
667 | 那么为什么不直接使用 len(source) 长度的切片呢?
668 |
669 | 答案是节省内存。凡是涉及到环形类型的数据结构时都具备一个优点那就是省内存,能做到按需分配资源。
670 |
671 | 使用示例:
672 |
673 | ```Go
674 | func TestInternalStream_Tail(t *testing.T) {
675 | // 4,5
676 | channel := Just(1, 2, 3, 4, 5).Tail(2).channel()
677 | for item := range channel {
678 | t.Log(item)
679 | }
680 | // 1,2,3,4,5
681 | channel2 := Just(1, 2, 3, 4, 5).Tail(6).channel()
682 | for item := range channel2 {
683 | t.Log(item)
684 | }
685 | }
686 | ```
687 |
688 | #### 元素转换Map
689 |
690 | 元素转换,内部由协程完成转换操作,注意输出channel并不保证按原序输出。
691 |
692 | ```Go
693 | MapFunc func(item interface{}) interface{}
694 | func (s Stream) Map(fn MapFunc, opts ...Option) Stream {
695 | return s.Walk(func(item interface{}, pip chan<- interface{}) {
696 | pip <- fn(item)
697 | }, opts...)
698 | }
699 | ```
700 |
701 | 使用示例:
702 |
703 | ```Go
704 | func TestInternalStream_Map(t *testing.T) {
705 | channel := Just(1, 2, 3, 4, 5, 2, 2, 2, 2, 2, 2).Map(func(item interface{}) interface{} {
706 | return item.(int) * 10
707 | }).channel()
708 | for item := range channel {
709 | t.Log(item)
710 | }
711 | }
712 | ```
713 |
714 | #### 合并 Merge
715 |
716 | 实现比较简单
717 |
718 | ```Go
719 | func (s Stream) Merge() Stream {
720 | var items []interface{}
721 | for item := range s.source {
722 | items = append(items, item)
723 | }
724 | source := make(chan interface{}, 1)
725 | source <- items
726 | close(source)
727 | return Range(source)
728 | }
729 | ```
730 |
731 | #### 反转 Reverse
732 |
733 | 反转 channel 中的元素。反转算法流程是:
734 |
735 | - 找到中间节点
736 |
737 | - 节点两边开始两两交换
738 |
739 | 注意一下为什么获取 s.source 时用切片来接收呢? 切片会自动扩容,用数组不是更好吗?
740 |
741 | 其实这里是不能用数组的,因为 Stream 写入 source 的操作往往是在协程中异步进行的,事先无法知道元素的数量,每个 Stream 中的 channel 都可能在动态变化,用流水线来比喻 Stream 工作流程的确非常形象。
742 |
743 | ```Go
744 | func (s Stream) Reverse() Stream {
745 | var items []interface{}
746 | for item := range s.source {
747 | items = append(items, item)
748 | }
749 | for i := len(items)/2 - 1; i >= 0; i-- {
750 | opp := len(items) - 1 - i
751 | items[i], items[opp] = items[opp], items[i]
752 | }
753 | return Just(items...)
754 | }
755 | ```
756 |
757 | 使用示例:
758 |
759 | ```Go
760 | func TestInternalStream_Reverse(t *testing.T) {
761 | channel := Just(1, 2, 3, 4, 5).Reverse().channel()
762 | for item := range channel {
763 | t.Log(item)
764 | }
765 | }
766 | ```
767 |
768 | #### 排序 Sort
769 |
770 | 内部调用官方 sort 包的 sort.Slice 排序方案,传入比较函数实现比较逻辑即可。
771 |
772 | ```Go
773 | func (s Stream) Sort(less LessFunc) Stream {
774 | var items []interface{}
775 | for item := range s.source {
776 | items = append(items, item)
777 | }
778 |
779 | sort.Slice(items, func(i, j int) bool {
780 | return less(items[i], items[j])
781 | })
782 | return Just(items...)
783 | }
784 | ```
785 |
786 | 使用示例:
787 |
788 | ```Go
789 | // 5,4,3,2,1
790 | func TestInternalStream_Sort(t *testing.T) {
791 | channel := Just(1, 2, 3, 4, 5).Sort(func(a, b interface{}) bool {
792 | return a.(int) > b.(int)
793 | }).channel()
794 | for item := range channel {
795 | t.Log(item)
796 | }
797 | }
798 | ```
799 |
800 | #### 拼接 Concat
801 |
802 | ```Go
803 | func (s Stream) Concat(steams ...Stream) Stream {
804 | // 创建新的无缓冲channel
805 | source := make(chan interface{})
806 | go func() {
807 | // 创建一个 RoutineGroup 对象(作用类似 WaitGroup)
808 | group := threading.NewRoutineGroup()
809 | // 异步从原channel读取数据
810 | group.Run(func() {
811 | for item := range s.source {
812 | source <- item
813 | }
814 | })
815 | // 异步读取待拼接Stream的channel数据
816 | for _, stream := range steams {
817 | // 每个Stream开启一个协程
818 | group.Run(func() {
819 | for item := range stream.channel() {
820 | source <- item
821 | }
822 | })
823 | }
824 | // 阻塞等待读取完成
825 | group.Wait()
826 | close(source)
827 | }()
828 | // 返回新的Stream
829 | return Range(source)
830 | }
831 | ```
832 |
833 | ### 汇总 API
834 |
835 | #### 全部匹配 AllMatch
836 |
837 | ```Go
838 | func (s Stream) AllMatch(fn PredicateFunc) bool {
839 | for item := range s.source {
840 | if !fn(item) {
841 | // 需要排空 s.source,否则前面的goroutine可能阻塞
842 | go drain(s.source)
843 | return false
844 | }
845 | }
846 |
847 | return true
848 | }
849 | ```
850 |
851 | #### 任意匹配 AnyMatch
852 |
853 | ```Go
854 | func (s Stream) AnyMatch(fn PredicateFunc) bool {
855 | for item := range s.source {
856 | if fn(item) {
857 | // 需要排空 s.source,否则前面的goroutine可能阻塞
858 | go drain(s.source)
859 | return true
860 | }
861 | }
862 |
863 | return false
864 | }
865 | ```
866 |
867 | #### 一个也不匹配 NoneMatch
868 |
869 | ```Go
870 | func (s Stream) NoneMatch(fn func(item interface{}) bool) bool {
871 | for item := range s.source {
872 | if fn(item) {
873 | // 需要排空 s.source,否则前面的goroutine可能阻塞
874 | go drain(s.source)
875 | return false
876 | }
877 | }
878 |
879 | return true
880 | }
881 | ```
882 |
883 | #### 数量统计 Count
884 |
885 | ```Go
886 | func (s Stream) Count() int {
887 | var count int
888 | for range s.source {
889 | count++
890 | }
891 | return count
892 | }
893 | ```
894 |
895 | #### 清空 Done
896 |
897 | ```Go
898 | func (s Stream) Done() {
899 | // 排空 channel,防止 goroutine 阻塞泄露
900 | drain(s.source)
901 | }
902 | ```
903 |
904 | #### 迭代全部元素 ForAll
905 |
906 | ```Go
907 | func (s Stream) ForAll(fn ForAllFunc) {
908 | fn(s.source)
909 | }
910 | ```
911 |
912 | #### 迭代每个元素 ForEach
913 |
914 | ```Go
915 | func (s Stream) ForEach(fn ForEachFunc) {
916 | for item := range s.source {
917 | fn(item)
918 | }
919 | }
920 | ```
921 |
922 | ## 小结
923 |
924 | 至此 Stream 组件就全部实现完了,核心逻辑是利用 channel 当做管道,数据当做水流,不断的用协程接收/写入数据到 channel 中达到异步非阻塞的效果。
925 |
926 | 实现高效的基础来源三个语言特性:
927 |
928 | - channel
929 | - 协程
930 | - 函数式编程
931 |
932 | ## 强烈推荐!
933 |
934 | go-zero: [https://github.com/zeromicro/go-zero](https://github.com/zeromicro/go-zero)
935 |
936 | ## 欢迎 star!⭐
937 |
938 | 如果你正在使用或者觉得这个项目对你有帮助,请 **star** 支持,感谢!
939 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # stream
4 |
5 | English | [简体中文](readme-cn.md)
6 |
7 | [](https://github.com/kevwan/stream/actions)
8 | [](https://codecov.io/gh/kevwan/stream)
9 | [](https://goreportcard.com/report/github.com/kevwan/stream)
10 | [](https://github.com/kevwan/stream)
11 | [](https://opensource.org/licenses/MIT)
12 |
13 | ## Why we have this repo
14 |
15 | `stream` is part of [go-zero](https://github.com/zeromicro/go-zero), but a few people asked whether `fx` (it’s called fx in go-zero) can be used separately, without pulling in `go-zero` as a dependency, so it is provided here as a standalone package. Still, I recommend using `go-zero`, which offers many more features.
16 |
17 |
18 |
19 | ## What is Stream Processing
20 |
21 | Java developers should be very impressed with Stream API in Java, which greatly improves the ability to handle data collections.
22 |
23 | ```java
24 | int sum = widgets.stream()
25 | .filter(w -> w.getColor() == RED)
26 | .mapToInt(w -> w.getWeight())
27 | .sum();
28 | ```
29 |
30 | The idea of Stream is to abstract the data processing into a data stream and return a new stream for use after each process.
31 |
32 | ## Stream Function Definition
33 |
34 | The most important step is to think through the requirements before writing the code, so let's try to put ourselves in the author's shoes and think about the flow of the component. First of all, let's put the underlying implementation logic aside and try to define the stream function from scratch.
35 |
36 | Stream's workflow follows the producer-consumer model, and the whole process is very similar to a production line in a factory. Let's first define the life cycle of a Stream:
37 |
38 | 1. creation phase/data acquisition (raw material)
39 | 2. processing phase/intermediate processing (pipeline processing)
40 | 3. aggregation stage/final operation (final product)
41 |
42 | The API is defined around the three life cycles of a stream.
43 |
44 | #### Creation Phase
45 |
46 | This phase creates the abstract stream object; it can be thought of as the constructor.
47 |
48 | We support three ways of constructing streams: slice conversion, channel conversion, and function conversion.
49 |
50 | Note that the methods in this phase are normal public methods and are not bound to the stream object.
51 |
52 | ```Go
53 | // Create stream by variable parameter pattern
54 | func Just(items ...interface{}) Stream
55 |
56 | // Create a stream via channel
57 | func Range(source <-chan interface{}) Stream
58 |
59 | // Create stream by function
60 | func From(generate GenerateFunc) Stream
61 |
62 | // Concatenate a stream
63 | func Concat(s Stream, others ...Stream) Stream
64 | ```
65 |
66 | #### Processing phase
67 |
68 | The operations required in the processing phase often correspond to our business logic, such as conversion, filtering, de-duplication, sorting, and so on.
69 |
70 | The API for this phase is a method that needs to be bound to a Stream object.
71 |
72 | The following definition is combined with common business scenarios.
73 |
74 | ```Go
75 | // Remove duplicate items
76 | Distinct(keyFunc KeyFunc) Stream
77 | // Filter item by condition
78 | Filter(filterFunc FilterFunc, opts ...Option) Stream
79 | // Grouping
80 | Group(fn KeyFunc) Stream
81 | // Return the first n elements
82 | Head(n int64) Stream
83 | // Returns the last n elements
84 | Tail(n int64) Stream
85 | // Convert objects
86 | Map(fn MapFunc, opts ...Option) Stream
87 | // Merge items into slice to create a new stream
88 | Merge() Stream
89 | // Reverse
90 | Reverse() Stream
91 | // Sort
92 | Sort(fn LessFunc) Stream
93 | // Works on each item
94 | Walk(fn WalkFunc, opts ...Option) Stream
95 | // Aggregate other Streams
96 | Concat(streams ...Stream) Stream
97 | ```
98 |
99 | The processing logic of the processing phase returns a new Stream object, and there is a basic implementation paradigm here.
100 |
101 |
102 |
103 | #### Aggregation stage
104 |
105 | The aggregation phase is actually the result of the processing we want, e.g. whether it matches, count the number, traverse, etc.
106 |
107 | ```Go
108 | // Check for all matches
109 | AllMatch(fn PredicateFunc) bool
110 | // Check if at least one match exists
111 | AnyMatch(fn PredicateFunc) bool
112 | // Check for all mismatches
113 | NoneMatch(fn PredicateFunc) bool
114 | // Count the number of matches
115 | Count() int
116 | // Clear the stream
117 | Done()
118 | // Perform an operation on all elements
119 | ForAll(fn ForAllFunc)
120 | // Perform an operation on each element
121 | ForEach(fn ForEachFunc)
122 | ```
123 |
124 | After sorting out the requirement boundaries of the component, we have a clearer idea of what we are going to implement with Stream. In my view, a real architect's grasp of requirements and their subsequent evolution can be very precise, and this can only be achieved by thinking deeply about the requirements and seeing through to the essence behind them. Putting ourselves in the author's position to simulate the entire build process of the project, and learning the author's way of thinking, is the greatest value of studying open source projects.
125 |
126 | Well, let's try to define the complete Stream interface and functions.
127 |
128 | > The role of an interface is not just to serve as a template, but also to use its abstraction capabilities to build the overall framework of the project without getting bogged down in details at the beginning. Interfaces let us express our thinking concisely and quickly, and help us develop a top-down approach that observes the whole system from a global perspective; otherwise it is easy to get lost in the details early on.
129 |
130 | ```Go
131 | rxOptions struct {
132 | unlimitedWorkers bool
133 | workers int
134 | }
135 | Option func(opts *rxOptions)
136 | // key generator
137 | // item - the element in the stream
138 | KeyFunc func(item interface{}) interface{}
139 | // filter function
140 | FilterFunc func(item interface{}) bool
141 | // object conversion function
142 | MapFunc func(item interface{}) interface{}
143 | // object comparison
144 | LessFunc func(a, b interface{}) bool
145 | // traversal function
146 | WalkFunc func(item interface{}, pip chan<- interface{})
147 | // match function
148 | PredicateFunc func(item interface{}) bool
149 | // perform an operation on all elements
150 | ForAllFunc func(pip <-chan interface{})
151 | // performs an operation on each item
152 | ForEachFunc func(item interface{})
153 | // execute operations on each element concurrently
154 | ParallelFunc func(item interface{})
155 | // execute the aggregation operation on all elements
156 | ReduceFunc func(pip <-chan interface{}) (interface{}, error)
157 | // item generation function
158 | GenerateFunc func(source chan<- interface{})
159 |
160 | Stream interface {
161 | // Remove duplicate items
162 | Distinct(keyFunc KeyFunc) Stream
163 | // Filter item by condition
164 | Filter(filterFunc FilterFunc, opts ...Option) Stream
165 | // Grouping
166 | Group(fn KeyFunc) Stream
167 | // Return the first n elements
168 | Head(n int64) Stream
169 | // Returns the last n elements
170 | Tail(n int64) Stream
171 | // Get the first element
172 | First() interface{}
173 | // Get the last element
174 | Last() interface{}
175 | // Convert the object
176 | Map(fn MapFunc, opts ...Option) Stream
177 | // Merge items into slice to create a new stream
178 | Merge() Stream
179 | // Reverse
180 | Reverse() Stream
181 | // Sort
182 | Sort(fn LessFunc) Stream
183 | // Works on each item
184 | Walk(fn WalkFunc, opts ...Option) Stream
185 | // Aggregate other Streams
186 | Concat(streams ...Stream) Stream
187 | // Check for all matches
188 | AllMatch(fn PredicateFunc) bool
189 | // Check if there is at least one match
190 | AnyMatch(fn PredicateFunc) bool
191 | // Check for all mismatches
192 | NoneMatch(fn PredicateFunc) bool
193 | // Count the number of matches
194 | Count() int
195 | // Clear the stream
196 | Done()
197 | // Perform an operation on all elements
198 | ForAll(fn ForAllFunc)
199 | // Perform an operation on each element
200 | ForEach(fn ForEachFunc)
201 | }
202 | ```
203 |
204 | The channel() method is used to get the Stream's underlying pipeline (channel). Since the implementation works with the interface rather than the concrete type, we expose a private method to read it.
205 |
206 | ```Go
207 | // Get the internal data container channel, internal method
208 | channel() chan interface{}
209 | ```
210 |
211 | ## Implementation ideas
212 |
213 | With the functional definition sorted out, next consider a few engineering implementations.
214 |
215 | ### How to implement chain calls
216 |
217 | Chain calls can be achieved with the builder pattern commonly used to create objects. Stream achieves a similar chaining effect on the same principle: each call creates and returns a new Stream.
218 |
219 | ```Go
220 | // Remove duplicate items
221 | Distinct(keyFunc KeyFunc) Stream
222 | // Filter item by condition
223 | Filter(filterFunc FilterFunc, opts ...Option) Stream
224 | ```
225 |
226 | ### How to achieve the effect of pipeline processing
227 |
228 | The pipeline can be understood as a storage container for data in Stream. In go we can use channel as a pipeline for data to achieve the effect of **asynchronous non-blocking** when Stream chain calls perform multiple operations.
229 |
230 | ### How to support parallel processing
231 |
232 | Data processing is essentially processing the data in the channel, so parallel processing simply means consuming the channel in parallel; goroutines and WaitGroup make this very convenient.
233 |
234 | ## go-zero implementation
235 |
236 | `core/fx/stream.go`
237 |
238 | The implementation of Stream in go-zero does not define an interface, but the logic is the same when it comes to the underlying implementation.
239 |
240 | To implement the Stream interface we define an internal implementation class, where source is of type channel, to emulate the pipeline functionality.
241 |
242 | ```Go
243 | Stream struct {
244 | source <-chan interface{}
245 | }
246 | ```
247 |
248 | ### Create API
249 |
250 | #### channel Creation of Range
251 |
252 | Create stream via channel
253 |
254 | ```Go
255 | func Range(source <-chan interface{}) Stream {
256 | return Stream{
257 | source: source,
258 | }
259 | }
260 | ```
261 |
262 | #### Variable Parameter Pattern Creation of Just
263 |
264 | Creates a stream from variadic arguments. It's a good habit to close the channel when you're done writing.
265 |
266 | ```Go
267 | func Just(items ...interface{}) Stream {
268 | source := make(chan interface{}, len(items))
269 | for _, item := range items {
270 | source <- item
271 | }
272 | close(source)
273 | return Range(source)
274 | }
275 | ```
276 |
277 | #### function to create From
278 |
279 | Stream creation by function
280 |
281 | ```Go
282 | func From(generate GenerateFunc) Stream {
283 | source := make(chan interface{})
284 | threading.GoSafe(func() {
285 | defer close(source)
286 | generate(source)
287 | })
288 | return Range(source)
289 | }
290 | ```
291 |
292 | Because `generate` is a function supplied by the caller, its execution is outside our control, so runtime panics need to be recovered to prevent them from propagating to the upper layers and crashing the application.
293 |
294 | ```Go
295 | func Recover(cleanups ...func()) {
296 | for _, cleanup := range cleanups {
297 | cleanup()
298 | }
299 | if r := recover(); r != nil {
300 | logx.ErrorStack(r)
301 | }
302 | }
303 |
304 | func RunSafe(fn func()) {
305 | defer rescue.Recover()
306 | fn()
307 | }
308 |
309 | func GoSafe(fn func()) {
310 | go RunSafe(fn)
311 | }
312 | ```
313 |
314 | #### Splicing Concat
315 |
316 | Splice other Streams to create a new Stream by calling the internal Concat method; the source code of Concat will be analyzed later.
317 |
318 | ```Go
319 | func Concat(s Stream, others ...Stream) Stream {
320 | return s.Concat(others...)
321 | }
322 | ```
323 |
324 | ### Processing API
325 |
326 | #### de-duplication Distinct
327 |
328 | Because a function parameter `KeyFunc func(item interface{}) interface{}` is passed in, custom de-duplication according to business scenarios is also supported; essentially, the results returned by KeyFunc are used as map keys to de-duplicate.
329 |
330 | The function arguments are very powerful and provide a great deal of flexibility.
331 |
332 | ```Go
333 | func (s Stream) Distinct(keyFunc KeyFunc) Stream {
334 | source := make(chan interface{})
335 | threading.GoSafe(func() {
336 | // It's a good habit for channels to remember to close
337 | defer close(source)
338 | keys := make(map[interface{}]lang.PlaceholderType)
339 | for item := range s.source {
340 | // Custom de-duplication logic
341 | key := keyFunc(item)
342 | // If the key does not exist, write the data to a new channel
343 | if _, ok := keys[key]; !ok {
344 | source <- item
345 | keys[key] = lang.Placeholder
346 | }
347 | }
348 | })
349 | return Range(source)
350 | }
351 | ```
352 |
353 | Use case.
354 |
355 | ```Go
356 | // 1 2 3 4 5
357 | Just(1, 2, 3, 3, 4, 5, 5).Distinct(func(item interface{}) interface{} {
358 | return item
359 | }).ForEach(func(item interface{}) {
360 | t.Log(item)
361 | })
362 |
363 | // 1 2 3 4
364 | Just(1, 2, 3, 3, 4, 5, 5).Distinct(func(item interface{}) interface{} {
365 | uid := item.(int)
366 | // Special de-duplication logic for items greater than 3, so that only one item > 3 is retained
367 | if uid > 3 {
368 | return 4
369 | }
370 | return item
371 | }).ForEach(func(item interface{}) {
372 | t.Log(item)
373 | })
374 | ```
375 |
376 | #### Filter Filter
377 |
378 | The actual filtering logic is delegated to the Walk method by abstracting the filtering logic into a FilterFunc and then acting on the item separately to decide whether to write back to a new channel based on the Boolean value returned by the FilterFunc.
379 |
380 | The Option parameter contains two options.
381 |
382 | 1. unlimitedWorkers No limit on the number of concurrent processes
383 | 2. workers Limit the number of concurrent processes
384 |
385 | ```Go
386 | FilterFunc func(item interface{}) bool
387 |
388 | func (s Stream) Filter(filterFunc FilterFunc, opts ...Option) Stream {
389 | return s.Walk(func(item interface{}, pip chan<- interface{}) {
390 | if filterFunc(item) {
391 | pip <- item
392 | }
393 | }, opts...)
394 | }
395 | ```
396 |
397 | Example usage.
398 |
399 | ```Go
400 | func TestInternalStream_Filter(t *testing.T) {
401 | // keep even numbers 2,4
402 | channel := Just(1, 2, 3, 4, 5).Filter(func(item interface{}) bool {
403 | return item.(int)%2 == 0
404 | }).channel()
405 | for item := range channel {
406 | t.Log(item)
407 | }
408 | }
409 | ```
410 |
411 | #### Iterate through the Walk
412 |
413 | Walk means to traverse: here it performs a WalkFunc operation on each item and writes the result to a new Stream.
414 |
415 | Note here that the order of the data in the channel of the new Stream is random because the internal concurrent mechanism is used to read and write data asynchronously.
416 |
417 | ```Go
418 | // item element in item-stream
419 | // The pipe-item is written to the pipe if it matches the condition
420 | WalkFunc func(item interface{}, pipe chan<- interface{})
421 |
422 | func (s Stream) Walk(fn WalkFunc, opts ...Option) Stream {
423 | option := buildOptions(opts...)
424 | if option.unlimitedWorkers {
425 | return s.walkUnLimited(fn, option)
426 | }
427 | return s.walkLimited(fn, option)
428 | }
429 |
430 | func (s Stream) walkUnLimited(fn WalkFunc, option *rxOptions) Stream {
431 | // Create a channel with a buffer
432 | // default is 16, channel with more than 16 elements will be blocked
433 | pipe := make(chan interface{}, defaultWorkers)
434 | go func() {
435 | var wg sync.WaitGroup
436 |
437 | for item := range s.source {
438 | // All elements of s.source need to be read
439 | // This also explains why the channel must be closed once writing is finished
440 | // If it is not closed, this goroutine may block forever and leak
441 | // Important: copy item into val, because val is used later in another goroutine; capturing the loop variable directly is a classic concurrency trap
442 | val := item
443 | wg.Add(1)
444 | // Execute the function in safe mode
445 | threading.GoSafe(func() {
446 | defer wg.Done()
447 | fn(val, pipe)
448 | })
449 | }
450 | wg.Wait()
451 | close(pipe)
452 | }()
453 |
454 | // return a new Stream
455 | return Range(pipe)
456 | }
457 |
458 | func (s Stream) walkLimited(fn WalkFunc, option *rxOptions) Stream {
459 | pipe := make(chan interface{}, option.workers)
460 | go func() {
461 | var wg sync.WaitGroup
462 | // Control the number of concurrent processes
463 | pool := make(chan lang.PlaceholderType, option.workers)
464 |
465 | for item := range s.source {
466 | // Important: copy item into val, because val is used later in another goroutine; capturing the loop variable directly is a classic concurrency trap
467 | val := item
468 | // will block if the concurrency limit is exceeded
469 | pool <- lang.Placeholder
470 | // This also explains why the channel must be closed once writing is finished
471 | // If you don't close it, the goroutine may keep blocking and leak
472 | wg.Add(1)
473 |
474 | // Execute the function in safe mode
475 | threading.GoSafe(func() {
476 | defer func() {
477 | wg.Done()
478 | // Read the pool once to release a concurrency slot after execution is complete
479 | <-pool
480 | }()
481 | fn(val, pipe)
482 | })
483 | }
484 | wg.Wait()
485 | close(pipe)
486 | }()
487 | return Range(pipe)
488 | }
489 | ```
490 |
491 | Use case.
492 |
493 | The order of returns is randomized.
494 |
495 | ```Go
496 | func Test_Stream_Walk(t *testing.T) {
497 | // return 300,100,200
498 | Just(1, 2, 3).Walk(func(item interface{}, pip chan<- interface{}) {
499 | pip <- item.(int) * 100
500 | }, WithWorkers(3)).ForEach(func(item interface{}) {
501 | t.Log(item)
502 | })
503 | }
504 | ```
505 |
506 | #### Grouping Groups
507 |
508 | Items are put into a map, grouped by the key that KeyFunc returns for each item.
509 |
510 | ```Go
511 | KeyFunc func(item interface{}) interface{}
512 |
513 | func (s Stream) Group(fn KeyFunc) Stream {
514 | groups := make(map[interface{}][]interface{})
515 | for item := range s.source {
516 | key := fn(item)
517 | groups[key] = append(groups[key], item)
518 | }
519 | source := make(chan interface{})
520 | go func() {
521 | for _, group := range groups {
522 | source <- group
523 | }
524 | close(source)
525 | }()
526 | return Range(source)
527 | }
528 | ```
529 |
530 | #### Get the first n elements: Head
531 |
532 | If n is greater than the actual dataset length, all elements will be returned.
533 |
534 | ```Go
535 | func (s Stream) Head(n int64) Stream {
536 | if n < 1 {
537 | panic("n must be greater than 0")
538 | }
539 | source := make(chan interface{})
540 | go func() {
541 | for item := range s.source {
542 | n--
543 | // The value of n may be greater than the length of s.source, you need to determine if it is >= 0
544 | if n >= 0 {
545 | source <- item
546 | }
547 | // let successive method go ASAP even we have more items to skip
548 | // why we don't just break the loop, because if break,
549 | // this former goroutine will block forever, which will cause goroutine leak.
550 | // n==0 means that source is full and can be closed
551 | // Since source has met the condition, why not just break and jump out of the loop?
552 | // The author mentions preventing goroutine leaks
553 | // Because each operation will eventually create a new Stream, and the old one will never be called
554 | if n == 0 {
555 | close(source)
556 | break
557 | }
558 | }
559 | // The above loop jumped out of the loop, which means n is greater than the actual length of s.source
560 | // still need to show the new source closed
561 | if n > 0 {
562 | close(source)
563 | }
564 | }()
565 | return Range(source)
566 | }
567 | ```
568 |
569 | Example usage.
570 |
571 | ```Go
572 | // return 1,2
573 | func TestInternalStream_Head(t *testing.T) {
574 | channel := Just(1, 2, 3, 4, 5).Head(2).channel()
575 | for item := range channel {
576 | t.Log(item)
577 | }
578 | }
579 | ```
580 |
581 | #### Get the last n elements of Tail
582 |
583 | To make sure that exactly the last n elements are kept, a Ring data structure is used; its implementation is worth understanding.
584 |
585 | ```Go
586 | // ring slicing
587 | type Ring struct {
588 | elements []interface{}
589 | index int
590 | lock sync.Mutex
591 | }
592 |
593 | func NewRing(n int) *Ring {
594 | if n < 1 {
595 | panic("n should be greater than 0")
596 | }
597 | return &Ring{
598 | elements: make([]interface{}, n),
599 | }
600 | }
601 |
602 | // Add elements
603 | func (r *Ring) Add(v interface{}) {
604 | r.lock.Lock()
605 | defer r.lock.Unlock()
606 | // Write the element to the slice at the specified location
607 | // The remainder here achieves a circular writing effect
608 | r.elements[r.index%len(r.elements)] = v
609 | // Update the next write position
610 | r.index++
611 | }
612 |
613 | // Get all elements
614 | // Keep the read order the same as the write order
615 | func (r *Ring) Take() []interface{} {
616 | r.lock.Lock()
617 | defer r.lock.Unlock()
618 |
619 | var size int
620 | var start int
621 | // When a wrap-around write has occurred
622 | // the start read position needs to be computed with a modulo, because we want the read order to be the same as the write order
623 | if r.index > len(r.elements) {
624 | size = len(r.elements)
625 | // Because of the cyclic write situation, the current write position index starts with the oldest data
626 | start = r.index % len(r.elements)
627 | } else {
628 | size = r.index
629 | }
630 | elements := make([]interface{}, size)
631 | for i := 0; i < size; i++ {
632 | // Read the remainder in a circular fashion, keeping the read order the same as the write order
633 | elements[i] = r.elements[(start+i)%len(r.elements)]
634 | }
635 |
636 | return elements
637 | }
638 | ```
639 |
640 | To summarize the advantages of ring slicing.
641 |
642 | - Supports automatic scrolling updates
643 | - Memory saving
644 |
645 | Ring slicing lets old data be overwritten by new data once the fixed capacity is full; thanks to this feature it can be used to read the last n elements of the channel.
646 |
647 | ```Go
648 | func (s Stream) Tail(n int64) Stream {
649 | if n < 1 {
650 | panic("n must be greater than 0")
651 | }
652 | source := make(chan interface{})
653 | go func() {
654 | ring := collection.NewRing(int(n))
655 | // Read all elements, if the number > n ring slices can achieve new data over old data
656 | // ensure that the last n elements are obtained
657 | for item := range s.source {
658 | ring.Add(item)
659 | }
660 | for _, item := range ring.Take() {
661 | source <- item
662 | }
663 | close(source)
664 | }()
665 | return Range(source)
666 | }
667 | ```
668 |
669 | So why not just use a len(source) length slice?
670 |
671 | The answer is to save memory. Any data structure that involves a ring type has the advantage of saving memory and allocating resources on demand.
672 |
673 | Example usage.
674 |
675 | ```Go
676 | func TestInternalStream_Tail(t *testing.T) {
677 | // 4,5
678 | channel := Just(1, 2, 3, 4, 5).Tail(2).channel()
679 | for item := range channel {
680 | t.Log(item)
681 | }
682 | // 1,2,3,4,5
683 | channel2 := Just(1, 2, 3, 4, 5).Tail(6).channel()
684 | for item := range channel2 {
685 | t.Log(item)
686 | }
687 | }
688 | ```
689 |
690 | #### element conversion Map
691 |
692 | Element conversion, internally done by a concurrent process to complete the conversion operation, note that the output channel is not guaranteed to be output in the original order.
693 |
694 | ```Go
695 | MapFunc func(item interface{}) interface{}
696 | func (s Stream) Map(fn MapFunc, opts ...Option) Stream {
697 | return s.Walk(func(item interface{}, pip chan<- interface{}) {
698 | pip <- fn(item)
699 | }, opts...)
700 | }
701 | ```
702 |
703 | Example usage.
704 |
705 | ```Go
706 | func TestInternalStream_Map(t *testing.T) {
707 | channel := Just(1, 2, 3, 4, 5, 2, 2, 2, 2, 2, 2, 2).Map(func(item interface{}) interface{} {
708 | return item.(int) * 10
709 | }).channel()
710 | for item := range channel {
711 | t.Log(item)
712 | }
713 | }
714 | ```
715 |
716 | #### Merge Merge
717 |
718 | The implementation is relatively simple, and I've thought long and hard about what scenarios would be suitable for this method.
719 |
720 | ```Go
721 | func (s Stream) Merge() Stream {
722 | var items []interface{}
723 | for item := range s.source {
724 | items = append(items, item)
725 | }
726 | source := make(chan interface{}, 1)
727 | source <- items
728 | close(source)
729 | return Range(source)
730 | }
731 | ```
732 |
733 | #### Reverse
734 |
735 | Reverses the elements of the channel. The flow of the reversal algorithm is
736 |
737 | - Find the middle node
738 | - The two sides of the node start swapping two by two
739 |
740 | Why is a slice used to collect the items read from s.source? Slices grow automatically, but wouldn't it be better to use an array?
741 |
742 | In fact, an array can't be used here, because the number of elements is not known in advance: writing to source is often done asynchronously in another goroutine, and the contents of the channel in each Stream may change dynamically.
743 |
744 | ```Go
745 | func (s Stream) Reverse() Stream {
746 | var items []interface{}
747 | for item := range s.source {
748 | items = append(items, item)
749 | }
750 | for i := len(items)/2 - 1; i >= 0; i-- {
751 | opp := len(items) - 1 - i
752 | items[i], items[opp] = items[opp], items[i]
753 | }
754 | return Just(items...)
755 | }
756 | ```
757 |
758 | Example usage.
759 |
760 | ```Go
761 | func TestInternalStream_Reverse(t *testing.T) {
762 | channel := Just(1, 2, 3, 4, 5).Reverse().channel()
763 | for item := range channel {
764 | t.Log(item)
765 | }
766 | }
767 | ```
768 |
769 | #### Sort
770 |
771 | Internally this calls the sorting function of the standard library's sort package; just pass in the comparison function that implements the comparison logic.
772 |
773 | ```Go
774 | func (s Stream) Sort(less LessFunc) Stream {
775 | var items []interface{}
776 | for item := range s.source {
777 | items = append(items, item)
778 | }
779 |
780 | sort.Slice(items, func(i, j int) bool {
781 | return less(items[i], items[j])
782 | })
783 | return Just(items...)
784 | }
785 | ```
786 |
787 | Example usage.
788 |
789 | ```Go
790 | // 5,4,3,2,1
791 | func TestInternalStream_Sort(t *testing.T) {
792 | channel := Just(1, 2, 3, 4, 5).Sort(func(a, b interface{}) bool {
793 | return a.(int) > b.(int)
794 | }).channel()
795 | for item := range channel {
796 | t.Log(item)
797 | }
798 | }
799 | ```
800 |
801 | #### Splicing Concat
802 |
803 | ```Go
804 | func (s Stream) Concat(steams ...Stream) Stream {
805 | // Create a new unbuffered channel
806 | source := make(chan interface{})
807 | go func() {
808 | // Create a RoutineGroup object (a WaitGroup wrapper)
809 | group := NewRoutineGroup()
810 | // Asynchronously read data from the original channel
811 | group.Run(func() {
812 | for item := range s.source {
813 | source <- item
814 | }
815 | })
816 | // Asynchronously read the channel data of the Stream to be stitched
817 | for _, stream := range steams {
818 | // open a concurrent process for each Stream
819 | group.Run(func() {
820 | for item := range stream.channel() {
821 | source <- item
822 | }
823 | })
824 | }
825 | // Block and wait for the read to complete
826 | group.Wait()
827 | close(source)
828 | }()
829 | // return a new Stream
830 | return Range(source)
831 | }
832 | ```
833 |
834 | ### Aggregate API
835 |
836 | #### Match All AllMatch
837 |
838 | ```Go
839 | func (s Stream) AllMatch(fn PredicateFunc) bool {
840 | for item := range s.source {
841 | if !fn(item) {
842 | // need to drain s.source, otherwise the previous goroutine may block
843 | go drain(s.source)
844 | return false
845 | }
846 | }
847 |
848 | return true
849 | }
850 | ```
851 |
852 | #### Arbitrary Match AnyMatch
853 |
854 | ```Go
855 | func (s Stream) AnyMatch(fn PredicateFunc) bool {
856 | for item := range s.source {
857 | if fn(item) {
858 | // need to drain s.source, otherwise the previous goroutine may block
859 | go drain(s.source)
860 | return true
861 | }
862 | }
863 |
864 | return false
865 | }
866 | ```
867 |
868 | #### NoneMatch
869 |
870 | ```Go
871 | func (s Stream) NoneMatch(fn func(item interface{}) bool) bool {
872 | for item := range s.source {
873 | if fn(item) {
874 | // need to drain s.source, otherwise the previous goroutine may block
875 | go drain(s.source)
876 | return false
877 | }
878 | }
879 |
880 | return true
881 | }
882 | ```
883 |
884 | #### Quantity count Count
885 |
886 | ```Go
887 | func (s Stream) Count() int {
888 | var count int
889 | for range s.source {
890 | count++
891 | }
892 | return count
893 | }
894 | ```
895 |
896 | #### Clear Done
897 |
898 | ```Go
899 | func (s Stream) Done() {
900 | // Drain the channel to prevent goroutine blocking leaks
901 | drain(s.source)
902 | }
903 | ```
904 |
905 | #### Iterate over all elements ForAll
906 |
907 | ```Go
908 | func (s Stream) ForAll(fn ForAllFunc) {
909 | fn(s.source)
910 | }
911 | ```
912 |
913 | #### Iterate over each element ForEach
914 |
915 | ```Go
916 | func (s Stream) ForEach(fn ForEachFunc) {
917 | for item := range s.source {
918 | fn(item)
919 | }
920 | }
921 | ```
922 |
923 | ## Summary
924 |
925 | The core logic is to use the channel as a pipe and the data as a stream, continuously receiving/writing data on the channel from goroutines to achieve an asynchronous, non-blocking effect.
926 |
927 | The basis for this efficiency comes from three language features.
928 |
929 | - channel
930 | - concurrency
931 | - functional programming
932 |
933 | ## References
934 |
935 | go-zero: [https://github.com/zeromicro/go-zero](https://github.com/zeromicro/go-zero)
936 |
937 | ## Give a Star! ⭐
938 |
939 | If you like or are using this project to learn or start your solution, please give it a star. Thanks!
--------------------------------------------------------------------------------
/ring.go:
--------------------------------------------------------------------------------
1 | package stream
2 |
3 | import "sync"
4 |
5 | // A Ring is a fixed-size circular buffer; once full, each new element overwrites the oldest one.
6 | type Ring struct {
7 | elements []interface{} // backing storage; its length is the ring's capacity
8 | index int // number of Add calls so far; index % len(elements) is the next write slot
9 | lock sync.Mutex // held by Add and Take around every access to elements and index
10 | }
11 |
12 | // NewRing returns a Ring that holds at most n elements. It panics if n < 1.
13 | func NewRing(n int) *Ring {
14 | if n < 1 {
15 | panic("n should be greater than 0")
16 | }
17 |
18 | return &Ring{
19 | elements: make([]interface{}, n),
20 | }
21 | }
22 |
23 | // Add stores v in the next slot of r, wrapping around to the first slot after the last one.
24 | func (r *Ring) Add(v interface{}) {
25 | r.lock.Lock()
26 | defer r.lock.Unlock()
27 |
28 | r.elements[r.index%len(r.elements)] = v // the modulo makes writes wrap around
29 | r.index++
30 | }
31 |
32 | // Take returns a copy of the elements currently held by r, oldest first; r itself is not modified.
33 | func (r *Ring) Take() []interface{} {
34 | r.lock.Lock()
35 | defer r.lock.Unlock()
36 |
37 | var size int
38 | var start int
39 | if r.index > len(r.elements) { // wrapped: the slot at index % len holds the oldest element
40 | size = len(r.elements)
41 | start = r.index % len(r.elements)
42 | } else { // not wrapped: only the first index slots were written, starting at slot 0
43 | size = r.index
44 | }
45 |
46 | elements := make([]interface{}, size)
47 | for i := 0; i < size; i++ {
48 | elements[i] = r.elements[(start+i)%len(r.elements)] // read in write order, wrapping at the end
49 | }
50 |
51 | return elements
52 | }
53 |
--------------------------------------------------------------------------------
/ring_test.go:
--------------------------------------------------------------------------------
1 | package stream
2 |
3 | import (
4 | "sync"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestNewRing(t *testing.T) { // NewRing(0) must panic
11 | assert.Panics(t, func() {
12 | NewRing(0)
13 | })
14 | }
15 |
16 | func TestRingLess(t *testing.T) { // fewer Adds (3) than capacity (5): Take returns only the 3 added values
17 | ring := NewRing(5)
18 | for i := 0; i < 3; i++ {
19 | ring.Add(i)
20 | }
21 | elements := ring.Take()
22 | assert.ElementsMatch(t, []interface{}{0, 1, 2}, elements) // NOTE(review): ElementsMatch ignores order, so Take's ordering is not verified here
23 | }
24 |
25 | func TestRingMore(t *testing.T) { // more Adds (11) than capacity (5): only the last 5 values remain
26 | ring := NewRing(5)
27 | for i := 0; i < 11; i++ {
28 | ring.Add(i)
29 | }
30 | elements := ring.Take()
31 | assert.ElementsMatch(t, []interface{}{6, 7, 8, 9, 10}, elements) // NOTE(review): ElementsMatch ignores order, so Take's ordering is not verified here
32 | }
33 |
34 | func TestRingAdd(t *testing.T) { // concurrent Adds from 100 goroutines must all be recorded
35 | ring := NewRing(5051) // capacity is larger than the 5050 total Adds, so nothing is overwritten
36 | wg := sync.WaitGroup{}
37 | for i := 1; i <= 100; i++ {
38 | wg.Add(1)
39 | go func(i int) {
40 | defer wg.Done()
41 | for j := 1; j <= i; j++ { // goroutine i adds i times; 1+2+...+100 = 5050 in total
42 | ring.Add(i)
43 | }
44 | }(i)
45 | }
46 | wg.Wait()
47 | assert.Equal(t, 5050, len(ring.Take()))
48 | }
49 |
50 | func BenchmarkRingAdd(b *testing.B) { // measures Add on one Ring shared by parallel goroutines
51 | ring := NewRing(500)
52 | b.RunParallel(func(pb *testing.PB) {
53 | for pb.Next() {
54 | for i := 0; i < b.N; i++ { // NOTE(review): b.N Adds per pb.Next() iteration looks quadratic in b.N — confirm this is intended
55 | ring.Add(i)
56 | }
57 | }
58 | })
59 | }
60 |
--------------------------------------------------------------------------------
/routinegroup.go:
--------------------------------------------------------------------------------
1 | package stream
2 |
3 | import "sync"
4 |
5 | // A RoutineGroup runs functions in goroutines and lets the caller wait for all of them to finish.
6 | type RoutineGroup struct {
7 | waitGroup sync.WaitGroup // incremented by Run, decremented when each function returns
8 | }
9 |
10 | // NewRoutineGroup returns a new, zero-valued RoutineGroup.
11 | func NewRoutineGroup() *RoutineGroup {
12 | return new(RoutineGroup)
13 | }
14 |
15 | // Run starts fn in a new goroutine that is tracked by the RoutineGroup.
16 | // Avoid referencing outside variables from fn,
17 | // because they can be changed by other goroutines while fn is running.
18 | func (g *RoutineGroup) Run(fn func()) {
19 | g.waitGroup.Add(1) // counted before the goroutine starts, so a later Wait cannot miss it
20 |
21 | go func() {
22 | defer g.waitGroup.Done()
23 | fn()
24 | }()
25 | }
26 |
27 | // Wait blocks until every function started with Run has returned.
28 | func (g *RoutineGroup) Wait() {
29 | g.waitGroup.Wait()
30 | }
31 |
--------------------------------------------------------------------------------
/routinegroup_test.go:
--------------------------------------------------------------------------------
1 | package stream
2 |
3 | import (
4 | "sync/atomic"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestRoutineGroupRun(t *testing.T) {
11 | var count int32
12 | group := NewRoutineGroup()
13 | for i := 0; i < 3; i++ {
14 | group.Run(func() {
15 | atomic.AddInt32(&count, 1)
16 | })
17 | }
18 |
19 | group.Wait()
20 |
21 | assert.Equal(t, int32(3), count)
22 | }
23 |
--------------------------------------------------------------------------------
/stream.go:
--------------------------------------------------------------------------------
1 | package stream
2 |
3 | import (
4 | "sort"
5 | "sync"
6 | )
7 |
const (
	// defaultWorkers is the worker count newOptions uses when no Option overrides it.
	defaultWorkers = 16
	// minWorkers is the lower bound that WithWorkers clamps its argument to.
	minWorkers = 1
)
12 |
type (
	// rxOptions holds the settings that Option functions customize.
	rxOptions struct {
		// unlimitedWorkers makes Walk start one goroutine per item
		// instead of limiting concurrency to workers.
		unlimitedWorkers bool
		// workers is the concurrency limit and the output buffer size used by Walk.
		workers int
	}

	// FilterFunc defines the method to filter a Stream.
	FilterFunc func(item interface{}) bool
	// ForAllFunc defines the method to handle all elements in a Stream.
	ForAllFunc func(pipe <-chan interface{})
	// ForEachFunc defines the method to handle each element in a Stream.
	ForEachFunc func(item interface{})
	// GenerateFunc defines the method to send elements into a Stream.
	GenerateFunc func(source chan<- interface{})
	// KeyFunc defines the method to generate keys for the elements in a Stream.
	KeyFunc func(item interface{}) interface{}
	// LessFunc defines the method to compare the elements in a Stream.
	LessFunc func(a, b interface{}) bool
	// MapFunc defines the method to map each element to another object in a Stream.
	MapFunc func(item interface{}) interface{}
	// Option defines the method to customize a Stream.
	Option func(opts *rxOptions)
	// ParallelFunc defines the method to handle elements in parallel.
	ParallelFunc func(item interface{})
	// ReduceFunc defines the method to reduce all the elements in a Stream.
	ReduceFunc func(pipe <-chan interface{}) (interface{}, error)
	// WalkFunc defines the method to walk through all the elements in a Stream.
	WalkFunc func(item interface{}, pipe chan<- interface{})

	// A Stream wraps a receive-only channel of items and offers
	// stream-processing operations on it.
	Stream struct {
		source <-chan interface{}
	}
)
47 |
// Concat returns a Stream that carries the items of s and of all others.
// It simply delegates to s.Concat(others...).
func Concat(s Stream, others ...Stream) Stream {
	return s.Concat(others...)
}
52 |
53 | // From constructs a Stream from the given GenerateFunc.
54 | func From(generate GenerateFunc) Stream {
55 | source := make(chan interface{})
56 |
57 | go func() {
58 | defer close(source)
59 | generate(source)
60 | }()
61 |
62 | return Range(source)
63 | }
64 |
65 | // Just converts the given arbitrary items to a Stream.
66 | func Just(items ...interface{}) Stream {
67 | source := make(chan interface{}, len(items))
68 | for _, item := range items {
69 | source <- item
70 | }
71 | close(source)
72 |
73 | return Range(source)
74 | }
75 |
// Range converts the given channel to a Stream.
// The channel is used as-is as the Stream's source; nothing is copied or started.
func Range(source <-chan interface{}) Stream {
	return Stream{
		source: source,
	}
}
82 |
83 | // AllMach returns whether all elements of this stream match the provided predicate.
84 | // May not evaluate the predicate on all elements if not necessary for determining the result.
85 | // If the stream is empty then true is returned and the predicate is not evaluated.
86 | func (s Stream) AllMach(predicate func(item interface{}) bool) bool {
87 | for item := range s.source {
88 | if !predicate(item) {
89 | // make sure the former goroutine not block, and current func returns fast.
90 | go drain(s.source)
91 | return false
92 | }
93 | }
94 |
95 | return true
96 | }
97 |
98 | // AnyMach returns whether any elements of this stream match the provided predicate.
99 | // May not evaluate the predicate on all elements if not necessary for determining the result.
100 | // If the stream is empty then false is returned and the predicate is not evaluated.
101 | func (s Stream) AnyMach(predicate func(item interface{}) bool) bool {
102 | for item := range s.source {
103 | if predicate(item) {
104 | // make sure the former goroutine not block, and current func returns fast.
105 | go drain(s.source)
106 | return true
107 | }
108 | }
109 |
110 | return false
111 | }
112 |
113 | // Buffer buffers the items into a queue with size n.
114 | // It can balance the producer and the consumer if their processing throughput don't match.
115 | func (s Stream) Buffer(n int) Stream {
116 | if n < 0 {
117 | n = 0
118 | }
119 |
120 | source := make(chan interface{}, n)
121 | go func() {
122 | for item := range s.source {
123 | source <- item
124 | }
125 | close(source)
126 | }()
127 |
128 | return Range(source)
129 | }
130 |
131 | // Concat returns a Stream that concatenated other streams
132 | func (s Stream) Concat(others ...Stream) Stream {
133 | source := make(chan interface{})
134 |
135 | go func() {
136 | group := NewRoutineGroup()
137 | group.Run(func() {
138 | for item := range s.source {
139 | source <- item
140 | }
141 | })
142 |
143 | for _, each := range others {
144 | each := each
145 | group.Run(func() {
146 | for item := range each.source {
147 | source <- item
148 | }
149 | })
150 | }
151 |
152 | group.Wait()
153 | close(source)
154 | }()
155 |
156 | return Range(source)
157 | }
158 |
159 | // Count counts the number of elements in the result.
160 | func (s Stream) Count() (count int) {
161 | for range s.source {
162 | count++
163 | }
164 | return
165 | }
166 |
167 | // Distinct removes the duplicated items base on the given KeyFunc.
168 | func (s Stream) Distinct(fn KeyFunc) Stream {
169 | source := make(chan interface{})
170 |
171 | go func() {
172 | defer close(source)
173 |
174 | keys := make(map[interface{}]struct{})
175 | for item := range s.source {
176 | key := fn(item)
177 | if _, ok := keys[key]; !ok {
178 | source <- item
179 | keys[key] = struct{}{}
180 | }
181 | }
182 | }()
183 |
184 | return Range(source)
185 | }
186 |
// Done waits for all upstream operations to be done.
// It discards every remaining item and returns once the source channel is closed.
func (s Stream) Done() {
	drain(s.source)
}
191 |
192 | // Filter filters the items by the given FilterFunc.
193 | func (s Stream) Filter(fn FilterFunc, opts ...Option) Stream {
194 | return s.Walk(func(item interface{}, pipe chan<- interface{}) {
195 | if fn(item) {
196 | pipe <- item
197 | }
198 | }, opts...)
199 | }
200 |
201 | // First returns the first item, nil if no items.
202 | func (s Stream) First() interface{} {
203 | for item := range s.source {
204 | // make sure the former goroutine not block, and current func returns fast.
205 | go drain(s.source)
206 | return item
207 | }
208 |
209 | return nil
210 | }
211 |
212 | // ForAll handles the streaming elements from the source and no later streams.
213 | func (s Stream) ForAll(fn ForAllFunc) {
214 | fn(s.source)
215 | // avoid goroutine leak on fn not consuming all items.
216 | go drain(s.source)
217 | }
218 |
219 | // ForEach seals the Stream with the ForEachFunc on each item, no successive operations.
220 | func (s Stream) ForEach(fn ForEachFunc) {
221 | for item := range s.source {
222 | fn(item)
223 | }
224 | }
225 |
226 | // Group groups the elements into different groups based on their keys.
227 | func (s Stream) Group(fn KeyFunc) Stream {
228 | groups := make(map[interface{}][]interface{})
229 | for item := range s.source {
230 | key := fn(item)
231 | groups[key] = append(groups[key], item)
232 | }
233 |
234 | source := make(chan interface{})
235 | go func() {
236 | for _, group := range groups {
237 | source <- group
238 | }
239 | close(source)
240 | }()
241 |
242 | return Range(source)
243 | }
244 |
245 | // Head returns the first n elements in p.
246 | func (s Stream) Head(n int64) Stream {
247 | if n < 1 {
248 | panic("n must be greater than 0")
249 | }
250 |
251 | source := make(chan interface{})
252 |
253 | go func() {
254 | for item := range s.source {
255 | n--
256 | if n >= 0 {
257 | source <- item
258 | }
259 | if n == 0 {
260 | // let successive method go ASAP even we have more items to skip
261 | close(source)
262 | // why we don't just break the loop, and drain to consume all items.
263 | // because if breaks, this former goroutine will block forever,
264 | // which will cause goroutine leak.
265 | drain(s.source)
266 | }
267 | }
268 | // not enough items in s.source, but we need to let successive method to go ASAP.
269 | if n > 0 {
270 | close(source)
271 | }
272 | }()
273 |
274 | return Range(source)
275 | }
276 |
277 | // Last returns the last item, or nil if no items.
278 | func (s Stream) Last() (item interface{}) {
279 | for item = range s.source {
280 | }
281 | return
282 | }
283 |
284 | // Map converts each item to another corresponding item, which means it's a 1:1 model.
285 | func (s Stream) Map(fn MapFunc, opts ...Option) Stream {
286 | return s.Walk(func(item interface{}, pipe chan<- interface{}) {
287 | pipe <- fn(item)
288 | }, opts...)
289 | }
290 |
291 | // Merge merges all the items into a slice and generates a new stream.
292 | func (s Stream) Merge() Stream {
293 | var items []interface{}
294 | for item := range s.source {
295 | items = append(items, item)
296 | }
297 |
298 | source := make(chan interface{}, 1)
299 | source <- items
300 | close(source)
301 |
302 | return Range(source)
303 | }
304 |
305 | // NoneMatch returns whether all elements of this stream don't match the provided predicate.
306 | // May not evaluate the predicate on all elements if not necessary for determining the result.
307 | // If the stream is empty then true is returned and the predicate is not evaluated.
308 | func (s Stream) NoneMatch(predicate func(item interface{}) bool) bool {
309 | for item := range s.source {
310 | if predicate(item) {
311 | // make sure the former goroutine not block, and current func returns fast.
312 | go drain(s.source)
313 | return false
314 | }
315 | }
316 |
317 | return true
318 | }
319 |
320 | // Parallel applies the given ParallelFunc to each item concurrently with given number of workers.
321 | func (s Stream) Parallel(fn ParallelFunc, opts ...Option) {
322 | s.Walk(func(item interface{}, pipe chan<- interface{}) {
323 | fn(item)
324 | }, opts...).Done()
325 | }
326 |
327 | // Reduce is a utility method to let the caller deal with the underlying channel.
328 | func (s Stream) Reduce(fn ReduceFunc) (interface{}, error) {
329 | return fn(s.source)
330 | }
331 |
332 | // Reverse reverses the elements in the stream.
333 | func (s Stream) Reverse() Stream {
334 | var items []interface{}
335 | for item := range s.source {
336 | items = append(items, item)
337 | }
338 | // reverse, official method
339 | for i := len(items)/2 - 1; i >= 0; i-- {
340 | opp := len(items) - 1 - i
341 | items[i], items[opp] = items[opp], items[i]
342 | }
343 |
344 | return Just(items...)
345 | }
346 |
347 | // Skip returns a Stream that skips size elements.
348 | func (s Stream) Skip(n int64) Stream {
349 | if n < 0 {
350 | panic("n must not be negative")
351 | }
352 | if n == 0 {
353 | return s
354 | }
355 |
356 | source := make(chan interface{})
357 |
358 | go func() {
359 | for item := range s.source {
360 | n--
361 | if n >= 0 {
362 | continue
363 | } else {
364 | source <- item
365 | }
366 | }
367 | close(source)
368 | }()
369 |
370 | return Range(source)
371 | }
372 |
373 | // Sort sorts the items from the underlying source.
374 | func (s Stream) Sort(less LessFunc) Stream {
375 | var items []interface{}
376 | for item := range s.source {
377 | items = append(items, item)
378 | }
379 | sort.Slice(items, func(i, j int) bool {
380 | return less(items[i], items[j])
381 | })
382 |
383 | return Just(items...)
384 | }
385 |
386 | // Split splits the elements into chunk with size up to n,
387 | // might be less than n on tailing elements.
388 | func (s Stream) Split(n int) Stream {
389 | if n < 1 {
390 | panic("n should be greater than 0")
391 | }
392 |
393 | source := make(chan interface{})
394 | go func() {
395 | var chunk []interface{}
396 | for item := range s.source {
397 | chunk = append(chunk, item)
398 | if len(chunk) == n {
399 | source <- chunk
400 | chunk = nil
401 | }
402 | }
403 | if chunk != nil {
404 | source <- chunk
405 | }
406 | close(source)
407 | }()
408 |
409 | return Range(source)
410 | }
411 |
412 | // Tail returns the last n elements in p.
413 | func (s Stream) Tail(n int64) Stream {
414 | if n < 1 {
415 | panic("n should be greater than 0")
416 | }
417 |
418 | source := make(chan interface{})
419 |
420 | go func() {
421 | ring := NewRing(int(n))
422 | for item := range s.source {
423 | ring.Add(item)
424 | }
425 | for _, item := range ring.Take() {
426 | source <- item
427 | }
428 | close(source)
429 | }()
430 |
431 | return Range(source)
432 | }
433 |
434 | // Walk lets the callers handle each item, the caller may write zero, one or more items base on the given item.
435 | func (s Stream) Walk(fn WalkFunc, opts ...Option) Stream {
436 | option := buildOptions(opts...)
437 | if option.unlimitedWorkers {
438 | return s.walkUnlimited(fn, option)
439 | }
440 |
441 | return s.walkLimited(fn, option)
442 | }
443 |
444 | func (s Stream) walkLimited(fn WalkFunc, option *rxOptions) Stream {
445 | pipe := make(chan interface{}, option.workers)
446 |
447 | go func() {
448 | var wg sync.WaitGroup
449 | pool := make(chan struct{}, option.workers)
450 |
451 | for item := range s.source {
452 | // important, used in another goroutine
453 | val := item
454 | pool <- struct{}{}
455 | wg.Add(1)
456 |
457 | go func() {
458 | defer func() {
459 | wg.Done()
460 | <-pool
461 | }()
462 |
463 | fn(val, pipe)
464 | }()
465 | }
466 |
467 | wg.Wait()
468 | close(pipe)
469 | }()
470 |
471 | return Range(pipe)
472 | }
473 |
474 | func (s Stream) walkUnlimited(fn WalkFunc, option *rxOptions) Stream {
475 | pipe := make(chan interface{}, option.workers)
476 |
477 | go func() {
478 | var wg sync.WaitGroup
479 |
480 | for item := range s.source {
481 | // important, used in another goroutine
482 | val := item
483 | wg.Add(1)
484 | go func() {
485 | defer wg.Done()
486 | fn(val, pipe)
487 | }()
488 | }
489 |
490 | wg.Wait()
491 | close(pipe)
492 | }()
493 |
494 | return Range(pipe)
495 | }
496 |
497 | // UnlimitedWorkers lets the caller use as many workers as the tasks.
498 | func UnlimitedWorkers() Option {
499 | return func(opts *rxOptions) {
500 | opts.unlimitedWorkers = true
501 | }
502 | }
503 |
504 | // WithWorkers lets the caller customize the concurrent workers.
505 | func WithWorkers(workers int) Option {
506 | return func(opts *rxOptions) {
507 | if workers < minWorkers {
508 | opts.workers = minWorkers
509 | } else {
510 | opts.workers = workers
511 | }
512 | }
513 | }
514 |
515 | // buildOptions returns a rxOptions with given customizations.
516 | func buildOptions(opts ...Option) *rxOptions {
517 | options := newOptions()
518 | for _, opt := range opts {
519 | opt(options)
520 | }
521 |
522 | return options
523 | }
524 |
// drain receives from the given channel and discards every item until the
// channel is closed.
func drain(channel <-chan interface{}) {
	for {
		if _, open := <-channel; !open {
			return
		}
	}
}
530 |
531 | // newOptions returns a default rxOptions.
532 | func newOptions() *rxOptions {
533 | return &rxOptions{
534 | workers: defaultWorkers,
535 | }
536 | }
537 |
--------------------------------------------------------------------------------
/stream_test.go:
--------------------------------------------------------------------------------
1 | package stream
2 |
3 | import (
4 | "io/ioutil"
5 | "log"
6 | "math/rand"
7 | "reflect"
8 | "runtime"
9 | "sort"
10 | "sync"
11 | "sync/atomic"
12 | "testing"
13 | "time"
14 |
15 | "github.com/stretchr/testify/assert"
16 | )
17 |
// TestBuffer checks how many items a producer can push into Buffer(N) while
// the consumer is not reading: the generator keeps sending until a send has
// not succeeded by the time the 10ms ticker fires, then signals the consumer,
// which asserts on the number of successful sends.
func TestBuffer(t *testing.T) {
	runCheckedTest(t, func(t *testing.T) {
		const N = 5
		var count int32
		var wait sync.WaitGroup
		wait.Add(1)
		From(func(source chan<- interface{}) {
			ticker := time.NewTicker(10 * time.Millisecond)
			defer ticker.Stop()

			for i := 0; i < 2*N; i++ {
				select {
				case source <- i:
					atomic.AddInt32(&count, 1)
				case <-ticker.C:
					// the send did not go through in time: release the consumer and stop
					wait.Done()
					return
				}
			}
		}).Buffer(N).ForAll(func(pipe <-chan interface{}) {
			// do not read from pipe; just wait until the producer got stuck
			wait.Wait()
			// why N+1: N items sit in the buffer, and Buffer's goroutine has taken
			// one more from the source that it is still waiting to send into the channel
			assert.Equal(t, int32(N+1), atomic.LoadInt32(&count))
		})
	})
}
44 |
45 | func TestBufferNegative(t *testing.T) {
46 | runCheckedTest(t, func(t *testing.T) {
47 | var result int
48 | Just(1, 2, 3, 4).Buffer(-1).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
49 | for item := range pipe {
50 | result += item.(int)
51 | }
52 | return result, nil
53 | })
54 | assert.Equal(t, 10, result)
55 | })
56 | }
57 |
58 | func TestCount(t *testing.T) {
59 | runCheckedTest(t, func(t *testing.T) {
60 | tests := []struct {
61 | name string
62 | elements []interface{}
63 | }{
64 | {
65 | name: "no elements with nil",
66 | },
67 | {
68 | name: "no elements",
69 | elements: []interface{}{},
70 | },
71 | {
72 | name: "1 element",
73 | elements: []interface{}{1},
74 | },
75 | {
76 | name: "multiple elements",
77 | elements: []interface{}{1, 2, 3},
78 | },
79 | }
80 |
81 | for _, test := range tests {
82 | t.Run(test.name, func(t *testing.T) {
83 | val := Just(test.elements...).Count()
84 | assert.Equal(t, len(test.elements), val)
85 | })
86 | }
87 | })
88 | }
89 |
90 | func TestDone(t *testing.T) {
91 | runCheckedTest(t, func(t *testing.T) {
92 | var count int32
93 | Just(1, 2, 3).Walk(func(item interface{}, pipe chan<- interface{}) {
94 | time.Sleep(time.Millisecond * 100)
95 | atomic.AddInt32(&count, int32(item.(int)))
96 | }).Done()
97 | assert.Equal(t, int32(6), count)
98 | })
99 | }
100 |
101 | func TestJust(t *testing.T) {
102 | runCheckedTest(t, func(t *testing.T) {
103 | var result int
104 | Just(1, 2, 3, 4).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
105 | for item := range pipe {
106 | result += item.(int)
107 | }
108 | return result, nil
109 | })
110 | assert.Equal(t, 10, result)
111 | })
112 | }
113 |
114 | func TestDistinct(t *testing.T) {
115 | runCheckedTest(t, func(t *testing.T) {
116 | var result int
117 | Just(4, 1, 3, 2, 3, 4).Distinct(func(item interface{}) interface{} {
118 | return item
119 | }).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
120 | for item := range pipe {
121 | result += item.(int)
122 | }
123 | return result, nil
124 | })
125 | assert.Equal(t, 10, result)
126 | })
127 | }
128 |
129 | func TestFilter(t *testing.T) {
130 | runCheckedTest(t, func(t *testing.T) {
131 | var result int
132 | Just(1, 2, 3, 4).Filter(func(item interface{}) bool {
133 | return item.(int)%2 == 0
134 | }).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
135 | for item := range pipe {
136 | result += item.(int)
137 | }
138 | return result, nil
139 | })
140 | assert.Equal(t, 6, result)
141 | })
142 | }
143 |
144 | func TestFirst(t *testing.T) {
145 | runCheckedTest(t, func(t *testing.T) {
146 | assert.Nil(t, Just().First())
147 | assert.Equal(t, "foo", Just("foo").First())
148 | assert.Equal(t, "foo", Just("foo", "bar").First())
149 | })
150 | }
151 |
152 | func TestForAll(t *testing.T) {
153 | runCheckedTest(t, func(t *testing.T) {
154 | var result int
155 | Just(1, 2, 3, 4).Filter(func(item interface{}) bool {
156 | return item.(int)%2 == 0
157 | }).ForAll(func(pipe <-chan interface{}) {
158 | for item := range pipe {
159 | result += item.(int)
160 | }
161 | })
162 | assert.Equal(t, 6, result)
163 | })
164 | }
165 |
166 | func TestGroup(t *testing.T) {
167 | runCheckedTest(t, func(t *testing.T) {
168 | var groups [][]int
169 | Just(10, 11, 20, 21).Group(func(item interface{}) interface{} {
170 | v := item.(int)
171 | return v / 10
172 | }).ForEach(func(item interface{}) {
173 | v := item.([]interface{})
174 | var group []int
175 | for _, each := range v {
176 | group = append(group, each.(int))
177 | }
178 | groups = append(groups, group)
179 | })
180 |
181 | assert.Equal(t, 2, len(groups))
182 | for _, group := range groups {
183 | assert.Equal(t, 2, len(group))
184 | assert.True(t, group[0]/10 == group[1]/10)
185 | }
186 | })
187 | }
188 |
189 | func TestHead(t *testing.T) {
190 | runCheckedTest(t, func(t *testing.T) {
191 | var result int
192 | Just(1, 2, 3, 4).Head(2).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
193 | for item := range pipe {
194 | result += item.(int)
195 | }
196 | return result, nil
197 | })
198 | assert.Equal(t, 3, result)
199 | })
200 | }
201 |
202 | func TestHeadZero(t *testing.T) {
203 | runCheckedTest(t, func(t *testing.T) {
204 | assert.Panics(t, func() {
205 | Just(1, 2, 3, 4).Head(0).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
206 | return nil, nil
207 | })
208 | })
209 | })
210 | }
211 |
212 | func TestHeadMore(t *testing.T) {
213 | runCheckedTest(t, func(t *testing.T) {
214 | var result int
215 | Just(1, 2, 3, 4).Head(6).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
216 | for item := range pipe {
217 | result += item.(int)
218 | }
219 | return result, nil
220 | })
221 | assert.Equal(t, 10, result)
222 | })
223 | }
224 |
225 | func TestLast(t *testing.T) {
226 | runCheckedTest(t, func(t *testing.T) {
227 | goroutines := runtime.NumGoroutine()
228 | assert.Nil(t, Just().Last())
229 | assert.Equal(t, "foo", Just("foo").Last())
230 | assert.Equal(t, "bar", Just("foo", "bar").Last())
231 | // let scheduler schedule first
232 | runtime.Gosched()
233 | assert.Equal(t, goroutines, runtime.NumGoroutine())
234 | })
235 | }
236 |
237 | func TestMap(t *testing.T) {
238 | runCheckedTest(t, func(t *testing.T) {
239 | log.SetOutput(ioutil.Discard)
240 |
241 | tests := []struct {
242 | name string
243 | mapper MapFunc
244 | expect int
245 | }{
246 | {
247 | name: "map with square",
248 | mapper: func(item interface{}) interface{} {
249 | v := item.(int)
250 | return v * v
251 | },
252 | expect: 30,
253 | },
254 | {
255 | name: "map ignore half",
256 | mapper: func(item interface{}) interface{} {
257 | v := item.(int)
258 | if v%2 == 0 {
259 | return 0
260 | }
261 | return v * v
262 | },
263 | expect: 10,
264 | },
265 | }
266 |
267 | // Map(...) works even WithWorkers(0)
268 | for i, test := range tests {
269 | t.Run(test.name, func(t *testing.T) {
270 | var result int
271 | var workers int
272 | if i%2 == 0 {
273 | workers = 0
274 | } else {
275 | workers = runtime.NumCPU()
276 | }
277 | From(func(source chan<- interface{}) {
278 | for i := 1; i < 5; i++ {
279 | source <- i
280 | }
281 | }).Map(test.mapper, WithWorkers(workers)).Reduce(
282 | func(pipe <-chan interface{}) (interface{}, error) {
283 | for item := range pipe {
284 | result += item.(int)
285 | }
286 | return result, nil
287 | })
288 |
289 | assert.Equal(t, test.expect, result)
290 | })
291 | }
292 | })
293 | }
294 |
295 | func TestMerge(t *testing.T) {
296 | runCheckedTest(t, func(t *testing.T) {
297 | Just(1, 2, 3, 4).Merge().ForEach(func(item interface{}) {
298 | assert.ElementsMatch(t, []interface{}{1, 2, 3, 4}, item.([]interface{}))
299 | })
300 | })
301 | }
302 |
303 | func TestParallelJust(t *testing.T) {
304 | runCheckedTest(t, func(t *testing.T) {
305 | var count int32
306 | Just(1, 2, 3).Parallel(func(item interface{}) {
307 | time.Sleep(time.Millisecond * 100)
308 | atomic.AddInt32(&count, int32(item.(int)))
309 | }, UnlimitedWorkers())
310 | assert.Equal(t, int32(6), count)
311 | })
312 | }
313 |
314 | func TestReverse(t *testing.T) {
315 | runCheckedTest(t, func(t *testing.T) {
316 | Just(1, 2, 3, 4).Reverse().Merge().ForEach(func(item interface{}) {
317 | assert.ElementsMatch(t, []interface{}{4, 3, 2, 1}, item.([]interface{}))
318 | })
319 | })
320 | }
321 |
322 | func TestSort(t *testing.T) {
323 | runCheckedTest(t, func(t *testing.T) {
324 | var prev int
325 | Just(5, 3, 7, 1, 9, 6, 4, 8, 2).Sort(func(a, b interface{}) bool {
326 | return a.(int) < b.(int)
327 | }).ForEach(func(item interface{}) {
328 | next := item.(int)
329 | assert.True(t, prev < next)
330 | prev = next
331 | })
332 | })
333 | }
334 |
335 | func TestSplit(t *testing.T) {
336 | runCheckedTest(t, func(t *testing.T) {
337 | assert.Panics(t, func() {
338 | Just(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).Split(0).Done()
339 | })
340 | var chunks [][]interface{}
341 | Just(1, 2, 3, 4, 5, 6, 7, 8, 9, 10).Split(4).ForEach(func(item interface{}) {
342 | chunk := item.([]interface{})
343 | chunks = append(chunks, chunk)
344 | })
345 | assert.EqualValues(t, [][]interface{}{
346 | {1, 2, 3, 4},
347 | {5, 6, 7, 8},
348 | {9, 10},
349 | }, chunks)
350 | })
351 | }
352 |
353 | func TestTail(t *testing.T) {
354 | runCheckedTest(t, func(t *testing.T) {
355 | var result int
356 | Just(1, 2, 3, 4).Tail(2).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
357 | for item := range pipe {
358 | result += item.(int)
359 | }
360 | return result, nil
361 | })
362 | assert.Equal(t, 7, result)
363 | })
364 | }
365 |
366 | func TestTailZero(t *testing.T) {
367 | runCheckedTest(t, func(t *testing.T) {
368 | assert.Panics(t, func() {
369 | Just(1, 2, 3, 4).Tail(0).Reduce(func(pipe <-chan interface{}) (interface{}, error) {
370 | return nil, nil
371 | })
372 | })
373 | })
374 | }
375 |
376 | func TestWalk(t *testing.T) {
377 | runCheckedTest(t, func(t *testing.T) {
378 | var result int
379 | Just(1, 2, 3, 4, 5).Walk(func(item interface{}, pipe chan<- interface{}) {
380 | if item.(int)%2 != 0 {
381 | pipe <- item
382 | }
383 | }, UnlimitedWorkers()).ForEach(func(item interface{}) {
384 | result += item.(int)
385 | })
386 | assert.Equal(t, 9, result)
387 | })
388 | }
389 |
390 | func TestStream_AnyMach(t *testing.T) {
391 | runCheckedTest(t, func(t *testing.T) {
392 | assetEqual(t, false, Just(1, 2, 3).AnyMach(func(item interface{}) bool {
393 | return item.(int) == 4
394 | }))
395 | assetEqual(t, false, Just(1, 2, 3).AnyMach(func(item interface{}) bool {
396 | return item.(int) == 0
397 | }))
398 | assetEqual(t, true, Just(1, 2, 3).AnyMach(func(item interface{}) bool {
399 | return item.(int) == 2
400 | }))
401 | assetEqual(t, true, Just(1, 2, 3).AnyMach(func(item interface{}) bool {
402 | return item.(int) == 2
403 | }))
404 | })
405 | }
406 |
407 | func TestStream_AllMach(t *testing.T) {
408 | runCheckedTest(t, func(t *testing.T) {
409 | assetEqual(
410 | t, true, Just(1, 2, 3).AllMach(func(item interface{}) bool {
411 | return true
412 | }),
413 | )
414 | assetEqual(
415 | t, false, Just(1, 2, 3).AllMach(func(item interface{}) bool {
416 | return false
417 | }),
418 | )
419 | assetEqual(
420 | t, false, Just(1, 2, 3).AllMach(func(item interface{}) bool {
421 | return item.(int) == 1
422 | }),
423 | )
424 | })
425 | }
426 |
427 | func TestStream_NoneMatch(t *testing.T) {
428 | runCheckedTest(t, func(t *testing.T) {
429 | assetEqual(
430 | t, true, Just(1, 2, 3).NoneMatch(func(item interface{}) bool {
431 | return false
432 | }),
433 | )
434 | assetEqual(
435 | t, false, Just(1, 2, 3).NoneMatch(func(item interface{}) bool {
436 | return true
437 | }),
438 | )
439 | assetEqual(
440 | t, true, Just(1, 2, 3).NoneMatch(func(item interface{}) bool {
441 | return item.(int) == 4
442 | }),
443 | )
444 | })
445 | }
446 |
447 | func TestConcat(t *testing.T) {
448 | runCheckedTest(t, func(t *testing.T) {
449 | a1 := []interface{}{1, 2, 3}
450 | a2 := []interface{}{4, 5, 6}
451 | s1 := Just(a1...)
452 | s2 := Just(a2...)
453 | stream := Concat(s1, s2)
454 | var items []interface{}
455 | for item := range stream.source {
456 | items = append(items, item)
457 | }
458 | sort.Slice(items, func(i, j int) bool {
459 | return items[i].(int) < items[j].(int)
460 | })
461 | ints := make([]interface{}, 0)
462 | ints = append(ints, a1...)
463 | ints = append(ints, a2...)
464 | assetEqual(t, ints, items)
465 | })
466 | }
467 |
468 | func TestStream_Skip(t *testing.T) {
469 | runCheckedTest(t, func(t *testing.T) {
470 | assetEqual(t, 3, Just(1, 2, 3, 4).Skip(1).Count())
471 | assetEqual(t, 1, Just(1, 2, 3, 4).Skip(3).Count())
472 | assetEqual(t, 4, Just(1, 2, 3, 4).Skip(0).Count())
473 | equal(t, Just(1, 2, 3, 4).Skip(3), []interface{}{4})
474 | assert.Panics(t, func() {
475 | Just(1, 2, 3, 4).Skip(-1)
476 | })
477 | })
478 | }
479 |
480 | func TestStream_Concat(t *testing.T) {
481 | runCheckedTest(t, func(t *testing.T) {
482 | stream := Just(1).Concat(Just(2), Just(3))
483 | var items []interface{}
484 | for item := range stream.source {
485 | items = append(items, item)
486 | }
487 | sort.Slice(items, func(i, j int) bool {
488 | return items[i].(int) < items[j].(int)
489 | })
490 | assetEqual(t, []interface{}{1, 2, 3}, items)
491 |
492 | just := Just(1)
493 | equal(t, just.Concat(just), []interface{}{1})
494 | })
495 | }
496 |
497 | func BenchmarkParallelMapReduce(b *testing.B) {
498 | b.ReportAllocs()
499 |
500 | mapper := func(v interface{}) interface{} {
501 | return v.(int64) * v.(int64)
502 | }
503 | reducer := func(input <-chan interface{}) (interface{}, error) {
504 | var result int64
505 | for v := range input {
506 | result += v.(int64)
507 | }
508 | return result, nil
509 | }
510 | b.ResetTimer()
511 | From(func(input chan<- interface{}) {
512 | b.RunParallel(func(pb *testing.PB) {
513 | for pb.Next() {
514 | input <- int64(rand.Int())
515 | }
516 | })
517 | }).Map(mapper).Reduce(reducer)
518 | }
519 |
520 | func BenchmarkMapReduce(b *testing.B) {
521 | b.ReportAllocs()
522 |
523 | mapper := func(v interface{}) interface{} {
524 | return v.(int64) * v.(int64)
525 | }
526 | reducer := func(input <-chan interface{}) (interface{}, error) {
527 | var result int64
528 | for v := range input {
529 | result += v.(int64)
530 | }
531 | return result, nil
532 | }
533 | b.ResetTimer()
534 | From(func(input chan<- interface{}) {
535 | for i := 0; i < b.N; i++ {
536 | input <- int64(rand.Int())
537 | }
538 | }).Map(mapper).Reduce(reducer)
539 | }
540 |
// assetEqual reports a test error when data is not deeply equal to except
// (the expected value), compared with reflect.DeepEqual.
func assetEqual(t *testing.T, except, data interface{}) {
	// attribute a failure to the calling test line, not to this helper
	t.Helper()

	if !reflect.DeepEqual(except, data) {
		t.Errorf("got %v, want %v", data, except)
	}
}
546 |
547 | func equal(t *testing.T, stream Stream, data []interface{}) {
548 | items := make([]interface{}, 0)
549 | for item := range stream.source {
550 | items = append(items, item)
551 | }
552 | if !reflect.DeepEqual(items, data) {
553 | t.Errorf(" %v, want %v", items, data)
554 | }
555 | }
556 |
557 | func runCheckedTest(t *testing.T, fn func(t *testing.T)) {
558 | goroutines := runtime.NumGoroutine()
559 | fn(t)
560 | // let scheduler schedule first
561 | time.Sleep(time.Millisecond)
562 | assert.True(t, runtime.NumGoroutine() <= goroutines)
563 | }
564 |
--------------------------------------------------------------------------------