├── .github
└── FUNDING.yml
├── .gitignore
├── Src
├── Helper.cs
├── SplitPipeline.csproj
├── Job.cs
└── SplitPipelineCommand.cs
├── Module
└── en-US
│ ├── about_SplitPipeline.help.txt
│ └── SplitPipeline.dll-Help.ps1
├── Tests
├── Test-Transcript.ps1
├── Test-ProgressJobs.ps1
├── Order.test.ps1
├── Filter.test.ps1
├── Host.test.ps1
├── Import.test.ps1
├── Test-Start-Job.ps1
├── Test-ProgressTotal2.ps1
├── Count.test.ps1
├── Test-ProgressTotal.ps1
├── Scripts.test.ps1
├── About.test.ps1
├── Stopping.test.ps1
├── Test-Stopping-Random.ps1
├── Test-Refill.ps1
└── Load.test.ps1
├── README.md
├── 1.build.ps1
├── Release-Notes.md
└── LICENSE
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [nightroman]
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | obj
3 | z
4 | z.*
5 | *.html
6 | *.user
7 | launchSettings.json
8 | Module/*.psd1
9 | Src/Directory.Build.props
10 |
--------------------------------------------------------------------------------
/Src/Helper.cs:
--------------------------------------------------------------------------------
1 | using System.Management.Automation;
2 |
3 | namespace SplitPipeline;
4 |
5 | /// <summary>
6 | /// Pipeline helper methods exposed to job scripts via the $Pipeline variable.
7 | /// </summary>
8 | public class Helper
9 | {
10 | /// <summary>
11 | /// Invokes the script with a mutually exclusive lock on this shared helper.
12 | /// </summary>
13 | public object Lock(ScriptBlock script)
14 | {
15 | if (script == null) throw new ArgumentNullException(nameof(script));
16 | lock (this)
17 | {
18 | return script.InvokeReturnAsIs();
19 | }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/Src/SplitPipeline.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netstandard2.0
4 | 10.0
5 | enable
6 | true
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/Module/en-US/about_SplitPipeline.help.txt:
--------------------------------------------------------------------------------
1 | TOPIC
2 | about_SplitPipeline
3 |
4 | SHORT DESCRIPTION
5 | SplitPipeline - Parallel Data Processing in PowerShell
6 |
7 | LONG DESCRIPTION
8 | The only cmdlet is Split-Pipeline. It splits the input, processes parts by
9 | parallel pipelines, and outputs data for further processing. It may work
10 | without collecting the whole input, large or infinite.
11 |
12 | Get help:
13 | PS> Import-Module SplitPipeline
14 | PS> help -Full Split-Pipeline
15 |
16 | SEE ALSO
17 | Project site: https://github.com/nightroman/SplitPipeline
18 | Split-Pipeline
19 |
--------------------------------------------------------------------------------
/Tests/Test-Transcript.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | How to use transcript with Write-Host in pipelines.
4 |
5 | .Description
6 | This technique works around the issue #25.
7 |
8 | .Link
9 | https://github.com/nightroman/SplitPipeline/issues/25
10 | #>
11 |
12 | Start-Transcript "$env:TEMP\z.log"
13 |
14 | # The helper for Write-Host for pipelines working with transcript.
15 | $helper = New-Module -AsCustomObject -ScriptBlock {
16 | Import-Module Microsoft.PowerShell.Utility
17 | function WriteHost {
18 | Write-Host $args[0]
19 | }
20 | }
21 |
22 | 1..42 | Split-Pipeline -Variable helper -Script {process{
23 | # call the helper Write-Host using the lock
24 | $Pipeline.Lock({ $helper.WriteHost("log ($_)") })
25 |
26 | # normal processing
27 | "process ($_)"
28 | }}
29 |
30 | Stop-Transcript
31 |
--------------------------------------------------------------------------------
/Tests/Test-ProgressJobs.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | How to use Write-Progress in jobs to show each job progress.
5 |
6 | .Description
7 | The Begin script assigns $id to each job using the shared counter $lastId.
8 | $lastId does not have to be synchronised because Begin is invoked for each
9 | job on its creation synchronously. As far as Begin is invoked in a separate
10 | runspace, the counter has to be passed in via Variable.
11 |
12 | Then each job uses its $id as activity ID for Write-Progress so that each
13 | job progress is visualized separately.
14 | #>
15 |
16 | Import-Module SplitPipeline
17 |
18 | $lastId = [ref]0
19 |
20 | 1..100 | Split-Pipeline -Count 5 -Variable lastId {
21 | $data = @($input)
22 | for($1 = 1; $1 -le $data.Count; ++$1) {
23 | Write-Progress -Id $id -Activity "Job $id" -Status Processing -PercentComplete (100*$1/$data.Count)
24 | Start-Sleep -Milliseconds (Get-Random -Maximum 500)
25 | }
26 | } -Begin {
27 | $id = ++$lastId.Value
28 | }
29 |
--------------------------------------------------------------------------------
/Tests/Order.test.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline -Order.
5 |
6 | .Link
7 | Invoked by https://github.com/nightroman/Invoke-Build
8 | #>
9 |
10 | Import-Module SplitPipeline
11 | Set-StrictMode -Version Latest
12 |
13 | task Ordered {
14 | # common parameters of two tests
15 | $param = @{
16 | Variable = 'lastId'
17 | Count = 3
18 | Load = 1, 5
19 | Begin = {
20 | $id = ++$lastId.Value
21 | }
22 | Script = {
23 | $input
24 | [System.Threading.Thread]::Sleep((3 - $id) * 50)
25 | }
26 | }
27 |
28 | $data = 1..100
29 | $sample = "$data"
30 |
31 | # unordered
32 | $lastId = [ref]-1
33 | ($r = 1..100 | Split-Pipeline @param)
34 | if ("$r" -eq $sample) { Write-Warning "Normally expected unordered data." }
35 |
36 | # ordered
37 | $lastId = [ref]-1
38 | ($r = 1..100 | Split-Pipeline -Order @param)
39 | equals "$r" $sample
40 |
41 | # ordered, 1.6.0
42 | $lastId = [ref]-1
43 | ($r = Split-Pipeline -Order @param (1..100))
44 | equals "$r" $sample
45 | }
46 |
--------------------------------------------------------------------------------
/Tests/Filter.test.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline -Filter.
5 |
6 | .Link
7 | Invoked by https://github.com/nightroman/Invoke-Build
8 | #>
9 |
10 | Import-Module SplitPipeline
11 | Set-StrictMode -Version Latest
12 |
13 | task Error {
14 | $$ = try { 1..9 | Split-Pipeline {} -Filter 42 } catch { $_ }
15 | assert ("$$" -clike @'
16 | *Exception setting "Filter": "Expected a hashtable or a script block."
17 | '@)
18 | }
19 |
20 | task FilterInputUniqueByScript {
21 | $hash = @{}
22 | 1,1,2,2,3,3,4,4,5,5 | Split-Pipeline -OutVariable OutVariable {$input} -Filter {
23 | if (!$hash.Contains($args[0])) {
24 | $hash.Add($args[0], $null)
25 | $true
26 | }
27 | }
28 | equals $OutVariable.Count 5
29 | equals '1 2 3 4 5' (($OutVariable | Sort-Object) -join ' ')
30 | }
31 |
32 | task FilterInputUniqueByHashtable {
33 | 1,1,2,2,3,3,4,4,5,5 | Split-Pipeline -OutVariable OutVariable {$input} -Filter @{}
34 | equals $OutVariable.Count 5
35 | equals '1 2 3 4 5' (($OutVariable | Sort-Object) -join ' ')
36 | }
37 |
--------------------------------------------------------------------------------
/Tests/Host.test.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline host features.
5 |
6 | .Link
7 | Invoked by https://github.com/nightroman/Invoke-Build
8 | #>
9 |
10 | Import-Module SplitPipeline
11 | Set-StrictMode -Version Latest
12 | $Version = $PSVersionTable.PSVersion.Major
13 |
14 | task ProgressJobs {
15 | exec { PowerShell.exe .\Test-ProgressJobs.ps1 }
16 | }
17 |
18 | task ProgressTotal {
19 | exec { PowerShell.exe .\Test-ProgressTotal.ps1 }
20 | }
21 |
22 | task ProgressTotal2 {
23 | exec { PowerShell.exe .\Test-ProgressTotal2.ps1 }
24 | }
25 |
26 | task WriteHost {
27 | 1..5 | Split-Pipeline -Count 5 -Variable lastId {process{
28 | Write-Host "Item $_"
29 | "Done $_"
30 | }}
31 | }
32 |
33 | task Transcript -If ($Version -ge 5) {
34 | .\Test-Transcript.ps1
35 |
36 | $r = [IO.File]::ReadAllLines("$env:TEMP\z.log")
37 | assert ($r -contains 'log (1)')
38 | assert ($r -contains 'log (42)')
39 | assert ($r -contains 'process (1)')
40 | assert ($r -contains 'process (42)')
41 |
42 | remove "$env:TEMP\z.log"
43 | }
44 |
--------------------------------------------------------------------------------
/Tests/Import.test.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline -Variable -Function -Module.
5 |
6 | .Link
7 | Invoked by https://github.com/nightroman/Invoke-Build
8 | #>
9 |
10 | Import-Module SplitPipeline
11 | Set-StrictMode -Version Latest
12 |
13 | task ImportVariable {
14 | $value1 = 1
15 | $value2 = 2
16 | $result = 1..10 | Split-Pipeline -Count 2 -Variable value1, value2 {
17 | if ($value1 -ne 1) {throw 'value1'}
18 | if ($value2 -ne 2) {throw 'value2'}
19 | $input
20 | }
21 | equals $result.Count 10
22 | }
23 |
24 | task ImportFunction {
25 | function Function1 {1}
26 | function Function2 {2}
27 | $result = 1..10 | Split-Pipeline -Count 2 -Function Function1, Function2 {
28 | if ((Function1) -ne 1) {throw 'Function1'}
29 | if ((Function2) -ne 2) {throw 'Function2'}
30 | $input
31 | }
32 | equals $result.Count 10
33 | }
34 |
35 | task ImportModule {
36 | $result = 1..10 | Split-Pipeline -Count 2 -Module SplitPipeline {
37 | $input | Split-Pipeline -Count 2 {$input}
38 | }
39 | equals $result.Count 10
40 | }
41 |
--------------------------------------------------------------------------------
/Tests/Test-Start-Job.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | How to use Start-Job for pipelines in separate processes.
4 |
5 | .Description
6 | Use Start-Job to run pipelines in separate processes, e.g. in cases like
7 | https://github.com/nightroman/SplitPipeline/issues/32
8 |
9 | The sample jobs would take ~8 seconds when run sequentially.
10 | With Split-Pipeline and Start-Job they take ~4 seconds.
11 |
12 | Note that Start-Job is relatively expensive and
13 | Split-Pipeline may work slower with faster jobs.
14 | #>
15 |
16 | Import-Module SplitPipeline
17 |
18 | $sw = [System.Diagnostics.Stopwatch]::StartNew()
19 |
20 | $data = 1..8 | Split-Pipeline -Count 4 {process{
21 | $job = Start-Job -ArgumentList $_ {
22 | # fake time consuming job
23 | Start-Sleep 1
24 |
25 | # output the current item and process ID
26 | [PSCustomObject]@{
27 | Item = $args[0]
28 | PID = $PID
29 | }
30 | }
31 | $job | Receive-Job -Wait
32 | }}
33 |
34 | [PSCustomObject]@{
35 | Time = $sw.Elapsed.TotalSeconds
36 | Data = $data
37 | }
38 |
--------------------------------------------------------------------------------
/Tests/Test-ProgressTotal2.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | Test-ProgressTotal.ps1 using the helper $Pipeline.Lock.
4 |
5 | .Description
6 | This sample is the simplified variant of Test-ProgressTotal.ps1.
7 | The helper $Pipeline.Lock was introduced for scenarios like this.
8 |
9 | .Notes
10 | [hashtable]::Synchronized() or concurrent dictionary are tempting but not
11 | suitable for increments or counters due to their not atomic nature.
12 | #>
13 |
14 | Import-Module SplitPipeline
15 |
16 | # input items
17 | $items = 1..100
18 |
19 | # shared data
20 | $data = @{
21 | Count = $items.Count
22 | Done = 0
23 | }
24 |
25 | $items | Split-Pipeline -Count 5 -Variable data {process{
26 | # simulate some job
27 | Start-Sleep -Milliseconds (Get-Random -Maximum 500)
28 |
29 | # update and get shared data using the lock
30 | #! covers `InvokeReturnAsIs` instead of `Invoke`
31 | $done = $Pipeline.Lock({ $done = ++$data.Done; $done })
32 |
33 | # show progress
34 | Write-Progress -Activity "Done $done" -Status Processing -PercentComplete (100 * $done / $data.Count)
35 | }}
36 |
37 | # assert
38 | if ($data.Done -ne $items.Count) { throw 'Processed and input item counts must be equal.' }
39 |
--------------------------------------------------------------------------------
/Tests/Count.test.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline -Count.
5 |
6 | .Link
7 | Invoked by https://github.com/nightroman/Invoke-Build
8 | #>
9 |
10 | Import-Module SplitPipeline
11 | Set-StrictMode -Version Latest
12 |
13 | # Use large enough number of items. Small number may not load all cores.
14 | # Example: 20 items for 8 cores actually gives 7 pipes: 3, 3, .. 2
15 | $ItemCount = 1000
16 | $ProcessorCount = [Environment]::ProcessorCount
17 |
18 | task Error {
19 | # [0] <= 0 ~ default
20 | $r = 1..$ItemCount | Split-Pipeline {@($input).Count} -Count 0, -1
21 | equals $r.Count $ProcessorCount
22 |
23 | $$ = try { 1..9 | Split-Pipeline {} -Count 1, -1 } catch { $_ }
24 | assert ("$$" -clike @'
25 | *Exception setting "Count": "Count maximum must be greater or equal to minimum."
26 | '@)
27 | }
28 |
29 | task LessThanProcessorCount {
30 | $r = @(1..$ItemCount | Split-Pipeline {1} -Count 1, 1)
31 | equals $r.Count 1
32 | }
33 |
34 | task EqualToProcessorCount0 {
35 | $r = @(1..$ItemCount | Split-Pipeline {1} -Count 1, $ProcessorCount)
36 | equals $r.Count $ProcessorCount
37 | }
38 |
39 | task EqualToProcessorCount1 {
40 | $r = @(1..$ItemCount | Split-Pipeline {1} -Count 1, ($ProcessorCount + 1))
41 | equals $r.Count $ProcessorCount
42 | }
43 |
--------------------------------------------------------------------------------
/Tests/Test-ProgressTotal.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | How to use Write-Progress in jobs to show the total progress.
4 |
5 | .Description
6 | The hashtable $data is used by jobs simultaneously. It contains the total
7 | number of items Count (read only) and the counter of processed items Done
8 | (read and written). These data are used to calculate the percentage for
9 | Write-Progress.
10 |
11 | Note that Done is updated in a critical section. Use of try/finally there
12 | may be redundant in this trivial example but this is the standard pattern.
13 |
14 | .Notes
15 | [hashtable]::Synchronized() or concurrent dictionary are tempting but not
16 | suitable for increments or counters due to their not atomic nature.
17 | #>
18 |
19 | Import-Module SplitPipeline
20 |
21 | # input items
22 | $items = 1..100
23 |
24 | # shared data
25 | $data = @{
26 | Count = $items.Count
27 | Done = 0
28 | }
29 |
30 | $items | Split-Pipeline -Count 5 -Variable data {process{
31 | # simulate some job
32 | Start-Sleep -Milliseconds (Get-Random -Maximum 500)
33 |
34 | # enter the critical section
35 | [System.Threading.Monitor]::Enter($data.SyncRoot)
36 | try {
37 | # update shared data
38 | $done = ++$data.Done
39 | }
40 | finally {
41 | # exit the critical section; must Exit the same object passed to Enter,
42 | # or Exit throws SynchronizationLockException and SyncRoot stays locked
43 | [System.Threading.Monitor]::Exit($data.SyncRoot)
44 | }
45 | # show progress
46 | Write-Progress -Activity "Done $done" -Status Processing -PercentComplete (100 * $done / $data.Count)
47 | }}
48 |
49 | # assert
50 | if ($data.Done -ne $items.Count) { throw 'Processed and input item counts must be equal.' }
51 |
--------------------------------------------------------------------------------
/Tests/Scripts.test.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | Tests Split-Pipeline -Begin -Script -End -Finally.
4 |
5 | .Link
6 | Invoked by https://github.com/nightroman/Invoke-Build
7 | #>
8 |
9 | Import-Module SplitPipeline
10 | Set-StrictMode -Version Latest
11 |
12 | $IsCore = $PSVersionTable.PSEdition -eq 'Core'
13 |
14 | task Finally1 {
15 | $1 = ''
16 | try {
17 | 1..10 | Split-Pipeline -Count 2 -Load 1 `
18 | -Script {throw 'Throw in Script'} `
19 | -Finally {throw 'Throw in Finally'}
20 | }
21 | catch { $1 = "$_" }
22 | equals $1 'Throw in Script'
23 | }
24 |
25 | task Finally2 {
26 | $result = @(
27 | 1..2 | Split-Pipeline -Count 2 -Load 1 `
28 | -Script {process{$_}} `
29 | -Finally {throw 'Throw in Finally'}
30 | )
31 |
32 | assert ($result.Count -eq 2) $result.Count
33 | }
34 |
35 | task BeginProcessEnd {
36 | $DebugPreference = 'Continue'
37 |
38 | # Use error action Continue or Write-Error will stop
39 | $result = 1..4 | Split-Pipeline -ErrorAction Continue -Count 2 -Load 1 -Verbose `
40 | -Begin {
41 | $DebugPreference = 'Continue'
42 | $VerbosePreference = 'Continue'
43 | 'begin split'
44 | Write-Debug 'Debug in begin split'
45 | Write-Error 'Error in begin split'
46 | Write-Verbose 'Verbose in begin split'
47 | Write-Warning 'Warning in begin split'
48 | } `
49 | -End {
50 | 'end split'
51 | Write-Debug 'Debug in end split'
52 | Write-Error 'Error in end split'
53 | Write-Verbose 'Verbose in end split'
54 | Write-Warning 'Warning in end split'
55 | } `
56 | -Script {
57 | begin {
58 | 'begin part'
59 | Write-Debug 'Debug in script'
60 | Write-Error 'Error in script'
61 | Write-Verbose 'Verbose in script'
62 | Write-Warning 'Warning in script'
63 | }
64 | process {
65 | $_
66 | }
67 | end {
68 | 'end part'
69 | }
70 | }
71 | $result
72 |
73 | # 1 or 2 'begin/end split' due to -Count 2
74 | $begin_split = ($result -eq 'begin split').Count
75 | $end_split = ($result -eq 'end split').Count
76 | assert ($begin_split -eq 1 -or $begin_split -eq 2) $begin_split
77 | assert ($end_split -eq 1 -or $end_split -eq 2) $end_split
78 | equals $begin_split $end_split
79 |
80 | # 4 'begin/end part' due to 4 items and -Load 1
81 | equals ($result -eq 'begin part').Count 4
82 | equals ($result -eq 'end part').Count 4
83 |
84 | # all
85 | equals $result.Count (12 + 2 * $end_split)
86 | }
87 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://www.powershellgallery.com/packages/SplitPipeline)
2 |
3 | # SplitPipeline
4 |
5 | PowerShell module for parallel data processing
6 |
7 | SplitPipeline is designed for Windows PowerShell 5.1 and PowerShell Core.
8 | It provides the only command `Split-Pipeline`.
9 |
10 | `Split-Pipeline` splits the input, processes parts by parallel pipelines, and
11 | outputs results. It may work without collecting the whole input, large or
12 | infinite.
13 |
14 | ## Quick Start
15 |
16 | **Step 1:** Get and install.
17 |
18 | The module is published at the PSGallery: [SplitPipeline](https://www.powershellgallery.com/packages/SplitPipeline).
19 | It may be installed by this command:
20 |
21 | ```powershell
22 | Install-Module SplitPipeline
23 | ```
24 |
25 | **Step 2:** Import the module:
26 |
27 | ```powershell
28 | Import-Module SplitPipeline
29 | ```
30 |
31 | **Step 3:** Take a look at help:
32 |
33 | ```powershell
34 | help Split-Pipeline
35 | ```
36 |
37 | **Step 4:** Try these three commands performing the same job simulating long
38 | but not processor consuming operations on each item:
39 |
40 | ```powershell
41 | 1..10 | . {process{ $_; sleep 1 }}
42 | 1..10 | Split-Pipeline {process{ $_; sleep 1 }}
43 | 1..10 | Split-Pipeline -Count 10 {process{ $_; sleep 1 }}
44 | ```
45 |
46 | Output of all commands is the same, numbers from 1 to 10 (Split-Pipeline does
47 | not guarantee the same order without the switch `Order`). But consumed times
48 | are different. Let's measure them:
49 |
50 | ```powershell
51 | Measure-Command { 1..10 | . {process{ $_; sleep 1 }} }
52 | Measure-Command { 1..10 | Split-Pipeline {process{ $_; sleep 1 }} }
53 | Measure-Command { 1..10 | Split-Pipeline -Count 10 {process{ $_; sleep 1 }} }
54 | ```
55 |
56 | The first command takes about 10 seconds.
57 |
58 | Performance of the second command depends on the number of processors which is
59 | used as the default split count. For example, with 2 processors it takes about
60 | 6 seconds.
61 |
62 | The third command takes about 2 seconds. The number of processors is not very
63 | important for such sleeping jobs. The split count is important. Increasing it
64 | to some extent improves overall performance. As for intensive jobs, the split
65 | count normally should not exceed the number of processors.
66 |
67 | ## See also
68 |
69 | - [SplitPipeline Release Notes](https://github.com/nightroman/SplitPipeline/blob/main/Release-Notes.md)
70 |
--------------------------------------------------------------------------------
/Tests/About.test.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | Tests Split-Pipeline.
4 |
5 | .Link
6 | Invoked by https://github.com/nightroman/Invoke-Build
7 | #>
8 |
9 | #requires -Modules SplitPipeline
10 | Set-StrictMode -Version 3
11 |
12 | task help {
13 | . Helps.ps1
14 | Test-Helps ..\Module\en-US\SplitPipeline.dll-Help.ps1
15 | }
16 |
17 | task ApartmentState {
18 | equals MTA (1 | Split-Pipeline { [System.Threading.Thread]::CurrentThread.ApartmentState.ToString() })
19 | equals MTA (1 | Split-Pipeline -ApartmentState MTA { [System.Threading.Thread]::CurrentThread.ApartmentState.ToString() })
20 | equals STA (1 | Split-Pipeline -ApartmentState STA { [System.Threading.Thread]::CurrentThread.ApartmentState.ToString() })
21 | }
22 |
23 | task JobSoftErrorAndCmdletErrorContinueMode {
24 | 42 | Split-Pipeline -ErrorAction Continue -OutVariable OV -ErrorVariable EV {process{
25 | $_
26 | Get-Variable MissingSafe
27 | }}
28 |
29 | equals $OV.Count 1
30 | equals $OV[0] 42
31 | equals $EV.Count 1
32 | assert ('ObjectNotFound: (MissingSafe:String) [Split-Pipeline], ItemNotFoundException' -eq $EV[0].CategoryInfo)
33 | }
34 |
35 | task JobSoftErrorThenFailure {
36 | $e = ''
37 | try {
38 | 42 | Split-Pipeline {process{
39 | Get-Variable MissingSafe
40 | Get-Variable MissingStop -ErrorAction Stop
41 | }}
42 | }
43 | catch {($e = $_)}
44 | assert ('ObjectNotFound: (MissingStop:String) [Get-Variable], ItemNotFoundException' -eq $e.CategoryInfo)
45 | }
46 |
47 | task Refill {
48 | .\Test-Refill.ps1
49 | }
50 |
51 | # Issue #12
52 | task VerbosePreferenceString {
53 | $VerbosePreference = 'Continue'
54 | 1 | Split-Pipeline {
55 | Write-Verbose test-verbose
56 | }
57 | }
58 |
59 | # Issue #12
60 | task VerbosePreferenceNumber {
61 | $VerbosePreference = 2
62 | 1 | Split-Pipeline {
63 | Write-Verbose test-verbose
64 | }
65 | }
66 |
67 | # Issue #12
68 | task VerbosePreferenceInvalid {
69 | $VerbosePreference = 'Invalid'
70 | 1 | Split-Pipeline {
71 | Write-Verbose test-verbose
72 | }
73 | }
74 |
75 | # Issue #29
76 | # 2024-01-11: With v2.0.0 or Windows 11 or new PC, output is less predictable
77 | task WarningVariable {
78 | 1..2 | Split-Pipeline -WarningVariable WV {process{ Write-Warning "test-WarningVariable" }}
79 | assert ($WV.Count -ge 2)
80 | equals $WV[0].Message test-WarningVariable
81 | equals $WV[1].Message test-WarningVariable
82 | }
83 |
84 | # Issue #32
85 | task Test-Start-Job -If ($Host.Name -ne 'FarHost') {
86 | $r = ./Test-Start-Job.ps1
87 | $r | Out-String
88 |
89 | # expected saved time
90 | assert ($r.Time -lt 8)
91 |
92 | # expected 8 items with different PIDs
93 | $data = $r.Data | Sort-Object Item
94 | equals $data.Count 8
95 | equals $data[0].Item 1
96 | equals $data[-1].Item 8
97 | assert ($data[0].PID -ne $data[1].PID)
98 | }
99 |
--------------------------------------------------------------------------------
/Tests/Stopping.test.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | Tests stopping of Split-Pipeline.
4 | #>
5 |
6 | #requires -Modules SplitPipeline
7 | Set-StrictMode -Version 3
8 |
9 | <#
10 | [Ctrl-C] hangs in v1.2.0, works in 1.2.1 https://github.com/nightroman/SplitPipeline/issues/3
11 |
12 | MANUAL TEST SCRIPT
13 |
14 | (!) Ensure notepad is configured to open new windows.
15 |
16 | 1..4 | Split-Pipeline -Verbose -Count 2 {process{
17 | $p = Start-Process notepad -PassThru
18 | $p.WaitForExit()
19 | }}
20 |
21 | - Invoke the script. Two notepads are opened by two jobs. Split-Pipeline waits for them.
22 | - Press [Ctrl-C] in the console. Split-Pipeline still waits because WaitForExit is not stopped this way.
23 | - Close notepads. Split-Pipeline exits, not hangs.
24 | #>
25 | task Issue3 {
26 | assert (!(Get-Process notepad -ErrorAction Ignore))
27 |
28 | remove C:\TEMP\SplitPipelineIssue3
29 | $null = mkdir C:\TEMP\SplitPipelineIssue3
30 |
31 | # Split-Pipeline to be stopped
32 | $ps = [PowerShell]::Create()
33 | $null = $ps.AddScript({
34 | Import-Module SplitPipeline
35 | 1..4 | Split-Pipeline -Verbose -Count 2 -Script {process{
36 | $p = Start-Process notepad -PassThru
37 | $p.WaitForExit()
38 | }} -Begin {
39 | $id = [runspace]::DefaultRunspace.InstanceId
40 | 1 > "C:\TEMP\SplitPipelineIssue3\Begin-$id"
41 | } -End {
42 | 1 > "C:\TEMP\SplitPipelineIssue3\End-$id"
43 | } -Finally {
44 | 1 > "C:\TEMP\SplitPipelineIssue3\Finally-$id"
45 | }
46 | })
47 |
48 | # start Split-Pipeline
49 | 'BeginInvoke'
50 | $null = $ps.BeginInvoke()
51 |
52 | # wait for two jobs to start, i.e. two processes
53 | while(@(Get-Process notepad -ErrorAction Ignore).Count -lt 2) {
54 | Start-Sleep -Milliseconds 100
55 | }
56 |
57 | # 2 jobs started
58 | equals @(Get-Process notepad).Count 2
59 |
60 | # start stopping, fake [Ctrl-C]
61 | 'BeginStop'
62 | $a2 = $ps.BeginStop($null, $null)
63 |
64 | #! kill processes, this releases jobs
65 | #! PSv2 Stop-Process is not enough
66 | Start-Sleep 2
67 | while(Get-Process notepad -ErrorAction Ignore) {
68 | Stop-Process -Name notepad
69 | Start-Sleep -Milliseconds 100
70 | }
71 |
72 | # wait, hangs in v1.2.0
73 | 'WaitOne'
74 | $null = $a2.AsyncWaitHandle.WaitOne()
75 |
76 | # no new jobs or processes (3 and 4)
77 | Start-Sleep 2
78 | assert (!(Get-Process notepad -ErrorAction Ignore))
79 |
80 | # logs
81 | $logs = Get-Item C:\TEMP\SplitPipelineIssue3\*
82 | equals $logs.Count 4
83 | assert ($logs[0].Name -like 'Begin-*-*-*-*-*')
84 | assert ($logs[1].Name -like 'Begin-*-*-*-*-*')
85 | assert ($logs[2].Name -like 'Finally-*-*-*-*-*')
86 | assert ($logs[3].Name -like 'Finally-*-*-*-*-*')
87 |
88 | # end
89 | remove C:\TEMP\SplitPipelineIssue3
90 | }
91 |
92 | task Random {
93 | .\Test-Stopping-Random.ps1 10
94 | }
95 |
--------------------------------------------------------------------------------
/Tests/Test-Stopping-Random.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests random stopping of Split-Pipeline.
5 |
6 | .Description
7 | Without parameters it repeats random tests infinitely.
8 |
9 | It starts Split-Pipeline with large enough input, slow Script, and Begin
10 | and Finally scripts. Then it waits for a random time and stops (like by
11 | Ctrl-C). Then it checks that Begin and Finally logs match, i.e. for each
12 | started job the Finally script should work even on stopping.
13 |
14 | .Parameter Repeat
15 | Specifies the number of tests.
16 | #>
17 |
18 | param(
19 | $Repeat = [int]::MaxValue
20 | )
21 |
22 | Set-StrictMode -Version Latest
23 |
24 | # global logs
25 | Add-Type @'
26 | using System;
27 | using System.Collections;
28 | public static class SplitPipelineLog {
29 | public static readonly ArrayList Begin = new ArrayList();
30 | public static readonly ArrayList Finally = new ArrayList();
31 | }
32 | '@
33 |
34 | # test to be invoked async
35 | $test = {
36 | Import-Module SplitPipeline
37 | $VerbosePreference = 2
38 | $lastId = [ref]-1
39 |
40 | $param = @{
41 | Variable = 'lastId'
42 | Verbose = $true
43 | Count = 10
44 | Load = 3, 1000
45 | Begin = {
46 | $random = New-Object System.Random
47 | $VerbosePreference = 2
48 | $id = ++$lastId.Value
49 | Write-Verbose "[$id] begin"
50 | $null = [SplitPipelineLog]::Begin.Add($id)
51 | }
52 | Finally = {
53 | $null = [SplitPipelineLog]::Finally.Add($id)
54 | }
55 | Script = {
56 | $all = @($input).Count
57 | Write-Verbose "[$id] $all items"
58 | [System.Threading.Thread]::Sleep($random.Next(0, 50))
59 | }
60 | }
61 |
62 | 1..1mb | Split-Pipeline @param
63 | }
64 |
65 | # repeat random tests
66 | for($n = 1; $n -le $Repeat; ++$n) {
67 | "[$n]" + '-'*70
68 |
69 | # reset logs
70 | [SplitPipelineLog]::Begin.Clear()
71 | [SplitPipelineLog]::Finally.Clear()
72 |
73 | # start Split-Pipeline
74 | $rs = [runspacefactory]::CreateRunspace($Host)
75 | $rs.Open()
76 | $ps = [PowerShell]::Create()
77 | $ps.Runspace = $rs
78 | $null = $ps.AddScript($test)
79 | $null = $ps.BeginInvoke()
80 |
81 | # wait for a random time
82 | $random = New-Object System.Random
83 | $sleep = $random.Next(0, 2000)
84 | "Stop after $sleep ms"
85 | [System.Threading.Thread]::Sleep($sleep)
86 |
87 | # stop
88 | $ps.Stop()
89 |
90 | # show results
91 | $ps.Streams.Error
92 | $ps.Streams.Verbose
93 |
94 | #! weird, else logs may not match
95 | Start-Sleep -Milliseconds 500
96 |
97 | # Begin and Finally should match
98 | $begin = [SplitPipelineLog]::Begin
99 | $finally = [SplitPipelineLog]::Finally
100 | "$begin"
101 | "$finally"
102 | if ($begin.Count -ne $finally.Count) {
103 | Write-Warning "$begin <> $finally"
104 | Read-Host 'Enter'
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/Tests/Test-Refill.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline -Refill and compares with the alternative method.
5 |
6 | .Description
7 | This is an example of Split-Pipeline with refilled input. The convention is
8 | simple: [ref] objects refill the input, other objects go to output as usual.
9 |
10 | This test processes hierarchical data using two methods:
11 | 1) Split-Pipeline - parallel processing and refilled input;
12 | 2) Step-Node - sequential recursive stepping through nodes.
13 |
14 | Both methods simulate slow data request on a node $_ as:
15 |
16 | Start-Sleep -Milliseconds 500; $_.GetEnumerator()
17 |
18 | Both methods process/output leaf nodes in the same way:
19 |
20 | '{0}={1}' -f $node.Key, $node.Value
21 |
22 | Split-Pipeline refills the input with container nodes:
23 |
24 | [ref]$node.Value
25 |
26 | Step-Node calls itself recursively with container nodes:
27 |
28 | Step-Node $node.Value
29 |
30 | The test shows that sorted results of two methods are the same and
31 | Split-Pipeline normally works faster than Step-Node.
32 |
33 | Result order is different due to different order of node processing.
34 | Besides, order of Split-Pipeline results is not necessarily constant.
35 |
36 | .Link
37 | https://github.com/nightroman/SplitPipeline/blob/main/Tests/Test-Refill.ps1
38 | #>
39 |
40 | ### Hierarchical data: container nodes are represented by hashtables
41 | $node1 = @{data1=1; data2=2; data3=3}
42 | $node2 = @{node1=$node1; node2=$node1; data4=4; data5=5}
43 | $root = @{node1=$node2; node2=$node2; data6=6; data7=7}
44 | $root | Format-Custom | Out-String
45 |
46 | ### Test 1: Refill Split-Pipeline with nodes
47 | $time1 = [Diagnostics.Stopwatch]::StartNew()
48 | $data1 = $root | Split-Pipeline -Refill {process{
49 | foreach($node in $(Start-Sleep -Milliseconds 500; $_.GetEnumerator())) {
50 | if ($node.Value -is [hashtable]) {
51 | [ref]$node.Value
52 | }
53 | else {
54 | '{0}={1}' -f $node.Key, $node.Value
55 | }
56 | }
57 | }}
58 | $time1.Stop()
59 |
60 | ### Test 2: Step through nodes recursively
61 | $time2 = [Diagnostics.Stopwatch]::StartNew()
62 | function Step-Node($_) {
63 | foreach($node in $(Start-Sleep -Milliseconds 500; $_.GetEnumerator())) {
64 | if ($node.Value -is [hashtable]) {
65 | Step-Node $node.Value
66 | }
67 | else {
68 | '{0}={1}' -f $node.Key, $node.Value
69 | }
70 | }
71 | }
72 | $data2 = Step-Node $root
73 | $time2.Stop()
74 |
### Test: Sorted results should be the same
# Raw order differs between the two methods (and may vary between
# Split-Pipeline runs), so compare sorted, joined results.
$data1 = ($data1 | Sort-Object) -join ','
$data2 = ($data2 | Sort-Object) -join ','
$data1
$data2
if ($data1 -ne $data2) { throw 'Different results' }

### Test: Split-Pipeline should work faster than recursive processing
# Only a warning, not a failure: timing depends on the machine and load.
$time1.Elapsed.ToString()
$time2.Elapsed.ToString()
if ($time1.Elapsed -ge $time2.Elapsed) { Write-Warning 'Unexpected times.' }
86 |
--------------------------------------------------------------------------------
/Tests/Load.test.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Tests Split-Pipeline -Load.
5 |
6 | .Link
7 | Invoked by https://github.com/nightroman/Invoke-Build
8 | #>
9 |
10 | Import-Module SplitPipeline
11 | Set-StrictMode -Version Latest
12 |
13 | # Count words in input data. We used to output just `@($input).Count` and check
14 | # output counts, i.e. $r[0] 1, $r[1] 1. The problem: 3rd load may output before
15 | # 2nd pipe is done. Thus we either should use -Order or output/check differently.
16 | # So we output joined items and check them anywhere, not just at [0] or [1].
# Sums the number of space-separated words over all strings in $Data.
# Consecutive spaces produce empty entries which are counted, same as Split(' ').
function Get-WordCount($Data) {
	$total = 0
	foreach($item in $Data) {
		$total += $item.Split(' ').Count
	}
	$total
}
24 |
# Invalid -Load values: validation errors for bad array shapes,
# binding error for min > max, and the "treated as omitted" edge case.
task Error {
	# 0 args
	($r = try {1..9 | Split-Pipeline {} -Load @()} catch {$_})
	equals $r.FullyQualifiedErrorId 'ParameterArgumentValidationError,SplitPipeline.SplitPipelineCommand'

	# null
	($r = try {1..9 | Split-Pipeline {} -Load $null} catch {$_})
	equals $r.FullyQualifiedErrorId 'ParameterArgumentValidationError,SplitPipeline.SplitPipelineCommand'

	# 3+ args
	($r = try {1..9 | Split-Pipeline {} -Load 1,2,3} catch {$_})
	equals $r.FullyQualifiedErrorId 'ParameterArgumentValidationError,SplitPipeline.SplitPipelineCommand'

	# [0] > [1]
	($r = try {1..9 | Split-Pipeline {} -Load 1,0} catch {$_})
	equals $r.FullyQualifiedErrorId 'ParameterBindingFailed,SplitPipeline.SplitPipelineCommand'

	# [0]<1 is fine and treated as omitted, [1] is ignored
	# 9 items split evenly between 2 pipelines: 5 + 4
	$r = 1..9 | Split-Pipeline {@($input).Count} -Load 0,-1 -Count 2
	equals $r.Count 2
	equals $r[0] 5
	equals $r[1] 4
}
48 |
# v1.4.0 By default the whole input is collected and split evenly
#! The order is not guaranteed but so far this test works as is.
task TheWholeInput {
	# 11 items, 2 pipelines -> parts of 6 and 5
	($r = 1..11 | Split-Pipeline -Count 2 {@($input).Count})
	equals $r.Count 2
	equals $r[0] 6
	equals $r[1] 5

	# same using the parameter, 1.6.0
	($r = Split-Pipeline -Count 2 {@($input).Count} (1..11))
	equals $r.Count 2
	equals $r[0] 6
	equals $r[1] 5
}

# `-Load 1` lets the algorithm to work as soon as any input available
#! This test was the first to show not predicted order problems and was redesigned.
task LetItChoose {
	($r = 1..11 | Split-Pipeline -Count 2 {@($input) -join ' '} -Load 1)
	assert ($r.Count -ge 4)
	# the first parts are singletons because input arrives item by item
	assert ($r -contains '1')
	assert ($r -contains '2')
	# all 11 items must be processed regardless of part sizes
	equals (Get-WordCount $r) 11
}

# `-Load 2` sets the minimum
task Min2MaxX {
	($r = 1..11 | Split-Pipeline -Count 2 {@($input) -join ' '} -Load 2)
	assert ($r.Count -ge 4)
	# the first parts contain exactly the minimum 2 items
	assert ($r -contains '1 2')
	assert ($r -contains '3 4')
	equals (Get-WordCount $r) 11
}

# `-Load 4,4` sets the part size to 4
task Min4Max4 {
	# -Order makes the output part order deterministic: 4 + 4 + 3
	($r = 1..11 | Split-Pipeline -Count 2 {@($input) -join ' '} -Load 4,4 -Order)
	equals $r.Count 3
	assert ($r -contains '1 2 3 4')
	assert ($r -contains '5 6 7 8')
	assert ($r -contains '9 10 11')
}
91 |
--------------------------------------------------------------------------------
/Src/Job.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.ObjectModel;
2 | using System.Management.Automation;
3 | using System.Management.Automation.Runspaces;
4 |
5 | namespace SplitPipeline;
6 |
/// <summary>
/// Wraps one parallel pipeline: a <see cref="PowerShell"/> instance bound to
/// its own runspace, invoked asynchronously with parts of the input queue.
/// </summary>
class Job
{
	// The wrapped pipeline; disposed together with its runspace by Close().
	readonly PowerShell _posh = PowerShell.Create();

	// Set by BeginInvoke(); null until the first async invocation starts.
	IAsyncResult _async;

	/// <summary>
	/// Gets the pipeline streams (errors, warnings, verbose, debug, ...).
	/// </summary>
	public PSDataStreams Streams { get { return _posh.Streams; } }

	/// <summary>
	/// Gets the wait handle of the async pipeline.
	/// Valid only after <see cref="BeginInvoke"/> has been called.
	/// </summary>
	public WaitHandle WaitHandle { get { return _async.AsyncWaitHandle; } }

	/// <summary>
	/// Gets true if it is not completed or failed.
	/// NB: "not yet started" also counts as working.
	/// </summary>
	public bool IsWorking
	{
		get
		{
			switch (_posh.InvocationStateInfo.State)
			{
				case PSInvocationState.Completed: return false;
				case PSInvocationState.Failed: return false;
			}
			return true;
		}
	}

	/// <summary>
	/// New job with its runspace. The runspace gets opened.
	/// </summary>
	/// <param name="runspace">The runspace to be owned and opened by this job.</param>
	public Job(Runspace runspace)
	{
		_posh.Runspace = runspace;
		runspace.Open();
	}

	/// <summary>
	/// Invokes the begin script, if any, sets the pipeline script once, returns the begin output.
	/// </summary>
	/// <param name="begin">The optional initialization script, may be null.</param>
	/// <param name="script">The pipeline script invoked later with input parts.</param>
	/// <returns>Output of the begin script, or null if there is no begin script.</returns>
	public Collection<PSObject> InvokeBegin(string begin, string script)
	{
		Collection<PSObject> result = null;
		if (begin != null)
		{
			// useLocalScope=false: run in the runspace scope so that items
			// defined by the begin script remain visible to the pipeline script
			_posh.AddScript(begin, false);
			result = _posh.Invoke();
			_posh.Commands.Clear();
		}

		// the pipeline script is added once and re-invoked for each input part
		_posh.AddScript(script);
		return result;
	}

	/// <summary>
	/// Starts the pipeline script async with the next input part.
	/// </summary>
	/// <param name="queue">The input queue; <paramref name="count"/> items are dequeued from it.</param>
	/// <param name="count">The number of items to take; must not exceed the queue count.</param>
	public void BeginInvoke(Queue<PSObject> queue, int count)
	{
		var input = new PSDataCollection<PSObject>(count);
		while (--count >= 0)
			input.Add(queue.Dequeue());
		// mark complete so the pipeline's process/end blocks can finish
		input.Complete();

		_async = _posh.BeginInvoke(input);
	}

	/// <summary>
	/// Waits for the pipeline to finish and returns its output.
	/// </summary>
	/// <returns>The pipeline output, or null if nothing was started.</returns>
	public PSDataCollection<PSObject> EndInvoke()
	{
		if (_async == null)
			return null;

		return _posh.EndInvoke(_async);
	}

	/// <summary>
	/// Invokes the end script and returns its output.
	/// </summary>
	public Collection<PSObject> InvokeEnd(string script)
	{
		_posh.Commands.Clear();
		_posh.AddScript(script, false);
		return _posh.Invoke();
	}

	/// <summary>
	/// Invokes the final script, its output is ignored.
	/// </summary>
	public void InvokeFinally(string script)
	{
		// it may be still running, e.g. on stopping
		if (_posh.InvocationStateInfo.State == PSInvocationState.Running)
			_posh.Stop();

		// invoke
		_posh.Commands.Clear();
		_posh.AddScript(script, false);
		_posh.Invoke();
	}

	/// <summary>
	/// Closes the pipeline and the runspace.
	/// </summary>
	public void Close()
	{
		_posh.Dispose();
		_posh.Runspace.Dispose();
	}
}
114 |
--------------------------------------------------------------------------------
/1.build.ps1:
--------------------------------------------------------------------------------
1 | <#
2 | .Synopsis
3 | Build script, https://github.com/nightroman/Invoke-Build
4 | #>
5 |
param(
	# Build configuration passed to dotnet build/publish.
	$Configuration = 'Release'
)

Set-StrictMode -Version 3
$_name = 'SplitPipeline'
# Module install root, used by the publish/help/package tasks.
$_root = "$env:ProgramFiles\WindowsPowerShell\Modules\$_name"
13 |
# Synopsis: Remove temp files.
task clean {
	remove z, Src\bin, Src\obj, README.html
}

# Synopsis: Generate meta files.
# Incremental: reruns only when the build script or release notes change.
# Runs the `version` task first to set $Script:_version.
task meta -Inputs $BuildFile, Release-Notes.md -Outputs "Module\$_name.psd1", Src\Directory.Build.props -Jobs version, {
	$Project = 'https://github.com/nightroman/SplitPipeline'
	$Summary = 'SplitPipeline - Parallel Data Processing in PowerShell'
	$Copyright = 'Copyright (c) Roman Kuzmin'

	Set-Content "Module\$_name.psd1" @"
@{
	Author = 'Roman Kuzmin'
	ModuleVersion = '$_version'
	Description = '$Summary'
	CompanyName = '$Project'
	Copyright = '$Copyright'

	RootModule = '$_name.dll'

	PowerShellVersion = '5.1'
	GUID = '7806b9d6-cb68-4e21-872a-aeec7174a087'

	CmdletsToExport = 'Split-Pipeline'
	FunctionsToExport = @()
	VariablesToExport = @()
	AliasesToExport = @()

	PrivateData = @{
		PSData = @{
			Tags = 'Parallel', 'Pipeline', 'Runspace', 'Invoke', 'Foreach'
			LicenseUri = 'http://www.apache.org/licenses/LICENSE-2.0'
			ProjectUri = 'https://github.com/nightroman/SplitPipeline'
			ReleaseNotes = 'https://github.com/nightroman/SplitPipeline/blob/main/Release-Notes.md'
		}
	}
}
"@

	# NOTE(review): the content below looks like MSBuild properties whose XML
	# tags were stripped by text extraction — restore the <Project>/<PropertyGroup>
	# elements from the repository before using this script; TODO confirm.
	Set-Content Src\Directory.Build.props @"

	$Project
	$Copyright
	$Summary
	$_name
	$_version
	False


"@
}
67 |
# Synopsis: Build, publish in post-build, make help.
task build meta, {
	exec { dotnet build "Src\$_name.csproj" -c $Configuration --tl:off }
}

# Synopsis: Publish the module (post-build).
task publish {
	# (0..3) accepts robocopy exit codes 0..3 as success
	exec { robocopy Module $_root /s /xf *-Help.ps1 } (0..3)
	exec { dotnet publish Src\$_name.csproj --no-build -c $Configuration -o $_root }
	# do not ship the reference assembly or deps manifest with the module
	remove $_root\System.Management.Automation.dll, $_root\*.deps.json
}

# Synopsis: Build help by https://github.com/nightroman/Helps
task help -After ?build -Inputs @(Get-Item Src\*.cs, "Module\en-US\$_name.dll-Help.ps1") -Outputs "$_root\en-US\$_name.dll-Help.xml" {
	. Helps.ps1
	Convert-Helps "Module\en-US\$_name.dll-Help.ps1" $Outputs
}

# Synopsis: Set $Script:_version.
# Extracts the latest version from the first `## vX.Y.Z` heading.
task version {
	($Script:_version = Get-BuildVersion Release-Notes.md '##\s+v(\d+\.\d+\.\d+)')
}

# Synopsis: Convert markdown files to HTML.
task markdown {
	exec { pandoc.exe --standalone --from=gfm --output=README.html --metadata=pagetitle=$_name README.md }
}

# Synopsis: Make the package.
task package markdown, version, {
	# the installed module must match the release notes version
	equals $_version (Get-Item $_root\$_name.dll).VersionInfo.ProductVersion
	equals ([Version]$_version) (Get-Module $_name -ListAvailable).Version

	remove z
	# (0..3) accepts robocopy exit codes 0..3 as success
	exec { robocopy $_root z\$_name /s /xf *.pdb } (0..3)

	Copy-Item LICENSE, README.html -Destination z\$_name

	# verify the package file list is exactly as expected
	Assert-SameFile.ps1 -Result (Get-ChildItem z\$_name -Recurse -File -Name) -Text -View $env:MERGE @'
LICENSE
README.html
SplitPipeline.dll
SplitPipeline.psd1
en-US\about_SplitPipeline.help.txt
en-US\SplitPipeline.dll-Help.xml
'@
}

# Synopsis: Make and push the PSGallery package.
task pushPSGallery package, {
	$NuGetApiKey = Read-Host NuGetApiKey
	Publish-Module -Path z\$_name -NuGetApiKey $NuGetApiKey
},
clean

# Synopsis: Push to the repository with a version tag.
task pushRelease version, {
	$changes = exec { git status --short }
	assert (!$changes) "Please, commit changes."

	exec { git push }
	exec { git tag -a "v$_version" -m "v$_version" }
	exec { git push origin "v$_version" }
}

# Synopsis: Run tests.
task test {
	Invoke-Build ** Tests
}

# Synopsis: Test Core.
task core {
	exec { pwsh -NoProfile -Command Invoke-Build test }
}

# Synopsis: Test Desktop.
task desktop {
	exec { powershell -NoProfile -Command Invoke-Build test }
}

# Synopsis: Test editions.
task tests desktop, core

# Synopsis: Build and clean.
task . build, clean
153 |
--------------------------------------------------------------------------------
/Release-Notes.md:
--------------------------------------------------------------------------------
1 | # SplitPipeline Release Notes
2 |
3 | ## v2.0.1
4 |
5 | Avoid double warnings, #29.
6 |
7 | ## v2.0.0
8 |
9 | - Designed for Windows PowerShell 5.1 and PowerShell Core
10 | - Built with PowerShellStandard.Library
11 | - Published at PSGallery only
12 |
13 | ## v1.6.3
14 |
15 | Add help about error preference, #30.
16 |
17 | ## v1.6.2
18 |
19 | Fixed #29, `WarningVariable` should be populated.
20 |
21 | ## v1.6.1
22 |
23 | New helper `$Pipeline.Lock(script)` for mutually exclusive operations, #25.
24 | It is not designed for usual scenarios because it "breaks" parallel flows.
25 |
26 | ## v1.6.0
27 |
28 | Input objects may be provided using the parameter `InputObject`, #19.
29 |
30 | ## v1.5.3
31 |
32 | Packaged and published as PSGallery module.
33 |
34 | ## v1.5.2
35 |
36 | Fixed #12 `VerbosePreference` can be any value.
37 |
38 | ## v1.5.1
39 |
40 | Fixed #10 Tight loop in `EndProcessing()`
41 |
42 | ## v1.5.0
43 |
44 | `Count` accepts one or two values. One is as usual. Two values limit the number
45 | of required pipelines also taking into account the number of processors. (Too
46 | many pipelines on machines with many cores is not always optimal.)
47 |
48 | Corrected the test/demo script *Test-ProgressTotal.ps1*.
49 |
50 | Minor performance tweaks on creation of runspaces.
51 |
52 | ## v1.4.3
53 |
54 | Fixed duplicated debug, warning, and verbose messages (v1.4.2).
55 |
56 | ## v1.4.2
57 |
58 | Pipeline runspaces are created with the host used by `Split-Pipeline`. As a
59 | result, some host features can be used by pipeline scripts, e.g. `Write-Host`
60 | and even `Write-Progress`, see `Test-Progress*.ps1` in the project repository.
61 |
62 | ## v1.4.1
63 |
64 | If the minimum `Load` is less than 1 then the parameter is treated as omitted.
65 |
66 | ## v1.4.0
67 |
68 | *Potentially incompatible change*. By default, i.e. when `Load` is omitted, the
69 | whole input is collected and split evenly between parallel pipelines. This way
70 | seems to be the most effective in simple cases. In other cases, e.g. on large
71 | or slow input, `Load` should be used in order to enable processing of input
72 | parts and specify their limits.
73 |
74 | Corrected input item count in `Refill` mode in verbose statistics.
75 |
76 | Refactoring of ending, closing, and stopping.
77 |
78 | ## v1.3.1
79 |
80 | Removed the obsolete switch `Auto` and pieces of old code.
81 |
82 | ## v1.3.0
83 |
84 | Reviewed automatic load balancing, made it the default and less aggressive
85 | (*potentially incompatible change*). The obsolete switch `Auto` still exists
86 | but it is ignored. Use the parameter `Load` in order to specify part limits.
87 | E.g. `-Load N,N` tells to use N items per pipeline, i.e. no load balancing.
88 |
In other words: a) `Auto` is slightly redundant with `Load`; b) not using
90 | `Auto`, e.g. forgetting, often causes less effective work. `Auto` will be
91 | removed in the next version.
92 |
93 | Improved stopping (e.g. by `[Ctrl-C]`):
94 |
95 | - Fixed some known and some potential issues.
96 | - The `Finally` script should work on stopping.
97 |
98 | Amended verbose messages. They are for:
99 |
100 | - Each job feed with current data.
101 | - End of processing with end data.
102 | - Summary with totals.
103 |
104 | ## v1.2.1
105 |
106 | Added processing of `StopProcessing()` which is called on `[Ctrl-C]`. Note that
107 | stopping is normally not recommended. But in some cases "under construction" it
108 | may help, e.g. [#3](https://github.com/nightroman/SplitPipeline/issues/3).
109 |
110 | ## v1.2.0
111 |
112 | Debug streams of parallel pipelines are processed as well and debug messages
113 | are propagated to the main pipeline, just like errors, warnings, and verbose
114 | messages.
115 |
116 | ## v1.1.0
117 |
118 | New parameter `ApartmentState`.
119 |
120 | ## v1.0.1
121 |
122 | Help. Mentioned why and when to use `Variable`, `Function`, and `Module`. Added
123 | the related example.
124 |
125 | ## v1.0.0
126 |
127 | Minor cosmetic changes in help and code. The API seems to be stabilized and no
128 | issues were found for a while. Changed the status from "beta" to "release".
129 |
130 | ## v0.4.1
131 |
132 | Refactoring and minor improvements.
133 |
134 | ## v0.4.0
135 |
136 | Revision of parameters and automatic load balancing (mostly simplification).
137 | Joined parameters Load and Limit into the single parameter Load (one or two
138 | values). Removed parameters Cost (not needed now) and Queue (Load is used in
139 | order to limit the queue).
140 |
141 | ## v0.3.2
142 |
143 | Minor tweaks.
144 |
145 | ## v0.3.1
146 |
147 | Refilled input makes infinite loops possible in some scenarios. Use the new
148 | parameter `Filter` in order to exclude already processed objects and avoid
149 | loops.
150 |
151 | ## v0.3.0
152 |
153 | New switch `Refill` tells to refill the input queue from output. `[ref]`
154 | objects are intercepted and added to the input queue. Other objects go to
155 | output as usual. See an example in help and `Test-Refill.ps1`.
156 |
157 | Tweaks in feeding parallel pipelines and automatic tuning of load.
158 |
159 | ## v0.2.0
160 |
161 | New switch `Order` tells to output part results in the same order as input
162 | parts arrive. Thus, although order of processing is not predictable, output
order can be made predictable. This feature opens doors for more scenarios.
164 |
165 | Added checks for `Stopping` in `EndProcessing` (faster stop on `Ctrl+C`).
166 |
167 | ## v0.1.1
168 |
169 | Tweaks, including related to PowerShell V3 CTP2.
170 |
171 | ## v0.1.0
172 |
173 | New switch `Auto` is used in order to determine Load values automatically during
174 | processing. Use `Verbose` in order to view some related information. Yet another
175 | new parameter `Cost` is used together with `Auto`; it is introduced rather for
176 | experiments.
177 |
178 | ## v0.0.1
179 |
180 | This is the first of v0 series (pre-release versions). Cmdlet parameters and
181 | behaviour may change.
182 |
183 | The cmdlet Split-Pipeline passes simple tests and shows good performance gain
184 | in a few practical scenarios.
185 |
186 | Failures, errors, warnings, and verbose messages from parallel pipelines are
187 | trivial, straightforward, and perhaps not useful enough for troubleshooting.
188 |
--------------------------------------------------------------------------------
/Module/en-US/SplitPipeline.dll-Help.ps1:
--------------------------------------------------------------------------------
1 |
2 | <#
3 | .Synopsis
4 | Help script (https://github.com/nightroman/Helps)
5 | #>
6 |
7 | # Import the module to make commands available for the builder.
8 | Import-Module SplitPipeline
9 |
10 | ### Split-Pipeline command help
11 | @{
12 | command = 'Split-Pipeline'
13 | synopsis = @'
14 | Splits pipeline input and processes its parts by parallel pipelines.
15 | '@
16 | description = @'
17 | The cmdlet splits the input, processes its parts by parallel pipelines, and
18 | outputs the results for further processing. It may work without collecting
19 | the whole input, large or infinite.
20 |
21 | When Load is omitted the whole input is collected and split evenly between
22 | Count parallel pipelines. This method shows the best performance in simple
23 | cases. In other cases, e.g. on large or slow input, Load should be used in
24 | order to enable processing of partially collected input.
25 |
26 | The cmdlet creates several pipelines. Each pipeline is created when input
27 | parts are available, created pipelines are busy, and their number is less
28 | than Count. Each pipeline is used for processing one or more input parts.
29 |
30 | Because each pipeline works in its own runspace variables, functions, and
31 | modules from the main script are not automatically available for pipeline
32 | scripts. Such items should be specified by Variable, Function, and Module
33 | parameters in order to be available.
34 |
35 | The Begin and End scripts are invoked for each created pipeline once before
36 | and after processing. Each input part is piped to the script block Script.
37 | The Finally script is invoked after all, even on failures or stopping.
38 |
39 | If number of created pipelines is equal to Count and all pipelines are busy
40 | then incoming input items are enqueued for later processing. If the queue
41 | size hits the limit then the algorithm waits for any pipeline to complete.
42 |
43 | Input parts are not necessarily processed in the same order as they come.
44 | But output parts can be ordered according to input, use the switch Order.
45 |
46 | In rare scenarios when synchronous code must be invoked in pipelines,
47 | use the helper $Pipeline.Lock, see the repository tests for examples.
48 |
49 | ERROR PREFERENCE
50 |
51 | If the current error preference is Stop and the internal pipelines emit
52 | errors (even non-terminating) then Split-Pipeline treats these errors as
53 | terminating per its current environment. To avoid this consider using
54 | -ErrorAction Continue.
55 | '@
56 | parameters = @{
57 | Script = @'
58 | The script invoked for each input part of each pipeline with an input
59 | part piped to it. The script either processes the whole part ($input)
60 | or each item ($_) separately in the "process" block. Examples:
61 |
62 | # Process the whole $input part:
63 | ... | Split-Pipeline { $input | %{ $_ } }
64 |
65 | # Process input items $_ separately:
66 | ... | Split-Pipeline { process { $_ } }
67 |
68 | The script may have any of "begin", "process", and "end" blocks:
69 |
70 | ... | Split-Pipeline { begin {...} process { $_ } end {...} }
71 |
72 | Note that "begin" and "end" blocks are called for each input part but
73 | scripts defined by parameters Begin and End are called for pipelines.
74 | '@
75 | InputObject = @'
76 | Input objects processed by parallel pipelines. Normally this parameter
77 | is not used directly, objects are sent using the pipeline. But it is
78 | fine to specify the input using this parameter.
79 | '@
80 | Begin = @'
81 | The script invoked for each pipeline on creation before processing. The
82 | goal is to initialize the runspace to be used by the pipeline, normally
83 | to set some variables, dot-source scripts, import modules, and etc.
84 | '@
85 | End = @'
86 | The script invoked for each pipeline once after processing. The goal
87 | is, for example, to output some results accumulated during processing
88 | of input parts by the pipeline. Consider to use Finally for releasing
89 | resources instead of End or in addition to it.
90 | '@
91 | Finally = @'
92 | The script invoked for each opened pipeline before its closing, even on
93 | terminating errors or stopping (Ctrl-C). It is normally needed in order
94 | to release resources created by Begin. Output is ignored. If Finally
95 | fails then its errors are written as warnings because it has to be
96 | called for remaining pipelines.
97 | '@
98 | Filter = @'
99 | Either a hashtable for collecting unique input objects or a script used
100 | in order to test an input object. Input includes extra objects added in
101 | Refill mode. In fact, this filter is mostly needed for Refill.
102 |
103 | A hashtable is used in order to collect and enqueue unique objects. In
104 | Refill mode it may be useful for avoiding infinite loops.
105 |
106 | A script is invoked in a child scope of the scope where the cmdlet is
107 | invoked. The first argument is an object being tested. Returned $true
108 | tells to add an object to the input queue.
109 | '@
110 | Count = @'
111 | Specifies the parallel pipeline count. The default value is the number
of processors. For intensive jobs use the default or decreased value,
113 | especially if there are other tasks working at the same time. But for
114 | jobs not consuming much processor resources increasing the number may
115 | improve performance.
116 |
117 | The parameter accepts an array of one or two integers. A single value
118 | specifies the recommended number of pipelines. Two arguments specify
119 | the minimum and maximum numbers and the recommended value is set to
120 | Max(Count[0], Min(Count[1], ProcessorCount)).
121 | '@
122 | Load = @'
123 | Enables processing of partially collected input and specifies input
124 | part limits. If it is omitted then the whole input is collected and
125 | split evenly between pipelines.
126 |
127 | The parameter accepts an array of one or two integers. The first is the
128 | minimum number of objects per pipeline. If it is less than 1 then Load
129 | is treated as omitted. The second number is the optional maximum.
130 |
131 | If processing is fast then it is important to specify a proper minimum.
132 | Otherwise Split-Pipeline may work even slower than a standard pipeline.
133 |
134 | Setting the maximum causes more frequent output. For example, this may
135 | be important for feeding simultaneously working downstream pipelines.
136 |
137 | Setting the maximum number is also needed for potentially large input
138 | in order to limit the input queue size and avoid out of memory issues.
139 | The maximum queue size is set internally to Load[1] * Count.
140 |
141 | Use the switch Verbose in order to get some statistics which may help
142 | to choose suitable load limits.
143 |
144 | CAUTION: The queue limit may be ignored and exceeded if Refill is used.
145 | Any number of objects written via [ref] go straight to the input queue.
146 | Thus, depending on data Refill scenarios may fail due to out of memory.
147 | '@
148 | Variable = @'
149 | Variables imported from the current runspace to parallel.
150 | '@
151 | Function = @'
152 | Functions imported from the current runspace to parallel.
153 | '@
154 | Module = @'
155 | Modules imported to parallel runspaces.
156 | '@
157 | Order = @'
158 | Tells to output part results in the same order as input parts arrive.
159 | The algorithm may work slower.
160 | '@
161 | Refill = @'
162 | Tells to refill the input by [ref] objects from output. Other objects
163 | go to output as usual. This convention is used for processing items of
164 | hierarchical data structures: child container items come back to input,
165 | leaf items or other data produced by processing go to output.
166 |
167 | NOTE: Refilled input makes infinite loops possible for some data. Use
168 | Filter in order to exclude already processed objects and avoid loops.
169 | '@
170 | ApartmentState = @'
Specify either "MTA" (multi-threaded) or "STA" (single-threaded) for
172 | the apartment states of the threads used to run commands in pipelines.
173 | '@
174 | }
175 | inputs = @(
176 | @{
177 | type = 'Object'
178 | description = @'
179 | Input objects processed by parallel pipelines.
180 | '@
181 | }
182 | )
183 | outputs = @(
184 | @{
185 | type = 'Object'
186 | description = @'
187 | Output of the Begin, Script, and End script blocks. The scripts Begin
188 | and End are invoked once for each pipeline before and after processing.
189 | The script Script is invoked repeatedly with input parts piped to it.
190 | '@
191 | }
192 | )
193 | examples = @(
194 | @{
195 | code = {
196 | 1..10 | . {process{ $_; sleep 1 }}
197 | 1..10 | Split-Pipeline -Count 10 {process{ $_; sleep 1 }}
198 | }
199 | remarks = @'
200 | Two commands perform the same job simulating long but not processor
201 | consuming operations on each item. The first command takes about 10
202 | seconds. The second takes about 2 seconds due to Split-Pipeline.
203 | '@
204 | test = { . $args[0] }
205 | }
206 | @{
207 | code = {
208 | $PSHOME | Split-Pipeline -Refill {process{
209 | foreach($item in Get-ChildItem -LiteralPath $_ -Force) {
210 | if ($item.PSIsContainer) {
211 | [ref]$item.FullName
212 | }
213 | else {
214 | $item.Length
215 | }
216 | }
217 | }} | Measure-Object -Sum
218 | }
219 | remarks = @'
220 | This is an example of Split-Pipeline with refilled input. By the convention
221 | output [ref] objects refill the input, other objects go to output as usual.
222 |
223 | The code calculates the number and size of files in $PSHOME. It is a "how
224 | to" sample, performance gain is not expected because the code is trivial
225 | and works relatively fast.
226 |
227 | See also another example with simulated slow data requests:
228 | https://github.com/nightroman/SplitPipeline/blob/main/Tests/Test-Refill.ps1
229 | '@
230 | test = { . $args[0] }
231 | }
232 | @{
233 | remarks = @'
234 | Because each pipeline works in its own runspace variables, functions, and
235 | modules from the main script are not automatically available for pipeline
236 | scripts. Such items should be specified by Variable, Function, and Module
237 | parameters in order to be available.
238 |
239 | > $arr = @('one', 'two', 'three'); 0..2 | . {process{ $arr[$_] }}
240 | one
241 | two
242 | three
243 |
244 | > $arr = @('one', 'two', 'three'); 0..2 | Split-Pipeline {process{ $arr[$_] }}
245 | Split-Pipeline : Cannot index into a null array.
246 | ...
247 |
248 | > $arr = @('one', 'two', 'three'); 0..2 | Split-Pipeline -Variable arr {process{ $arr[$_] }}
249 | one
250 | two
251 | three
252 | '@
253 | }
254 | )
255 | links = @(
256 | @{ text = 'Project site:'; URI = 'https://github.com/nightroman/SplitPipeline' }
257 | )
258 | }
259 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011 Roman Kuzmin
2 |
3 | Apache License
4 | Version 2.0, January 2004
5 | http://www.apache.org/licenses/
6 |
7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8 |
9 | 1. Definitions.
10 |
11 | "License" shall mean the terms and conditions for use, reproduction,
12 | and distribution as defined by Sections 1 through 9 of this document.
13 |
14 | "Licensor" shall mean the copyright owner or entity authorized by
15 | the copyright owner that is granting the License.
16 |
17 | "Legal Entity" shall mean the union of the acting entity and all
18 | other entities that control, are controlled by, or are under common
19 | control with that entity. For the purposes of this definition,
20 | "control" means (i) the power, direct or indirect, to cause the
21 | direction or management of such entity, whether by contract or
22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
23 | outstanding shares, or (iii) beneficial ownership of such entity.
24 |
25 | "You" (or "Your") shall mean an individual or Legal Entity
26 | exercising permissions granted by this License.
27 |
28 | "Source" form shall mean the preferred form for making modifications,
29 | including but not limited to software source code, documentation
30 | source, and configuration files.
31 |
32 | "Object" form shall mean any form resulting from mechanical
33 | transformation or translation of a Source form, including but
34 | not limited to compiled object code, generated documentation,
35 | and conversions to other media types.
36 |
37 | "Work" shall mean the work of authorship, whether in Source or
38 | Object form, made available under the License, as indicated by a
39 | copyright notice that is included in or attached to the work
40 | (an example is provided in the Appendix below).
41 |
42 | "Derivative Works" shall mean any work, whether in Source or Object
43 | form, that is based on (or derived from) the Work and for which the
44 | editorial revisions, annotations, elaborations, or other modifications
45 | represent, as a whole, an original work of authorship. For the purposes
46 | of this License, Derivative Works shall not include works that remain
47 | separable from, or merely link (or bind by name) to the interfaces of,
48 | the Work and Derivative Works thereof.
49 |
50 | "Contribution" shall mean any work of authorship, including
51 | the original version of the Work and any modifications or additions
52 | to that Work or Derivative Works thereof, that is intentionally
53 | submitted to Licensor for inclusion in the Work by the copyright owner
54 | or by an individual or Legal Entity authorized to submit on behalf of
55 | the copyright owner. For the purposes of this definition, "submitted"
56 | means any form of electronic, verbal, or written communication sent
57 | to the Licensor or its representatives, including but not limited to
58 | communication on electronic mailing lists, source code control systems,
59 | and issue tracking systems that are managed by, or on behalf of, the
60 | Licensor for the purpose of discussing and improving the Work, but
61 | excluding communication that is conspicuously marked or otherwise
62 | designated in writing by the copyright owner as "Not a Contribution."
63 |
64 | "Contributor" shall mean Licensor and any individual or Legal Entity
65 | on behalf of whom a Contribution has been received by Licensor and
66 | subsequently incorporated within the Work.
67 |
68 | 2. Grant of Copyright License. Subject to the terms and conditions of
69 | this License, each Contributor hereby grants to You a perpetual,
70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71 | copyright license to reproduce, prepare Derivative Works of,
72 | publicly display, publicly perform, sublicense, and distribute the
73 | Work and such Derivative Works in Source or Object form.
74 |
75 | 3. Grant of Patent License. Subject to the terms and conditions of
76 | this License, each Contributor hereby grants to You a perpetual,
77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78 | (except as stated in this section) patent license to make, have made,
79 | use, offer to sell, sell, import, and otherwise transfer the Work,
80 | where such license applies only to those patent claims licensable
81 | by such Contributor that are necessarily infringed by their
82 | Contribution(s) alone or by combination of their Contribution(s)
83 | with the Work to which such Contribution(s) was submitted. If You
84 | institute patent litigation against any entity (including a
85 | cross-claim or counterclaim in a lawsuit) alleging that the Work
86 | or a Contribution incorporated within the Work constitutes direct
87 | or contributory patent infringement, then any patent licenses
88 | granted to You under this License for that Work shall terminate
89 | as of the date such litigation is filed.
90 |
91 | 4. Redistribution. You may reproduce and distribute copies of the
92 | Work or Derivative Works thereof in any medium, with or without
93 | modifications, and in Source or Object form, provided that You
94 | meet the following conditions:
95 |
96 | (a) You must give any other recipients of the Work or
97 | Derivative Works a copy of this License; and
98 |
99 | (b) You must cause any modified files to carry prominent notices
100 | stating that You changed the files; and
101 |
102 | (c) You must retain, in the Source form of any Derivative Works
103 | that You distribute, all copyright, patent, trademark, and
104 | attribution notices from the Source form of the Work,
105 | excluding those notices that do not pertain to any part of
106 | the Derivative Works; and
107 |
108 | (d) If the Work includes a "NOTICE" text file as part of its
109 | distribution, then any Derivative Works that You distribute must
110 | include a readable copy of the attribution notices contained
111 | within such NOTICE file, excluding those notices that do not
112 | pertain to any part of the Derivative Works, in at least one
113 | of the following places: within a NOTICE text file distributed
114 | as part of the Derivative Works; within the Source form or
115 | documentation, if provided along with the Derivative Works; or,
116 | within a display generated by the Derivative Works, if and
117 | wherever such third-party notices normally appear. The contents
118 | of the NOTICE file are for informational purposes only and
119 | do not modify the License. You may add Your own attribution
120 | notices within Derivative Works that You distribute, alongside
121 | or as an addendum to the NOTICE text from the Work, provided
122 | that such additional attribution notices cannot be construed
123 | as modifying the License.
124 |
125 | You may add Your own copyright statement to Your modifications and
126 | may provide additional or different license terms and conditions
127 | for use, reproduction, or distribution of Your modifications, or
128 | for any such Derivative Works as a whole, provided Your use,
129 | reproduction, and distribution of the Work otherwise complies with
130 | the conditions stated in this License.
131 |
132 | 5. Submission of Contributions. Unless You explicitly state otherwise,
133 | any Contribution intentionally submitted for inclusion in the Work
134 | by You to the Licensor shall be under the terms and conditions of
135 | this License, without any additional terms or conditions.
136 | Notwithstanding the above, nothing herein shall supersede or modify
137 | the terms of any separate license agreement you may have executed
138 | with Licensor regarding such Contributions.
139 |
140 | 6. Trademarks. This License does not grant permission to use the trade
141 | names, trademarks, service marks, or product names of the Licensor,
142 | except as required for reasonable and customary use in describing the
143 | origin of the Work and reproducing the content of the NOTICE file.
144 |
145 | 7. Disclaimer of Warranty. Unless required by applicable law or
146 | agreed to in writing, Licensor provides the Work (and each
147 | Contributor provides its Contributions) on an "AS IS" BASIS,
148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 | implied, including, without limitation, any warranties or conditions
150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 | PARTICULAR PURPOSE. You are solely responsible for determining the
152 | appropriateness of using or redistributing the Work and assume any
153 | risks associated with Your exercise of permissions under this License.
154 |
155 | 8. Limitation of Liability. In no event and under no legal theory,
156 | whether in tort (including negligence), contract, or otherwise,
157 | unless required by applicable law (such as deliberate and grossly
158 | negligent acts) or agreed to in writing, shall any Contributor be
159 | liable to You for damages, including any direct, indirect, special,
160 | incidental, or consequential damages of any character arising as a
161 | result of this License or out of the use or inability to use the
162 | Work (including but not limited to damages for loss of goodwill,
163 | work stoppage, computer failure or malfunction, or any and all
164 | other commercial damages or losses), even if such Contributor
165 | has been advised of the possibility of such damages.
166 |
167 | 9. Accepting Warranty or Additional Liability. While redistributing
168 | the Work or Derivative Works thereof, You may choose to offer,
169 | and charge a fee for, acceptance of support, warranty, indemnity,
170 | or other liability obligations and/or rights consistent with this
171 | License. However, in accepting such obligations, You may act only
172 | on Your own behalf and on Your sole responsibility, not on behalf
173 | of any other Contributor, and only if You agree to indemnify,
174 | defend, and hold each Contributor harmless for any liability
175 | incurred by, or claims asserted against, such Contributor by reason
176 | of your accepting any such warranty or additional liability.
177 |
178 | END OF TERMS AND CONDITIONS
179 |
--------------------------------------------------------------------------------
/Src/SplitPipelineCommand.cs:
--------------------------------------------------------------------------------
1 | using System.Collections;
2 | using System.Diagnostics;
3 | using System.Management.Automation;
4 | using System.Management.Automation.Runspaces;
5 | using System.Reflection;
6 |
7 | namespace SplitPipeline;
8 |
/// <summary>
/// Implements the Split-Pipeline cmdlet: splits pipeline input into parts and
/// processes them by parallel pipelines running in separate runspaces.
/// </summary>
[Cmdlet(VerbsCommon.Split, "Pipeline")]
public sealed class SplitPipelineCommand : PSCmdlet, IDisposable
{
    /// <summary>The script invoked by each parallel pipeline with a part of input.</summary>
    [Parameter(Position = 0, Mandatory = true)]
    public ScriptBlock Script { get; set; }

    /// <summary>Input objects, normally taken from the pipeline.</summary>
    [Parameter(Position = 1, ValueFromPipeline = true)]
    public PSObject InputObject { get; set; }

    /// <summary>The script invoked once by each created pipeline before processing.</summary>
    [Parameter]
    public ScriptBlock Begin { get; set; }

    /// <summary>The script invoked once by each pipeline after all input is processed.</summary>
    [Parameter]
    public ScriptBlock End { get; set; }

    /// <summary>The script invoked once by each pipeline on closing, even after failures or stopping.</summary>
    [Parameter]
    public ScriptBlock Finally { get; set; }

    /// <summary>Names of variables exported from the current session to the parallel runspaces.</summary>
    [Parameter]
    public string[] Variable { get; set; }

    /// <summary>Names of functions exported from the current session to the parallel runspaces.</summary>
    [Parameter]
    public string[] Function { get; set; }

    /// <summary>Names of modules imported into the parallel runspaces.</summary>
    [Parameter]
    public string[] Module { get; set; }

    /// <summary>
    /// Maximum number of parallel pipelines, or a {minimum, maximum} pair.
    /// With a pair, the effective count is the processor count clamped to the range.
    /// Values less than 1 are ignored; the processor count is used by default.
    /// </summary>
    [Parameter]
    [ValidateCount(1, 2)]
    public int[] Count
    {
        // Write-only in effect: the getter exists only to satisfy the parameter binder.
        get { return null; }
        set
        {
            if (value[0] < 1)
                return;

            if (value.Length == 1)
                _Count = value[0];
            else if (value[0] > value[1])
                throw new PSArgumentException("Count maximum must be greater or equal to minimum.");
            else
                _Count = Math.Max(value[0], Math.Min(value[1], Environment.ProcessorCount));
        }
    }
    int _Count;

    /// <summary>Tells to write job output in the input order (jobs are drained first-in first-out).</summary>
    [Parameter]
    public SwitchParameter Order { get; set; }

    /// <summary>Tells that jobs may emit [ref] objects which are put back into the input queue.</summary>
    [Parameter]
    public SwitchParameter Refill { get; set; }

    /// <summary>
    /// Minimum (and optional maximum) number of items fed to a job at a time.
    /// Values less than 1 are ignored.
    /// </summary>
    [Parameter]
    [ValidateCount(1, 2)]
    public int[] Load
    {
        get { return _Load; }
        set
        {
            if (value[0] < 1)
                return;

            if (value.Length == 2 && value[0] > value[1])
                throw new PSArgumentException("Load maximum must be greater than or equal to minimum.");

            _Load = value;
            MinLoad = value[0];
            if (value.Length == 2)
                MaxLoad = value[1];
        }
    }
    int[] _Load;
    int MinLoad = 1;
    int MaxLoad = int.MaxValue;
    int MaxQueue = int.MaxValue;

    /// <summary>
    /// Input filter: either a hashtable used as a "seen" set (duplicates are skipped)
    /// or a script block predicate (items evaluating to false are skipped).
    /// </summary>
    [Parameter]
    public PSObject Filter
    {
        get { return _Filter; }
        set
        {
            if (value != null)
            {
                _Filter = value;
                _FilterHash = value.BaseObject as IDictionary;
                if (_FilterHash == null)
                {
                    _FilterScript = value.BaseObject as ScriptBlock;
                    if (_FilterScript == null)
                        throw new PSArgumentException("Expected a hashtable or a script block.");
                }
            }
        }
    }
    PSObject _Filter;
    IDictionary _FilterHash;
    ScriptBlock _FilterScript;

    /// <summary>Apartment state of the job runspace threads (applied via reflection, see Feed).</summary>
    [Parameter]
    public ApartmentState ApartmentState
    {
        set => _ApartmentState = value;
    }
    ApartmentState? _ApartmentState;

    // Session state template shared by all job runspaces.
    readonly InitialSessionState _iss = InitialSessionState.CreateDefault();
    // Pending input items not yet fed to any job.
    readonly Queue<PSObject> _queue = new Queue<PSObject>();
    // Jobs ready for the next part of input.
    readonly LinkedList<Job> _done = new LinkedList<Job>();
    // Jobs currently processing a part of input.
    readonly LinkedList<Job> _work = new LinkedList<Job>();
    readonly Stopwatch _infoTimeTotal = Stopwatch.StartNew();
    readonly object _syncObject = new object();
    string _Script, _Begin, _End, _Finally;
    bool xStop; // set by StopProcessing (Ctrl+C); checked everywhere to abort quickly
    bool _closed;
    bool _verbose;
    bool _isExpectingInput;
    int _infoItemCount;
    int _infoPartCount;
    int _infoWaitCount;
    int _infoMaxQueue;

    /// <summary>
    /// Converts scripts to strings, resolves Count/MaxQueue, prepares the initial
    /// session state (modules, variables, functions), and enqueues InputObject items
    /// when they are passed as a parameter rather than via the pipeline.
    /// </summary>
    protected override void BeginProcessing()
    {
        // convert scripts to strings
        _Script = Script.ToString();
        if (Begin != null)
            _Begin = Begin.ToString();
        if (End != null)
            _End = End.ToString();
        if (Finally != null)
            _Finally = Finally.ToString();

        // Count
        if (_Count <= 0)
            _Count = Environment.ProcessorCount;

        // MaxQueue after Count; the guard avoids int overflow in _Count * MaxLoad
        if (MaxLoad < int.MaxValue / _Count)
            MaxQueue = _Count * MaxLoad;

        // to import modules
        if (Module != null)
            _iss.ImportPSModule(Module);

        // import variables
        _iss.Variables.Add(new SessionStateVariableEntry("LogEngineLifeCycleEvent", false, string.Empty)); // whole log disabled
        _iss.Variables.Add(new SessionStateVariableEntry("LogProviderLifeCycleEvent", false, string.Empty)); // start is still logged
        _iss.Variables.Add(new SessionStateVariableEntry("Pipeline", new Helper(), "Pipeline helper"));
        if (Variable != null)
        {
            foreach (var name in Variable)
                _iss.Variables.Add(new SessionStateVariableEntry(name, GetVariableValue(name), string.Empty));
        }

        // import functions
        if (Function != null)
        {
            foreach (var name in Function)
            {
                var function = (FunctionInfo)SessionState.InvokeCommand.GetCommand(name, CommandTypes.Function);
                _iss.Commands.Add(new SessionStateFunctionEntry(name, function.Definition));
            }
        }

        // verbose state
        if (MyInvocation.BoundParameters.TryGetValue("Verbose", out object parameter))
        {
            _verbose = ((SwitchParameter)parameter).ToBool();
        }
        else
        {
            // #12 VerbosePreference value can be anything
            if (LanguagePrimitives.TryConvertTo(GetVariableValue("VerbosePreference"), out ActionPreference preference))
                _verbose = preference != ActionPreference.SilentlyContinue;
        }

        // if items are sent as the parameter then enqueue them
        _isExpectingInput = MyInvocation.ExpectingInput;
        if (!_isExpectingInput)
        {
            var items = LanguagePrimitives.GetEnumerable(InputObject);
            if (items == null)
            {
                // a single non-enumerable argument: treat it as pipeline-style input
                _isExpectingInput = true;
            }
            else
            {
                foreach (var it in items)
                    if (it == null)
                        Enqueue(null);
                    else
                        Enqueue(PSObject.AsPSObject(it));
            }
        }
    }
    /// <summary>
    /// Queues the current input item and feeds jobs when enough input is collected.
    /// </summary>
    protected override void ProcessRecord()
    {
        try
        {
            // add to the queue
            if (_isExpectingInput)
                Enqueue(InputObject);

            // simple mode or too few items for a job?
            if (Load == null || _queue.Count < MinLoad)
                return;

            // force feed while the queue is too large;
            // NB: Feed with Refill may add new items
            while (_queue.Count >= MaxQueue && !xStop)
                Feed(true);

            // try to feed available jobs normally
            if (_queue.Count >= MinLoad && !xStop)
                Feed(false);
        }
        catch
        {
            // ignore errors on stopping
            if (!xStop)
                throw;
        }
    }
    /// <summary>
    /// Drains the remaining queue and working jobs, writes summary info,
    /// and invokes the End script in each created pipeline.
    /// </summary>
    protected override void EndProcessing()
    {
        try
        {
            // force feed while there are items or working jobs
            // NB: jobs with Refill may add new items
            while (_queue.Count > 0 || _work.Count > 0)
            {
                if (xStop)
                    return;

                // verbose info
                if (_verbose)
                    WriteVerbose(string.Format(null, "Split-Pipeline: Jobs = {0}; Load = End; Queue = {1}", _work.Count, _queue.Count));

                // #10 nothing to feed, wait
                if (_queue.Count == 0)
                    Wait();

                Feed(true);
            }

            // summary info
            if (xStop)
                return;
            if (_verbose)
                WriteVerbose(string.Format(null, @"Split-Pipeline:
Item count = {0}
Part count = {1}
Pipe count = {2}
Wait count = {3}
Max queue = {4}
Total time = {5}
Items /sec = {6}
", _infoItemCount
, _infoPartCount
, _done.Count
, _infoWaitCount
, _infoMaxQueue
, _infoTimeTotal.Elapsed
, _infoItemCount / _infoTimeTotal.Elapsed.TotalSeconds));

            // invoke the end script
            if (_End != null)
            {
                foreach (var job in _done)
                {
                    if (xStop)
                        return;
                    WriteResults(job, job.InvokeEnd(_End));
                }
            }
        }
        catch
        {
            // ignore errors on stopping
            if (!xStop)
                throw;
        }
    }
    /// <summary>
    /// Called on Ctrl+C: raises the stop flag and closes all jobs.
    /// </summary>
    protected override void StopProcessing()
    {
        xStop = true;
        Close();
    }
    /// <summary>
    /// Closes jobs and runspaces unless already closed.
    /// </summary>
    public void Dispose()
    {
        if (!_closed)
            Close();
    }

    /// <summary>
    /// Adds the object to the queue unless it is filtered out.
    /// Callers check the maximum queue count.
    /// </summary>
    void Enqueue(PSObject value)
    {
        // filter
        // NOTE(review): with -Filter a null input item would throw here on value.BaseObject — confirm whether nulls can reach this path.
        if (Filter != null)
        {
            if (_FilterHash != null)
            {
                if (_FilterHash.Contains(value.BaseObject))
                    return;

                // NOTE(review): lookup uses BaseObject but the PSObject wrapper is added as the key;
                // this relies on PSObject equality/hash forwarding to the base object — verify.
                _FilterHash.Add(value, null);
            }
            else
            {
                if (!LanguagePrimitives.IsTrue(_FilterScript.InvokeReturnAsIs(value)))
                    return;
            }
        }

        // enqueue
        _queue.Enqueue(value);

        // update info
        ++_infoItemCount;
        if (_infoMaxQueue < _queue.Count)
            _infoMaxQueue = _queue.Count;
    }
    /// <summary>
    /// Gets the next part of input items and feeds them to a ready job.
    /// If forced waits for a ready job.
    /// </summary>
    void Feed(bool force)
    {
        // try to make more jobs ready and more input available on Refill
        Take();

        // no input? check this after taking, Refill adds input on taking
        if (_queue.Count == 0)
            return;

        // all busy?
        if (_Count - _work.Count == 0)
        {
            // no ready jobs, done if not forced
            if (!force)
                return;

            // wait for jobs and make them ready
            Wait();
            Take();
        }

        // split the queue equally between all potential jobs (ceiling division)
        int load = _queue.Count / _Count;
        if (load * _Count < _queue.Count)
            ++load;

        // check limits
        if (load < MinLoad)
            load = MinLoad;
        else if (load > MaxLoad)
            load = MaxLoad;

        lock (_syncObject)
        {
            int nReadyJobs = _Count - _work.Count;
            if (xStop || nReadyJobs == 0)
                return;

            do
            {
                // limit load by the queue
                if (load > _queue.Count)
                {
                    load = _queue.Count;

                    // if load is less than minimum and not forced then exit
                    if (load < MinLoad && !force)
                        return;
                }

                // next job node
                LinkedListNode<Job> node = _done.First;
                if (node == null)
                {
                    // v1.4.2 Runspaces use the same host as the cmdlet.
                    var runspace = RunspaceFactory.CreateRunspace(Host, _iss);
                    if (_ApartmentState.HasValue)
                    {
                        // Runspace.ApartmentState is not exposed on netstandard2.0, so set it via reflection.
                        var info = typeof(Runspace).GetProperty("ApartmentState", BindingFlags.Public | BindingFlags.Instance);
                        info.SetValue(runspace, _ApartmentState.Value, null);
                    }

                    var job = new Job(runspace);
                    node = new LinkedListNode<Job>(job);
                    _work.AddLast(node);
                    WriteResults(job, job.InvokeBegin(_Begin, _Script));
                }
                else
                {
                    _done.RemoveFirst();
                    _work.AddLast(node);
                }

                if (xStop)
                    return;

                // feed the job
                ++_infoPartCount;
                node.Value.BeginInvoke(_queue, load);

                // show feed info
                if (_verbose)
                    WriteVerbose(string.Format(null, "Split-Pipeline: Jobs = {0}; Load = {1}; Queue = {2}", _work.Count, load, _queue.Count));
            }
            while (!xStop && --nReadyJobs > 0 && _queue.Count > 0);
        }
    }
    /// <summary>
    /// Finds finished jobs, writes their output, moves them to done.
    /// If Order stops on the first found working job, it should finish.
    /// </summary>
    void Take()
    {
        lock (_syncObject)
        {
            var node = _work.First;
            while (node != null)
            {
                if (node.Value.IsWorking)
                {
                    if (Order)
                        break;

                    node = node.Next;
                    continue;
                }

                // complete the job
                var job = node.Value;
                if (xStop)
                    return;
                WriteResults(job, job.EndInvoke());

                // move node to done, do next
                var next = node.Next;
                _work.Remove(node);
                _done.AddLast(node);
                node = next;
            }
        }
    }
    /// <summary>
    /// Waits for any job to finish. If Order then it is the first job in the queue.
    /// </summary>
    void Wait()
    {
        var wait = new List<WaitHandle>(_Count);

        lock (_syncObject)
        {
            ++_infoWaitCount;

            if (Order)
            {
                // drain the first job synchronously to preserve input order
                var node = _work.First;
                var job = node.Value;
                WriteResults(job, job.EndInvoke());
                _work.Remove(node);
                _done.AddLast(node);
                return;
            }

            foreach (var job in _work)
                wait.Add(job.WaitHandle);
        }

        //! issue #3: used to hang
        WaitHandle.WaitAny(wait.ToArray());
    }
    /// <summary>
    /// Writes job output objects and propagates streams.
    /// Moves refilling objects from output to the queue.
    /// </summary>
    /// <remarks>
    /// v1.4.2 Only errors are propagated, other streams are written to the host.
    /// </remarks>
    void WriteResults(Job job, ICollection<PSObject> output)
    {
        // process output
        if (output != null && output.Count > 0)
        {
            if (Refill)
            {
                foreach (var it in output)
                {
                    if (it != null)
                    {
                        // [ref] wrapped objects go back to the input queue, everything else to output
                        if (it.BaseObject is PSReference reference)
                            Enqueue(new PSObject(reference.Value));
                        else
                            WriteObject(it);
                    }
                }
            }
            else
            {
                foreach (var it in output)
                    WriteObject(it);
            }
        }

        // process streams
        var streams = job.Streams;

        // v1.4.2 Even with the shared host errors must be propagated explicitly.
        if (streams.Error.Count > 0)
        {
            foreach (var record in streams.Error)
                WriteError(record);
        }

        // ensure warnings are added to the variable
        // https://github.com/nightroman/SplitPipeline/issues/29
        if (streams.Warning.Count > 0 && MyInvocation.BoundParameters.TryGetValue("WarningVariable", out var warningVariable))
        {
            var list = (ArrayList)GetVariableValue((string)warningVariable);
            foreach (var record in streams.Warning)
                list.Add(record);
        }

        // v1.4.2 Debug, progress, verbose, and warning messages are written to the host.
        // But streams are still populated, so we clear them on writing results.
        // NB: It is possible to log these streams in addition.
        streams.ClearStreams();
    }
    /// <summary>
    /// Moves all jobs to done then for each job:
    /// -- calls the finally script;
    /// -- closes the job.
    /// </summary>
    void Close()
    {
        lock (_syncObject)
        {
            // close once
            if (_closed)
                return;
            _closed = true;

            // move jobs to done
            while (_work.Count > 0)
            {
                var node = _work.First;
                _work.RemoveFirst();
                _done.AddLast(node);
            }

            // done?
            if (_done.Count == 0)
                return;

            // invoke the finally script always, do not throw, closing is ahead
            if (_Finally != null)
            {
                // let them all to work
                var exceptions = new List<Exception>();
                foreach (var job in _done)
                {
                    try
                    {
                        job.InvokeFinally(_Finally);
                    }
                    catch (Exception e)
                    {
                        exceptions.Add(e);
                    }
                }

                // then write errors as warnings
                if (exceptions.Count > 0 && !xStop)
                {
                    try
                    {
                        foreach (var e in exceptions)
                            WriteWarning("Exception in Finally: " + e.Message);
                    }
                    catch (RuntimeException)
                    { }
                }
            }

            // close jobs
            foreach (var job in _done)
                job.Close();
        }
    }
}
607 |
--------------------------------------------------------------------------------