├── .github └── FUNDING.yml ├── .gitignore ├── Src ├── Helper.cs ├── SplitPipeline.csproj ├── Job.cs └── SplitPipelineCommand.cs ├── Module └── en-US │ ├── about_SplitPipeline.help.txt │ └── SplitPipeline.dll-Help.ps1 ├── Tests ├── Test-Transcript.ps1 ├── Test-ProgressJobs.ps1 ├── Order.test.ps1 ├── Filter.test.ps1 ├── Host.test.ps1 ├── Import.test.ps1 ├── Test-Start-Job.ps1 ├── Test-ProgressTotal2.ps1 ├── Count.test.ps1 ├── Test-ProgressTotal.ps1 ├── Scripts.test.ps1 ├── About.test.ps1 ├── Stopping.test.ps1 ├── Test-Stopping-Random.ps1 ├── Test-Refill.ps1 └── Load.test.ps1 ├── README.md ├── 1.build.ps1 ├── Release-Notes.md └── LICENSE /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [nightroman] 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | obj 3 | z 4 | z.* 5 | *.html 6 | *.user 7 | launchSettings.json 8 | Module/*.psd1 9 | Src/Directory.Build.props 10 | -------------------------------------------------------------------------------- /Src/Helper.cs: -------------------------------------------------------------------------------- 1 | using System.Management.Automation; 2 | 3 | namespace SplitPipeline; 4 | 5 | /// 6 | /// Pipeline helper methods exposed via the variable. 7 | /// 8 | public class Helper 9 | { 10 | /// 11 | /// Invokes the script with mutually exclusive lock. 
12 | /// 13 | public object Lock(ScriptBlock script) 14 | { 15 | if (script == null) throw new ArgumentNullException(nameof(script)); 16 | lock (this) 17 | { 18 | return script.InvokeReturnAsIs(); 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Src/SplitPipeline.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | netstandard2.0 4 | 10.0 5 | enable 6 | true 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Module/en-US/about_SplitPipeline.help.txt: -------------------------------------------------------------------------------- 1 | TOPIC 2 | about_SplitPipeline 3 | 4 | SHORT DESCRIPTION 5 | SplitPipeline - Parallel Data Processing in PowerShell 6 | 7 | LONG DESCRIPTION 8 | The only cmdlet is Split-Pipeline. It splits the input, processes parts by 9 | parallel pipelines, and outputs data for further processing. It may work 10 | without collecting the whole input, large or infinite. 11 | 12 | Get help: 13 | PS> Import-Module SplitPipeline 14 | PS> help -Full Split-Pipeline 15 | 16 | SEE ALSO 17 | Project site: https://github.com/nightroman/SplitPipeline 18 | Split-Pipeline 19 | -------------------------------------------------------------------------------- /Tests/Test-Transcript.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | How to use transcript with Write-Host in pipelines. 4 | 5 | .Description 6 | This technique works around the issue #25. 7 | 8 | .Link 9 | https://github.com/nightroman/SplitPipeline/issues/25 10 | #> 11 | 12 | Start-Transcript "$env:TEMP\z.log" 13 | 14 | # The helper for Write-Host for pipelines working with transcript.
15 | $helper = New-Module -AsCustomObject -ScriptBlock { 16 | Import-Module Microsoft.PowerShell.Utility 17 | function WriteHost { 18 | Write-Host $args[0] 19 | } 20 | } 21 | 22 | 1..42 | Split-Pipeline -Variable helper -Script {process{ 23 | # call the helper Write-Host using the lock 24 | $Pipeline.Lock({ $helper.WriteHost("log ($_)") }) 25 | 26 | # normal processing 27 | "process ($_)" 28 | }} 29 | 30 | Stop-Transcript 31 | -------------------------------------------------------------------------------- /Tests/Test-ProgressJobs.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | How to use Write-Progress in jobs to show each job progress. 5 | 6 | .Description 7 | The Begin script assigns $id to each job using the shared counter $lastId. 8 | $lastId does not have to be synchronised because Begin is invoked for each 9 | job on its creation synchronously. As far as Begin is invoked in a separate 10 | runspace, the counter has to be passed in via Variable. 11 | 12 | Then each job uses its $id as activity ID for Write-Progress so that each 13 | job progress is visualized separately. 14 | #> 15 | 16 | Import-Module SplitPipeline 17 | 18 | $lastId = [ref]0 19 | 20 | 1..100 | Split-Pipeline -Count 5 -Variable lastId { 21 | $data = @($input) 22 | for($1 = 1; $1 -le $data.Count; ++$1) { 23 | Write-Progress -Id $id -Activity "Job $id" -Status Processing -PercentComplete (100*$1/$data.Count) 24 | Start-Sleep -Milliseconds (Get-Random -Maximum 500) 25 | } 26 | } -Begin { 27 | $id = ++$lastId.Value 28 | } 29 | -------------------------------------------------------------------------------- /Tests/Order.test.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline -Order. 
5 | 6 | .Link 7 | Invoked by https://github.com/nightroman/Invoke-Build 8 | #> 9 | 10 | Import-Module SplitPipeline 11 | Set-StrictMode -Version Latest 12 | 13 | task Ordered { 14 | # common parameters of two tests 15 | $param = @{ 16 | Variable = 'lastId' 17 | Count = 3 18 | Load = 1, 5 19 | Begin = { 20 | $id = ++$lastId.Value 21 | } 22 | Script = { 23 | $input 24 | [System.Threading.Thread]::Sleep((3 - $id) * 50) 25 | } 26 | } 27 | 28 | $data = 1..100 29 | $sample = "$data" 30 | 31 | # unordered 32 | $lastId = [ref]-1 33 | ($r = 1..100 | Split-Pipeline @param) 34 | if ("$r" -eq $sample) { Write-Warning "Normally expected unordered data." } 35 | 36 | # ordered 37 | $lastId = [ref]-1 38 | ($r = 1..100 | Split-Pipeline -Order @param) 39 | equals "$r" $sample 40 | 41 | # ordered, 1.6.0 42 | $lastId = [ref]-1 43 | ($r = Split-Pipeline -Order @param (1..100)) 44 | equals "$r" $sample 45 | } 46 | -------------------------------------------------------------------------------- /Tests/Filter.test.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline -Filter. 5 | 6 | .Link 7 | Invoked by https://github.com/nightroman/Invoke-Build 8 | #> 9 | 10 | Import-Module SplitPipeline 11 | Set-StrictMode -Version Latest 12 | 13 | task Error { 14 | $$ = try { 1..9 | Split-Pipeline {} -Filter 42 } catch { $_ } 15 | assert ("$$" -clike @' 16 | *Exception setting "Filter": "Expected a hashtable or a script block." 
17 | '@) 18 | } 19 | 20 | task FilterInputUniqueByScript { 21 | $hash = @{} 22 | 1,1,2,2,3,3,4,4,5,5 | Split-Pipeline -OutVariable OutVariable {$input} -Filter { 23 | if (!$hash.Contains($args[0])) { 24 | $hash.Add($args[0], $null) 25 | $true 26 | } 27 | } 28 | equals $OutVariable.Count 5 29 | equals '1 2 3 4 5' (($OutVariable | Sort-Object) -join ' ') 30 | } 31 | 32 | task FilterInputUniqueByHashtable { 33 | 1,1,2,2,3,3,4,4,5,5 | Split-Pipeline -OutVariable OutVariable {$input} -Filter @{} 34 | equals $OutVariable.Count 5 35 | equals '1 2 3 4 5' (($OutVariable | Sort-Object) -join ' ') 36 | } 37 | -------------------------------------------------------------------------------- /Tests/Host.test.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline host features. 5 | 6 | .Link 7 | Invoked by https://github.com/nightroman/Invoke-Build 8 | #> 9 | 10 | Import-Module SplitPipeline 11 | Set-StrictMode -Version Latest 12 | $Version = $PSVersionTable.PSVersion.Major 13 | 14 | task ProgressJobs { 15 | exec { PowerShell.exe .\Test-ProgressJobs.ps1 } 16 | } 17 | 18 | task ProgressTotal { 19 | exec { PowerShell.exe .\Test-ProgressTotal.ps1 } 20 | } 21 | 22 | task ProgressTotal2 { 23 | exec { PowerShell.exe .\Test-ProgressTotal2.ps1 } 24 | } 25 | 26 | task WriteHost { 27 | 1..5 | Split-Pipeline -Count 5 -Variable lastId {process{ 28 | Write-Host "Item $_" 29 | "Done $_" 30 | }} 31 | } 32 | 33 | task Transcript -If ($Version -ge 5) { 34 | .\Test-Transcript.ps1 35 | 36 | $r = [IO.File]::ReadAllLines("$env:TEMP\z.log") 37 | assert ($r -contains 'log (1)') 38 | assert ($r -contains 'log (42)') 39 | assert ($r -contains 'process (1)') 40 | assert ($r -contains 'process (42)') 41 | 42 | remove "$env:TEMP\z.log" 43 | } 44 | -------------------------------------------------------------------------------- /Tests/Import.test.ps1: 
-------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline -Variable -Function -Module. 5 | 6 | .Link 7 | Invoked by https://github.com/nightroman/Invoke-Build 8 | #> 9 | 10 | Import-Module SplitPipeline 11 | Set-StrictMode -Version Latest 12 | 13 | task ImportVariable { 14 | $value1 = 1 15 | $value2 = 2 16 | $result = 1..10 | Split-Pipeline -Count 2 -Variable value1, value2 { 17 | if ($value1 -ne 1) {throw 'value1'} 18 | if ($value2 -ne 2) {throw 'value2'} 19 | $input 20 | } 21 | equals $result.Count 10 22 | } 23 | 24 | task ImportFunction { 25 | function Function1 {1} 26 | function Function2 {2} 27 | $result = 1..10 | Split-Pipeline -Count 2 -Function Function1, Function2 { 28 | if ((Function1) -ne 1) {throw 'Function1'} 29 | if ((Function2) -ne 2) {throw 'Function2'} 30 | $input 31 | } 32 | equals $result.Count 10 33 | } 34 | 35 | task ImportModule { 36 | $result = 1..10 | Split-Pipeline -Count 2 -Module SplitPipeline { 37 | $input | Split-Pipeline -Count 2 {$input} 38 | } 39 | equals $result.Count 10 40 | } 41 | -------------------------------------------------------------------------------- /Tests/Test-Start-Job.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | How to use Start-Job for pipelines in separate processes. 4 | 5 | .Description 6 | Use Start-Job to run pipelines in separate processes, e.g. in cases like 7 | https://github.com/nightroman/SplitPipeline/issues/32 8 | 9 | The sample jobs would take ~8 seconds when run sequentially. 10 | With Split-Pipeline and Start-Job they take ~4 seconds. 11 | 12 | Note that Start-Job is relatively expensive and 13 | Split-Pipeline may work slower with faster jobs. 
14 | #> 15 | 16 | Import-Module SplitPipeline 17 | 18 | $sw = [System.Diagnostics.Stopwatch]::StartNew() 19 | 20 | $data = 1..8 | Split-Pipeline -Count 4 {process{ 21 | $job = Start-Job -ArgumentList $_ { 22 | # fake time consuming job 23 | Start-Sleep 1 24 | 25 | # output the current item and process ID 26 | [PSCustomObject]@{ 27 | Item = $args[0] 28 | PID = $PID 29 | } 30 | } 31 | $job | Receive-Job -Wait 32 | }} 33 | 34 | [PSCustomObject]@{ 35 | Time = $sw.Elapsed.TotalSeconds 36 | Data = $data 37 | } 38 | -------------------------------------------------------------------------------- /Tests/Test-ProgressTotal2.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | Test-ProgressTotal.ps1 using the helper $Pipeline.Lock. 4 | 5 | .Description 6 | This sample is the simplified variant of Test-ProgressTotal.ps1. 7 | The helper $Pipeline.Lock was introduced for scenarios like this. 8 | 9 | .Notes 10 | [hashtable]::Synchronized() or concurrent dictionary are tempting but not 11 | suitable for increments or counters due to their not atomic nature. 12 | #> 13 | 14 | Import-Module SplitPipeline 15 | 16 | # input items 17 | $items = 1..100 18 | 19 | # shared data 20 | $data = @{ 21 | Count = $items.Count 22 | Done = 0 23 | } 24 | 25 | $items | Split-Pipeline -Count 5 -Variable data {process{ 26 | # simulate some job 27 | Start-Sleep -Milliseconds (Get-Random -Maximum 500) 28 | 29 | # update and get shared data using the lock 30 | #! covers `InvokeReturnAsIs` instead of `Invoke` 31 | $done = $Pipeline.Lock({ $done = ++$data.Done; $done }) 32 | 33 | # show progress 34 | Write-Progress -Activity "Done $done" -Status Processing -PercentComplete (100 * $done / $data.Count) 35 | }} 36 | 37 | # assert 38 | if ($data.Done -ne $items.Count) { throw 'Processed and input item counts must be equal.' 
} 39 | -------------------------------------------------------------------------------- /Tests/Count.test.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline -Count. 5 | 6 | .Link 7 | Invoked by https://github.com/nightroman/Invoke-Build 8 | #> 9 | 10 | Import-Module SplitPipeline 11 | Set-StrictMode -Version Latest 12 | 13 | # Use large enough number of items. Small number may not load all cores. 14 | # Example: 20 items for 8 cores actually gives 7 pipes: 3, 3, .. 2 15 | $ItemCount = 1000 16 | $ProcessorCount = [Environment]::ProcessorCount 17 | 18 | task Error { 19 | # [0] <= 0 ~ default 20 | $r = 1..$ItemCount | Split-Pipeline {@($input).Count} -Count 0, -1 21 | equals $r.Count $ProcessorCount 22 | 23 | $$ = try { 1..9 | Split-Pipeline {} -Count 1, -1 } catch { $_ } 24 | assert ("$$" -clike @' 25 | *Exception setting "Count": "Count maximum must be greater or equal to minimum." 26 | '@) 27 | } 28 | 29 | task LessThanProcessorCount { 30 | $r = @(1..$ItemCount | Split-Pipeline {1} -Count 1, 1) 31 | equals $r.Count 1 32 | } 33 | 34 | task EqualToProcessorCount0 { 35 | $r = @(1..$ItemCount | Split-Pipeline {1} -Count 1, $ProcessorCount) 36 | equals $r.Count $ProcessorCount 37 | } 38 | 39 | task EqualToProcessorCount1 { 40 | $r = @(1..$ItemCount | Split-Pipeline {1} -Count 1, ($ProcessorCount + 1)) 41 | equals $r.Count $ProcessorCount 42 | } 43 | -------------------------------------------------------------------------------- /Tests/Test-ProgressTotal.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | How to use Write-Progress in jobs to show the total progress. 4 | 5 | .Description 6 | The hashtable $data is used by jobs simultaneously. It contains the total 7 | number of items Count (read only) and the counter of processed items Done 8 | (read and written). 
These data are used to calculate the percentage for 9 | Write-Progress. 10 | 11 | Note that Done is updated in a critical section. Use of try/finally there 12 | may be redundant in this trivial example but this is the standard pattern. 13 | 14 | .Notes 15 | [hashtable]::Synchronized() or concurrent dictionary are tempting but not 16 | suitable for increments or counters due to their not atomic nature. 17 | #> 18 | 19 | Import-Module SplitPipeline 20 | 21 | # input items 22 | $items = 1..100 23 | 24 | # shared data 25 | $data = @{ 26 | Count = $items.Count 27 | Done = 0 28 | } 29 | 30 | $items | Split-Pipeline -Count 5 -Variable data {process{ 31 | # simulate some job 32 | Start-Sleep -Milliseconds (Get-Random -Maximum 500) 33 | 34 | # enter the critical section 35 | [System.Threading.Monitor]::Enter($data.SyncRoot) 36 | try { 37 | # update shared data 38 | $done = ++$data.Done 39 | } 40 | finally { 41 | # exit the critical section, same object as passed to Enter 42 | [System.Threading.Monitor]::Exit($data.SyncRoot) 43 | } 44 | 45 | # show progress 46 | Write-Progress -Activity "Done $done" -Status Processing -PercentComplete (100 * $done / $data.Count) 47 | }} 48 | 49 | # assert 50 | if ($data.Done -ne $items.Count) { throw 'Processed and input item counts must be equal.' } 51 | -------------------------------------------------------------------------------- /Tests/Scripts.test.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | Tests Split-Pipeline -Begin -Script -End -Finally.
4 | 5 | .Link 6 | Invoked by https://github.com/nightroman/Invoke-Build 7 | #> 8 | 9 | Import-Module SplitPipeline 10 | Set-StrictMode -Version Latest 11 | 12 | $IsCore = $PSVersionTable.PSEdition -eq 'Core' 13 | 14 | task Finally1 { 15 | $1 = '' 16 | try { 17 | 1..10 | Split-Pipeline -Count 2 -Load 1 ` 18 | -Script {throw 'Throw in Script'} ` 19 | -Finally {throw 'Throw in Finally'} 20 | } 21 | catch { $1 = "$_" } 22 | equals $1 'Throw in Script' 23 | } 24 | 25 | task Finally2 { 26 | $result = @( 27 | 1..2 | Split-Pipeline -Count 2 -Load 1 ` 28 | -Script {process{$_}} ` 29 | -Finally {throw 'Throw in Finally'} 30 | ) 31 | 32 | assert ($result.Count -eq 2) $result.Count 33 | } 34 | 35 | task BeginProcessEnd { 36 | $DebugPreference = 'Continue' 37 | 38 | # Use error action Continue or Write-Error will stop 39 | $result = 1..4 | Split-Pipeline -ErrorAction Continue -Count 2 -Load 1 -Verbose ` 40 | -Begin { 41 | $DebugPreference = 'Continue' 42 | $VerbosePreference = 'Continue' 43 | 'begin split' 44 | Write-Debug 'Debug in begin split' 45 | Write-Error 'Error in begin split' 46 | Write-Verbose 'Verbose in begin split' 47 | Write-Warning 'Warning in begin split' 48 | } ` 49 | -End { 50 | 'end split' 51 | Write-Debug 'Debug in end split' 52 | Write-Error 'Error in end split' 53 | Write-Verbose 'Verbose in end split' 54 | Write-Warning 'Warning in end split' 55 | } ` 56 | -Script { 57 | begin { 58 | 'begin part' 59 | Write-Debug 'Debug in script' 60 | Write-Error 'Error in script' 61 | Write-Verbose 'Verbose in script' 62 | Write-Warning 'Warning in script' 63 | } 64 | process { 65 | $_ 66 | } 67 | end { 68 | 'end part' 69 | } 70 | } 71 | $result 72 | 73 | # 1 or 2 'begin/end split' due to -Count 2 74 | $begin_split = ($result -eq 'begin split').Count 75 | $end_split = ($result -eq 'end split').Count 76 | assert ($begin_split -eq 1 -or $begin_split -eq 2) $begin_split 77 | assert ($end_split -eq 1 -or $end_split -eq 2) $end_split 78 | equals $begin_split $end_split 79 | 
80 | # 4 'begin/end part' due to 4 items and -Limit 1 81 | equals ($result -eq 'begin part').Count 4 82 | equals ($result -eq 'end part').Count 4 83 | 84 | # all 85 | equals $result.Count (12 + 2 * $end_split) 86 | } 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PSGV](https://img.shields.io/powershellgallery/v/SplitPipeline)![PSGD](https://img.shields.io/powershellgallery/dt/SplitPipeline)](https://www.powershellgallery.com/packages/SplitPipeline) 2 | 3 | # SplitPipeline 4 | 5 | PowerShell module for parallel data processing 6 | 7 | SplitPipeline is designed for Windows PowerShell 5.1 and PowerShell Core. 8 | It provides the only command `Split-Pipeline`. 9 | 10 | `Split-Pipeline` splits the input, processes parts by parallel pipelines, and 11 | outputs results. It may work without collecting the whole input, large or 12 | infinite. 13 | 14 | ## Quick Start 15 | 16 | **Step 1:** Get and install. 17 | 18 | The module is published at the PSGallery: [SplitPipeline](https://www.powershellgallery.com/packages/SplitPipeline). 19 | It may be installed by this command: 20 | 21 | ```powershell 22 | Install-Module SplitPipeline 23 | ``` 24 | 25 | **Step 2:** Import the module: 26 | 27 | ```powershell 28 | Import-Module SplitPipeline 29 | ``` 30 | 31 | **Step 3:** Take a look at help: 32 | 33 | ```powershell 34 | help Split-Pipeline 35 | ``` 36 | 37 | **Step 4:** Try these three commands performing the same job simulating long 38 | but not processor consuming operations on each item: 39 | 40 | ```powershell 41 | 1..10 | . {process{ $_; sleep 1 }} 42 | 1..10 | Split-Pipeline {process{ $_; sleep 1 }} 43 | 1..10 | Split-Pipeline -Count 10 {process{ $_; sleep 1 }} 44 | ``` 45 | 46 | Output of all commands is the same, numbers from 1 to 10 (Split-Pipeline does 47 | not guarantee the same order without the switch `Order`). 
But consumed times 48 | are different. Let's measure them: 49 | 50 | ```powershell 51 | Measure-Command { 1..10 | . {process{ $_; sleep 1 }} } 52 | Measure-Command { 1..10 | Split-Pipeline {process{ $_; sleep 1 }} } 53 | Measure-Command { 1..10 | Split-Pipeline -Count 10 {process{ $_; sleep 1 }} } 54 | ``` 55 | 56 | The first command takes about 10 seconds. 57 | 58 | Performance of the second command depends on the number of processors which is 59 | used as the default split count. For example, with 2 processors it takes about 60 | 6 seconds. 61 | 62 | The third command takes about 2 seconds. The number of processors is not very 63 | important for such sleeping jobs. The split count is important. Increasing it 64 | to some extent improves overall performance. As for intensive jobs, the split 65 | count normally should not exceed the number of processors. 66 | 67 | ## See also 68 | 69 | - [SplitPipeline Release Notes](https://github.com/nightroman/SplitPipeline/blob/main/Release-Notes.md) 70 | -------------------------------------------------------------------------------- /Tests/About.test.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | Tests Split-Pipeline. 4 | 5 | .Link 6 | Invoked by https://github.com/nightroman/Invoke-Build 7 | #> 8 | 9 | #requires -Modules SplitPipeline 10 | Set-StrictMode -Version 3 11 | 12 | task help { 13 | . 
Helps.ps1 14 | Test-Helps ..\Module\en-US\SplitPipeline.dll-Help.ps1 15 | } 16 | 17 | task ApartmentState { 18 | equals MTA (1 | Split-Pipeline { [System.Threading.Thread]::CurrentThread.ApartmentState.ToString() }) 19 | equals MTA (1 | Split-Pipeline -ApartmentState MTA { [System.Threading.Thread]::CurrentThread.ApartmentState.ToString() }) 20 | equals STA (1 | Split-Pipeline -ApartmentState STA { [System.Threading.Thread]::CurrentThread.ApartmentState.ToString() }) 21 | } 22 | 23 | task JobSoftErrorAndCmdletErrorContinueMode { 24 | 42 | Split-Pipeline -ErrorAction Continue -OutVariable OV -ErrorVariable EV {process{ 25 | $_ 26 | Get-Variable MissingSafe 27 | }} 28 | 29 | equals $OV.Count 1 30 | equals $OV[0] 42 31 | equals $EV.Count 1 32 | assert ('ObjectNotFound: (MissingSafe:String) [Split-Pipeline], ItemNotFoundException' -eq $EV[0].CategoryInfo) 33 | } 34 | 35 | task JobSoftErrorThenFailure { 36 | $e = '' 37 | try { 38 | 42 | Split-Pipeline {process{ 39 | Get-Variable MissingSafe 40 | Get-Variable MissingStop -ErrorAction Stop 41 | }} 42 | } 43 | catch {($e = $_)} 44 | assert ('ObjectNotFound: (MissingStop:String) [Get-Variable], ItemNotFoundException' -eq $e.CategoryInfo) 45 | } 46 | 47 | task Refill { 48 | .\Test-Refill.ps1 49 | } 50 | 51 | # Issue #12 52 | task VerbosePreferenceString { 53 | $VerbosePreference = 'Continue' 54 | 1 | Split-Pipeline { 55 | Write-Verbose test-verbose 56 | } 57 | } 58 | 59 | # Issue #12 60 | task VerbosePreferenceNumber { 61 | $VerbosePreference = 2 62 | 1 | Split-Pipeline { 63 | Write-Verbose test-verbose 64 | } 65 | } 66 | 67 | # Issue #12 68 | task VerbosePreferenceInvalid { 69 | $VerbosePreference = 'Invalid' 70 | 1 | Split-Pipeline { 71 | Write-Verbose test-verbose 72 | } 73 | } 74 | 75 | # Issue #29 76 | # 2024-01-11: With v2.0.0 or Windows 11 or new PC, output is less predictble 77 | task WarningVariable { 78 | 1..2 | Split-Pipeline -WarningVariable WV {process{ Write-Warning "test-WarningVariable" }} 79 | assert 
($WV.Count -ge 2) 80 | equals $WV[0].Message test-WarningVariable 81 | equals $WV[1].Message test-WarningVariable 82 | } 83 | 84 | # Issue #32 85 | task Test-Start-Job -If ($Host.Name -ne 'FarHost') { 86 | $r = ./Test-Start-Job.ps1 87 | $r | Out-String 88 | 89 | # expected saved time 90 | assert ($r.Time -lt 8) 91 | 92 | # expected 20 items with different PIDs 93 | $data = $r.Data | Sort-Object Item 94 | equals $data.Count 8 95 | equals $data[0].Item 1 96 | equals $data[-1].Item 8 97 | assert ($data[0].PID -ne $data[1].PID) 98 | } 99 | -------------------------------------------------------------------------------- /Tests/Stopping.test.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | Tests stopping of Split-Pipeline. 4 | #> 5 | 6 | #requires -Modules SplitPipeline 7 | Set-StrictMode -Version 3 8 | 9 | <# 10 | [Ctrl-C] hangs in v1.2.0, works in 1.2.1 https://github.com/nightroman/SplitPipeline/issues/3 11 | 12 | MANUAL TEST SCRIPT 13 | 14 | (!) Ensure notepad is configured to open new windows. 15 | 16 | 1..4 | Split-Pipeline -Verbose -Count 2 {process{ 17 | $p = Start-Process notepad -PassThru 18 | $p.WaitForExit() 19 | }} 20 | 21 | - Invoke the script. Two notepads are opened by two jobs. Split-Pipeline waits for them. 22 | - Press [Ctrl-C] in the console. Split-Pipeline still waits because WaitForExit is not stopped this way. 23 | - Close notepads. Split-Pipeline exits, not hangs. 
24 | #> 25 | task Issue3 { 26 | assert (!(Get-Process notepad -ErrorAction Ignore)) 27 | 28 | remove C:\TEMP\SplitPipelineIssue3 29 | $null = mkdir C:\TEMP\SplitPipelineIssue3 30 | 31 | # Split-Pipeline to be stopped 32 | $ps = [PowerShell]::Create() 33 | $null = $ps.AddScript({ 34 | Import-Module SplitPipeline 35 | 1..4 | Split-Pipeline -Verbose -Count 2 -Script {process{ 36 | $p = Start-Process notepad -PassThru 37 | $p.WaitForExit() 38 | }} -Begin { 39 | $id = [runspace]::DefaultRunspace.InstanceId 40 | 1 > "C:\TEMP\SplitPipelineIssue3\Begin-$id" 41 | } -End { 42 | 1 > "C:\TEMP\SplitPipelineIssue3\End-$id" 43 | } -Finally { 44 | 1 > "C:\TEMP\SplitPipelineIssue3\Finally-$id" 45 | } 46 | }) 47 | 48 | # start Split-Pipeline 49 | 'BeginInvoke' 50 | $null = $ps.BeginInvoke() 51 | 52 | # wait for two jobs to start, i.e. two processes 53 | while(@(Get-Process notepad -ErrorAction Ignore).Count -lt 2) { 54 | Start-Sleep -Milliseconds 100 55 | } 56 | 57 | # 2 jobs started 58 | equals @(Get-Process notepad).Count 2 59 | 60 | # start stopping, fake [Ctrl-C] 61 | 'BeginStop' 62 | $a2 = $ps.BeginStop($null, $null) 63 | 64 | #! kill processes, this releases jobs 65 | #! 
PSv2 Stop-Process is not enough 66 | Start-Sleep 2 67 | while(Get-Process notepad -ErrorAction Ignore) { 68 | Stop-Process -Name notepad 69 | Start-Sleep -Milliseconds 100 70 | } 71 | 72 | # wait, hangs in v1.2.0 73 | 'WaitOne' 74 | $null = $a2.AsyncWaitHandle.WaitOne() 75 | 76 | # no new jobs or processes (3 and 4) 77 | Start-Sleep 2 78 | assert (!(Get-Process notepad -ErrorAction Ignore)) 79 | 80 | # logs 81 | $logs = Get-Item C:\TEMP\SplitPipelineIssue3\* 82 | equals $logs.Count 4 83 | assert ($logs[0].Name -like 'Begin-*-*-*-*-*') 84 | assert ($logs[1].Name -like 'Begin-*-*-*-*-*') 85 | assert ($logs[2].Name -like 'Finally-*-*-*-*-*') 86 | assert ($logs[3].Name -like 'Finally-*-*-*-*-*') 87 | 88 | # end 89 | remove C:\TEMP\SplitPipelineIssue3 90 | } 91 | 92 | task Random { 93 | .\Test-Stopping-Random.ps1 10 94 | } 95 | -------------------------------------------------------------------------------- /Tests/Test-Stopping-Random.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests random stopping of Split-Pipeline. 5 | 6 | .Description 7 | Without parameters it repeats random tests infinitely. 8 | 9 | It starts Split-Pipeline with large enough input, slow Script, and Begin 10 | and Finally scripts. Then it waits for a random time and stops (like by 11 | Ctrl-C). Then it checks that Begin and Finally logs match, i.e. for each 12 | started job the Finally script should work even on stopping. 13 | 14 | .Parameter Repeat 15 | Specifies the number of tests. 
16 | #> 17 | 18 | param( 19 | $Repeat = [int]::MaxValue 20 | ) 21 | 22 | Set-StrictMode -Version Latest 23 | 24 | # global logs 25 | Add-Type @' 26 | using System; 27 | using System.Collections; 28 | public static class SplitPipelineLog { 29 | public static readonly ArrayList Begin = new ArrayList(); 30 | public static readonly ArrayList Finally = new ArrayList(); 31 | } 32 | '@ 33 | 34 | # test to be invoked async 35 | $test = { 36 | Import-Module SplitPipeline 37 | $VerbosePreference = 2 38 | $lastId = [ref]-1 39 | 40 | $param = @{ 41 | Variable = 'lastId' 42 | Verbose = $true 43 | Count = 10 44 | Load = 3, 1000 45 | Begin = { 46 | $random = New-Object System.Random 47 | $VerbosePreference = 2 48 | $id = ++$lastId.Value 49 | Write-Verbose "[$id] begin" 50 | $null = [SplitPipelineLog]::Begin.Add($id) 51 | } 52 | Finally = { 53 | $null = [SplitPipelineLog]::Finally.Add($id) 54 | } 55 | Script = { 56 | $all = @($input).Count 57 | Write-Verbose "[$id] $all items" 58 | [System.Threading.Thread]::Sleep($random.Next(0, 50)) 59 | } 60 | } 61 | 62 | 1..1mb | Split-Pipeline @param 63 | } 64 | 65 | # repeat random tests 66 | for($n = 1; $n -le $Repeat; ++$n) { 67 | "[$n]" + '-'*70 68 | 69 | # reset logs 70 | [SplitPipelineLog]::Begin.Clear() 71 | [SplitPipelineLog]::Finally.Clear() 72 | 73 | # start Split-Pipeline 74 | $rs = [runspacefactory]::CreateRunspace($Host) 75 | $rs.Open() 76 | $ps = [PowerShell]::Create() 77 | $ps.Runspace = $rs 78 | $null = $ps.AddScript($test) 79 | $null = $ps.BeginInvoke() 80 | 81 | # wait for a random time 82 | $random = New-Object System.Random 83 | $sleep = $random.Next(0, 2000) 84 | "Stop after $sleep ms" 85 | [System.Threading.Thread]::Sleep($sleep) 86 | 87 | # stop 88 | $ps.Stop() 89 | 90 | # show results 91 | $ps.Streams.Error 92 | $ps.Streams.Verbose 93 | 94 | #! 
weird, else logs may not match 95 | Start-Sleep -Milliseconds 500 96 | 97 | # Begin and Finally should match 98 | $begin = [SplitPipelineLog]::Begin 99 | $finally = [SplitPipelineLog]::Finally 100 | "$begin" 101 | "$finally" 102 | if ($begin.Count -ne $finally.Count) { 103 | Write-Warning "$begin <> $finally" 104 | Read-Host 'Enter' 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /Tests/Test-Refill.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline -Refill and compares with the alternative method. 5 | 6 | .Description 7 | This is an example of Split-Pipeline with refilled input. The convention is 8 | simple: [ref] objects refill the input, other objects go to output as usual. 9 | 10 | This test processes hierarchical data using two methods: 11 | 1) Split-Pipeline - parallel processing and refilled input; 12 | 2) Step-Node - sequential recursive stepping through nodes. 13 | 14 | Both methods simulate slow data request on a node $_ as: 15 | 16 | Start-Sleep -Milliseconds 500; $_.GetEnumerator() 17 | 18 | Both methods process/output leaf nodes in the same way: 19 | 20 | '{0}={1}' -f $node.Key, $node.Value 21 | 22 | Split-Pipeline refills the input with container nodes: 23 | 24 | [ref]$node.Value 25 | 26 | Step-Node calls itself recursively with container nodes: 27 | 28 | Step-Node $node.Value 29 | 30 | The test shows that sorted results of two methods are the same and 31 | Split-Pipeline normally works faster than Step-Node. 32 | 33 | Result order is different due to different order of node processing. 34 | Besides, order of Split-Pipeline results is not necessarily constant. 
35 | 36 | .Link 37 | https://github.com/nightroman/SplitPipeline/blob/main/Tests/Test-Refill.ps1 38 | #> 39 | 40 | ### Hierarchical data: container nodes are represented by hashtables 41 | $node1 = @{data1=1; data2=2; data3=3} 42 | $node2 = @{node1=$node1; node2=$node1; data4=4; data5=5} 43 | $root = @{node1=$node2; node2=$node2; data6=6; data7=7} 44 | $root | Format-Custom | Out-String 45 | 46 | ### Test 1: Refill Split-Pipeline with nodes 47 | $time1 = [Diagnostics.Stopwatch]::StartNew() 48 | $data1 = $root | Split-Pipeline -Refill {process{ 49 | foreach($node in $(Start-Sleep -Milliseconds 500; $_.GetEnumerator())) { 50 | if ($node.Value -is [hashtable]) { 51 | [ref]$node.Value 52 | } 53 | else { 54 | '{0}={1}' -f $node.Key, $node.Value 55 | } 56 | } 57 | }} 58 | $time1.Stop() 59 | 60 | ### Test 2: Step through nodes recursively 61 | $time2 = [Diagnostics.Stopwatch]::StartNew() 62 | function Step-Node($_) { 63 | foreach($node in $(Start-Sleep -Milliseconds 500; $_.GetEnumerator())) { 64 | if ($node.Value -is [hashtable]) { 65 | Step-Node $node.Value 66 | } 67 | else { 68 | '{0}={1}' -f $node.Key, $node.Value 69 | } 70 | } 71 | } 72 | $data2 = Step-Node $root 73 | $time2.Stop() 74 | 75 | ### Test: Sorted results should be the same 76 | $data1 = ($data1 | Sort-Object) -join ',' 77 | $data2 = ($data2 | Sort-Object) -join ',' 78 | $data1 79 | $data2 80 | if ($data1 -ne $data2) { throw 'Different results' } 81 | 82 | ### Test: Split-Pipeline should work faster than recursive processing 83 | $time1.Elapsed.ToString() 84 | $time2.Elapsed.ToString() 85 | if ($time1.Elapsed -ge $time2.Elapsed) { Write-Warning 'Unexpected times.' } 86 | -------------------------------------------------------------------------------- /Tests/Load.test.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Tests Split-Pipeline -Load. 
5 | 6 | .Link 7 | Invoked by https://github.com/nightroman/Invoke-Build 8 | #> 9 | 10 | Import-Module SplitPipeline 11 | Set-StrictMode -Version Latest 12 | 13 | # Count words in input data. We used to output just `@($input).Count` and check 14 | # output counts, i.e. $r[0] 1, $r[1] 1. The problem: 3rd load may output before 15 | # 2nd pipe is done. Thus we either should use -Order or output/check differently. 16 | # So we output joined items and check them anywhere, not just at [0] or [1]. 17 | function Get-WordCount($Data) { 18 | $count = 0 19 | foreach($_ in $Data) { 20 | $count += $_.Split(' ').Length 21 | } 22 | $count 23 | } 24 | 25 | task Error { 26 | # 0 args 27 | ($r = try {1..9 | Split-Pipeline {} -Load @()} catch {$_}) 28 | equals $r.FullyQualifiedErrorId 'ParameterArgumentValidationError,SplitPipeline.SplitPipelineCommand' 29 | 30 | # null 31 | ($r = try {1..9 | Split-Pipeline {} -Load $null} catch {$_}) 32 | equals $r.FullyQualifiedErrorId 'ParameterArgumentValidationError,SplitPipeline.SplitPipelineCommand' 33 | 34 | # 3+ args 35 | ($r = try {1..9 | Split-Pipeline {} -Load 1,2,3} catch {$_}) 36 | equals $r.FullyQualifiedErrorId 'ParameterArgumentValidationError,SplitPipeline.SplitPipelineCommand' 37 | 38 | # [0] > [1] 39 | ($r = try {1..9 | Split-Pipeline {} -Load 1,0} catch {$_}) 40 | equals $r.FullyQualifiedErrorId 'ParameterBindingFailed,SplitPipeline.SplitPipelineCommand' 41 | 42 | # [0]<1 is fine and treated as omitted, [1] is ignored 43 | $r = 1..9 | Split-Pipeline {@($input).Count} -Load 0,-1 -Count 2 44 | equals $r.Count 2 45 | equals $r[0] 5 46 | equals $r[1] 4 47 | } 48 | 49 | # v1.4.0 By default the whole input is collected and split evenly 50 | #! The order is not guaranteed but so far this test works as is. 
51 | task TheWholeInput { 52 | ($r = 1..11 | Split-Pipeline -Count 2 {@($input).Count}) 53 | equals $r.Count 2 54 | equals $r[0] 6 55 | equals $r[1] 5 56 | 57 | # same using the parameter, 1.6.0 58 | ($r = Split-Pipeline -Count 2 {@($input).Count} (1..11)) 59 | equals $r.Count 2 60 | equals $r[0] 6 61 | equals $r[1] 5 62 | } 63 | 64 | # `-Load 1` lets the algorithm to work as soon as any input available 65 | #! This test was the first to show not predicted order problems and was redesigned. 66 | task LetItChoose { 67 | ($r = 1..11 | Split-Pipeline -Count 2 {@($input) -join ' '} -Load 1) 68 | assert ($r.Count -ge 4) 69 | assert ($r -contains '1') 70 | assert ($r -contains '2') 71 | equals (Get-WordCount $r) 11 72 | } 73 | 74 | # `-Load 2` sets the minimum 75 | task Min2MaxX { 76 | ($r = 1..11 | Split-Pipeline -Count 2 {@($input) -join ' '} -Load 2) 77 | assert ($r.Count -ge 4) 78 | assert ($r -contains '1 2') 79 | assert ($r -contains '3 4') 80 | equals (Get-WordCount $r) 11 81 | } 82 | 83 | # `-Load 4,4` sets the part size to 4 84 | task Min4Max4 { 85 | ($r = 1..11 | Split-Pipeline -Count 2 {@($input) -join ' '} -Load 4,4 -Order) 86 | equals $r.Count 3 87 | assert ($r -contains '1 2 3 4') 88 | assert ($r -contains '5 6 7 8') 89 | assert ($r -contains '9 10 11') 90 | } 91 | -------------------------------------------------------------------------------- /Src/Job.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.ObjectModel; 2 | using System.Management.Automation; 3 | using System.Management.Automation.Runspaces; 4 | 5 | namespace SplitPipeline; 6 | 7 | class Job 8 | { 9 | readonly PowerShell _posh = PowerShell.Create(); 10 | IAsyncResult _async; 11 | 12 | /// 13 | /// Gets the pipeline streams. 14 | /// 15 | public PSDataStreams Streams { get { return _posh.Streams; } } 16 | /// 17 | /// Gets the wait handle of the async pipeline. 
18 | /// 19 | public WaitHandle WaitHandle { get { return _async.AsyncWaitHandle; } } 20 | /// 21 | /// Gets true if it is not completed or failed. 22 | /// 23 | public bool IsWorking 24 | { 25 | get 26 | { 27 | switch (_posh.InvocationStateInfo.State) 28 | { 29 | case PSInvocationState.Completed: return false; 30 | case PSInvocationState.Failed: return false; 31 | } 32 | return true; 33 | } 34 | } 35 | /// 36 | /// New job with its runspace. The runspace gets opened. 37 | /// 38 | public Job(Runspace runspace) 39 | { 40 | _posh.Runspace = runspace; 41 | runspace.Open(); 42 | } 43 | /// 44 | /// Invokes the begin script, if any, sets the pipeline script once, returns the begin output. 45 | /// 46 | public Collection InvokeBegin(string begin, string script) 47 | { 48 | Collection result = null; 49 | if (begin != null) 50 | { 51 | _posh.AddScript(begin, false); 52 | result = _posh.Invoke(); 53 | _posh.Commands.Clear(); 54 | } 55 | 56 | _posh.AddScript(script); 57 | return result; 58 | } 59 | /// 60 | /// Starts the pipeline script async. 61 | /// 62 | public void BeginInvoke(Queue queue, int count) 63 | { 64 | var input = new PSDataCollection(count); 65 | while (--count >= 0) 66 | input.Add(queue.Dequeue()); 67 | input.Complete(); 68 | 69 | _async = _posh.BeginInvoke(input); 70 | } 71 | /// 72 | /// Waits for the pipeline to finish and returns its output. 73 | /// 74 | /// 75 | public PSDataCollection EndInvoke() 76 | { 77 | if (_async == null) 78 | return null; 79 | 80 | return _posh.EndInvoke(_async); 81 | } 82 | /// 83 | /// Invokes the end script and returns its output. 84 | /// 85 | public Collection InvokeEnd(string script) 86 | { 87 | _posh.Commands.Clear(); 88 | _posh.AddScript(script, false); 89 | return _posh.Invoke(); 90 | } 91 | /// 92 | /// Invokes the final script, its output is ignored. 93 | /// 94 | public void InvokeFinally(string script) 95 | { 96 | // it may be still running, e.g. 
on stopping 97 | if (_posh.InvocationStateInfo.State == PSInvocationState.Running) 98 | _posh.Stop(); 99 | 100 | // invoke 101 | _posh.Commands.Clear(); 102 | _posh.AddScript(script, false); 103 | _posh.Invoke(); 104 | } 105 | /// 106 | /// Closes the pipeline and the runspace. 107 | /// 108 | public void Close() 109 | { 110 | _posh.Dispose(); 111 | _posh.Runspace.Dispose(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /1.build.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .Synopsis 3 | Build script, https://github.com/nightroman/Invoke-Build 4 | #> 5 | 6 | param( 7 | $Configuration = 'Release' 8 | ) 9 | 10 | Set-StrictMode -Version 3 11 | $_name = 'SplitPipeline' 12 | $_root = "$env:ProgramFiles\WindowsPowerShell\Modules\$_name" 13 | 14 | # Synopsis: Remove temp files. 15 | task clean { 16 | remove z, Src\bin, Src\obj, README.html 17 | } 18 | 19 | # Synopsis: Generate meta files. 
20 | task meta -Inputs $BuildFile, Release-Notes.md -Outputs "Module\$_name.psd1", Src\Directory.Build.props -Jobs version, { 21 | $Project = 'https://github.com/nightroman/SplitPipeline' 22 | $Summary = 'SplitPipeline - Parallel Data Processing in PowerShell' 23 | $Copyright = 'Copyright (c) Roman Kuzmin' 24 | 25 | Set-Content "Module\$_name.psd1" @" 26 | @{ 27 | Author = 'Roman Kuzmin' 28 | ModuleVersion = '$_version' 29 | Description = '$Summary' 30 | CompanyName = '$Project' 31 | Copyright = '$Copyright' 32 | 33 | RootModule = '$_name.dll' 34 | 35 | PowerShellVersion = '5.1' 36 | GUID = '7806b9d6-cb68-4e21-872a-aeec7174a087' 37 | 38 | CmdletsToExport = 'Split-Pipeline' 39 | FunctionsToExport = @() 40 | VariablesToExport = @() 41 | AliasesToExport = @() 42 | 43 | PrivateData = @{ 44 | PSData = @{ 45 | Tags = 'Parallel', 'Pipeline', 'Runspace', 'Invoke', 'Foreach' 46 | LicenseUri = 'http://www.apache.org/licenses/LICENSE-2.0' 47 | ProjectUri = 'https://github.com/nightroman/SplitPipeline' 48 | ReleaseNotes = 'https://github.com/nightroman/SplitPipeline/blob/main/Release-Notes.md' 49 | } 50 | } 51 | } 52 | "@ 53 | 54 | Set-Content Src\Directory.Build.props @" 55 | 56 | 57 | $Project 58 | $Copyright 59 | $Summary 60 | $_name 61 | $_version 62 | False 63 | 64 | 65 | "@ 66 | } 67 | 68 | # Synopsis: Build, publish in post-build, make help. 69 | task build meta, { 70 | exec { dotnet build "Src\$_name.csproj" -c $Configuration --tl:off } 71 | } 72 | 73 | # Synopsis: Publish the module (post-build). 74 | task publish { 75 | exec { robocopy Module $_root /s /xf *-Help.ps1 } (0..3) 76 | exec { dotnet publish Src\$_name.csproj --no-build -c $Configuration -o $_root } 77 | remove $_root\System.Management.Automation.dll, $_root\*.deps.json 78 | } 79 | 80 | # Synopsis: Build help by https://github.com/nightroman/Helps 81 | task help -After ?build -Inputs @(Get-Item Src\*.cs, "Module\en-US\$_name.dll-Help.ps1") -Outputs "$_root\en-US\$_name.dll-Help.xml" { 82 | . 
Helps.ps1 83 | Convert-Helps "Module\en-US\$_name.dll-Help.ps1" $Outputs 84 | } 85 | 86 | # Synopsis: Set $Script:_version. 87 | task version { 88 | ($Script:_version = Get-BuildVersion Release-Notes.md '##\s+v(\d+\.\d+\.\d+)') 89 | } 90 | 91 | # Synopsis: Convert markdown files to HTML. 92 | task markdown { 93 | exec { pandoc.exe --standalone --from=gfm --output=README.html --metadata=pagetitle=$_name README.md } 94 | } 95 | 96 | # Synopsis: Make the package. 97 | task package markdown, version, { 98 | equals $_version (Get-Item $_root\$_name.dll).VersionInfo.ProductVersion 99 | equals ([Version]$_version) (Get-Module $_name -ListAvailable).Version 100 | 101 | remove z 102 | exec { robocopy $_root z\$_name /s /xf *.pdb } (0..3) 103 | 104 | Copy-Item LICENSE, README.html -Destination z\$_name 105 | 106 | Assert-SameFile.ps1 -Result (Get-ChildItem z\$_name -Recurse -File -Name) -Text -View $env:MERGE @' 107 | LICENSE 108 | README.html 109 | SplitPipeline.dll 110 | SplitPipeline.psd1 111 | en-US\about_SplitPipeline.help.txt 112 | en-US\SplitPipeline.dll-Help.xml 113 | '@ 114 | } 115 | 116 | # Synopsis: Make and push the PSGallery package. 117 | task pushPSGallery package, { 118 | $NuGetApiKey = Read-Host NuGetApiKey 119 | Publish-Module -Path z\$_name -NuGetApiKey $NuGetApiKey 120 | }, 121 | clean 122 | 123 | # Synopsis: Push to the repository with a version tag. 124 | task pushRelease version, { 125 | $changes = exec { git status --short } 126 | assert (!$changes) "Please, commit changes." 127 | 128 | exec { git push } 129 | exec { git tag -a "v$_version" -m "v$_version" } 130 | exec { git push origin "v$_version" } 131 | } 132 | 133 | # Synopsis: Run tests. 134 | task test { 135 | Invoke-Build ** Tests 136 | } 137 | 138 | # Synopsis: Test Core. 139 | task core { 140 | exec { pwsh -NoProfile -Command Invoke-Build test } 141 | } 142 | 143 | # Synopsis: Test Desktop. 
144 | task desktop { 145 | exec { powershell -NoProfile -Command Invoke-Build test } 146 | } 147 | 148 | # Synopsis: Test editions. 149 | task tests desktop, core 150 | 151 | # Synopsis: Build and clean. 152 | task . build, clean 153 | -------------------------------------------------------------------------------- /Release-Notes.md: -------------------------------------------------------------------------------- 1 | # SplitPipeline Release Notes 2 | 3 | ## v2.0.1 4 | 5 | Avoid double warnings, #29. 6 | 7 | ## v2.0.0 8 | 9 | - Designed for Windows PowerShell 5.1 and PowerShell Core 10 | - Built with PowerShellStandard.Library 11 | - Published at PSGallery only 12 | 13 | ## v1.6.3 14 | 15 | Add help about error preference, #30. 16 | 17 | ## v1.6.2 18 | 19 | Fixed #29, `WarningVariable` should be populated. 20 | 21 | ## v1.6.1 22 | 23 | New helper `$Pipeline.Lock(script)` for mutually exclusive operations, #25. 24 | It is not designed for usual scenarios because it "breaks" parallel flows. 25 | 26 | ## v1.6.0 27 | 28 | Input objects may be provided using the parameter `InputObject`, #19. 29 | 30 | ## v1.5.3 31 | 32 | Packaged and published as PSGallery module. 33 | 34 | ## v1.5.2 35 | 36 | Fixed #12 `VerbosePreference` can be any value. 37 | 38 | ## v1.5.1 39 | 40 | Fixed #10 Tight loop in `EndProcessing()` 41 | 42 | ## v1.5.0 43 | 44 | `Count` accepts one or two values. One is as usual. Two values limit the number 45 | of required pipelines also taking into account the number of processors. (Too 46 | many pipelines on machines with many cores is not always optimal.) 47 | 48 | Corrected the test/demo script *Test-ProgressTotal.ps1*. 49 | 50 | Minor performance tweaks on creation of runspaces. 51 | 52 | ## v1.4.3 53 | 54 | Fixed duplicated debug, warning, and verbose messages (v1.4.2). 55 | 56 | ## v1.4.2 57 | 58 | Pipeline runspaces are created with the host used by `Split-Pipeline`. As a 59 | result, some host features can be used by pipeline scripts, e.g. 
`Write-Host` 60 | and even `Write-Progress`, see `Test-Progress*.ps1` in the project repository. 61 | 62 | ## v1.4.1 63 | 64 | If the minimum `Load` is less than 1 then the parameter is treated as omitted. 65 | 66 | ## v1.4.0 67 | 68 | *Potentially incompatible change*. By default, i.e. when `Load` is omitted, the 69 | whole input is collected and split evenly between parallel pipelines. This way 70 | seems to be the most effective in simple cases. In other cases, e.g. on large 71 | or slow input, `Load` should be used in order to enable processing of input 72 | parts and specify their limits. 73 | 74 | Corrected input item count in `Refill` mode in verbose statistics. 75 | 76 | Refactoring of ending, closing, and stopping. 77 | 78 | ## v1.3.1 79 | 80 | Removed the obsolete switch `Auto` and pieces of old code. 81 | 82 | ## v1.3.0 83 | 84 | Reviewed automatic load balancing, made it the default and less aggressive 85 | (*potentially incompatible change*). The obsolete switch `Auto` still exists 86 | but it is ignored. Use the parameter `Load` in order to specify part limits. 87 | E.g. `-Load N,N` tells to use N items per pipeline, i.e. no load balancing. 88 | 89 | In order words: a) `Auto` is slightly redundant with `Load`; b) not using 90 | `Auto`, e.g. forgetting, often causes less effective work. `Auto` will be 91 | removed in the next version. 92 | 93 | Improved stopping (e.g. by `[Ctrl-C]`): 94 | 95 | - Fixed some known and some potential issues. 96 | - The `Finally` script should work on stopping. 97 | 98 | Amended verbose messages. They are for: 99 | 100 | - Each job feed with current data. 101 | - End of processing with end data. 102 | - Summary with totals. 103 | 104 | ## v1.2.1 105 | 106 | Added processing of `StopProcessing()` which is called on `[Ctrl-C]`. Note that 107 | stopping is normally not recommended. But in some cases "under construction" it 108 | may help, e.g. [#3](https://github.com/nightroman/SplitPipeline/issues/3). 
109 | 110 | ## v1.2.0 111 | 112 | Debug streams of parallel pipelines are processed as well and debug messages 113 | are propagated to the main pipeline, just like errors, warnings, and verbose 114 | messages. 115 | 116 | ## v1.1.0 117 | 118 | New parameter `ApartmentState`. 119 | 120 | ## v1.0.1 121 | 122 | Help. Mentioned why and when to use `Variable`, `Function`, and `Module`. Added 123 | the related example. 124 | 125 | ## v1.0.0 126 | 127 | Minor cosmetic changes in help and code. The API seems to be stabilized and no 128 | issues were found for a while. Changed the status from "beta" to "release". 129 | 130 | ## v0.4.1 131 | 132 | Refactoring and minor improvements. 133 | 134 | ## v0.4.0 135 | 136 | Revision of parameters and automatic load balancing (mostly simplification). 137 | Joined parameters Load and Limit into the single parameter Load (one or two 138 | values). Removed parameters Cost (not needed now) and Queue (Load is used in 139 | order to limit the queue). 140 | 141 | ## v0.3.2 142 | 143 | Minor tweaks. 144 | 145 | ## v0.3.1 146 | 147 | Refilled input makes infinite loops possible in some scenarios. Use the new 148 | parameter `Filter` in order to exclude already processed objects and avoid 149 | loops. 150 | 151 | ## v0.3.0 152 | 153 | New switch `Refill` tells to refill the input queue from output. `[ref]` 154 | objects are intercepted and added to the input queue. Other objects go to 155 | output as usual. See an example in help and `Test-Refill.ps1`. 156 | 157 | Tweaks in feeding parallel pipelines and automatic tuning of load. 158 | 159 | ## v0.2.0 160 | 161 | New switch `Order` tells to output part results in the same order as input 162 | parts arrive. Thus, although order of processing is not predictable, output 163 | order can be made predictable. This feature open doors for more scenarios. 164 | 165 | Added checks for `Stopping` in `EndProcessing` (faster stop on `Ctrl+C`). 
166 | 167 | ## v0.1.1 168 | 169 | Tweaks, including related to PowerShell V3 CTP2. 170 | 171 | ## v0.1.0 172 | 173 | New switch `Auto` is used in order to determine Load values automatically during 174 | processing. Use `Verbose` in order to view some related information. Yet another 175 | new parameter `Cost` is used together with `Auto`; it is introduced rather for 176 | experiments. 177 | 178 | ## v0.0.1 179 | 180 | This is the first of v0 series (pre-release versions). Cmdlet parameters and 181 | behaviour may change. 182 | 183 | The cmdlet Split-Pipeline passes simple tests and shows good performance gain 184 | in a few practical scenarios. 185 | 186 | Failures, errors, warnings, and verbose messages from parallel pipelines are 187 | trivial, straightforward, and perhaps not useful enough for troubleshooting. 188 | -------------------------------------------------------------------------------- /Module/en-US/SplitPipeline.dll-Help.ps1: -------------------------------------------------------------------------------- 1 | 2 | <# 3 | .Synopsis 4 | Help script (https://github.com/nightroman/Helps) 5 | #> 6 | 7 | # Import the module to make commands available for the builder. 8 | Import-Module SplitPipeline 9 | 10 | ### Split-Pipeline command help 11 | @{ 12 | command = 'Split-Pipeline' 13 | synopsis = @' 14 | Splits pipeline input and processes its parts by parallel pipelines. 15 | '@ 16 | description = @' 17 | The cmdlet splits the input, processes its parts by parallel pipelines, and 18 | outputs the results for further processing. It may work without collecting 19 | the whole input, large or infinite. 20 | 21 | When Load is omitted the whole input is collected and split evenly between 22 | Count parallel pipelines. This method shows the best performance in simple 23 | cases. In other cases, e.g. on large or slow input, Load should be used in 24 | order to enable processing of partially collected input. 25 | 26 | The cmdlet creates several pipelines. 
Each pipeline is created when input 27 | parts are available, created pipelines are busy, and their number is less 28 | than Count. Each pipeline is used for processing one or more input parts. 29 | 30 | Because each pipeline works in its own runspace variables, functions, and 31 | modules from the main script are not automatically available for pipeline 32 | scripts. Such items should be specified by Variable, Function, and Module 33 | parameters in order to be available. 34 | 35 | The Begin and End scripts are invoked for each created pipeline once before 36 | and after processing. Each input part is piped to the script block Script. 37 | The Finally script is invoked after all, even on failures or stopping. 38 | 39 | If number of created pipelines is equal to Count and all pipelines are busy 40 | then incoming input items are enqueued for later processing. If the queue 41 | size hits the limit then the algorithm waits for any pipeline to complete. 42 | 43 | Input parts are not necessarily processed in the same order as they come. 44 | But output parts can be ordered according to input, use the switch Order. 45 | 46 | In rare scenarios when synchronous code must be invoked in pipelines, 47 | use the helper $Pipeline.Lock, see the repository tests for examples. 48 | 49 | ERROR PREFERENCE 50 | 51 | If the current error preference is Stop and the internal pipelines emit 52 | errors (even non-terminating) then Split-Pipeline treats these errors as 53 | terminating per its current environment. To avoid this consider using 54 | -ErrorAction Continue. 55 | '@ 56 | parameters = @{ 57 | Script = @' 58 | The script invoked for each input part of each pipeline with an input 59 | part piped to it. The script either processes the whole part ($input) 60 | or each item ($_) separately in the "process" block. Examples: 61 | 62 | # Process the whole $input part: 63 | ... | Split-Pipeline { $input | %{ $_ } } 64 | 65 | # Process input items $_ separately: 66 | ... 
| Split-Pipeline { process { $_ } } 67 | 68 | The script may have any of "begin", "process", and "end" blocks: 69 | 70 | ... | Split-Pipeline { begin {...} process { $_ } end {...} } 71 | 72 | Note that "begin" and "end" blocks are called for each input part but 73 | scripts defined by parameters Begin and End are called for pipelines. 74 | '@ 75 | InputObject = @' 76 | Input objects processed by parallel pipelines. Normally this parameter 77 | is not used directly, objects are sent using the pipeline. But it is 78 | fine to specify the input using this parameter. 79 | '@ 80 | Begin = @' 81 | The script invoked for each pipeline on creation before processing. The 82 | goal is to initialize the runspace to be used by the pipeline, normally 83 | to set some variables, dot-source scripts, import modules, and etc. 84 | '@ 85 | End = @' 86 | The script invoked for each pipeline once after processing. The goal 87 | is, for example, to output some results accumulated during processing 88 | of input parts by the pipeline. Consider to use Finally for releasing 89 | resources instead of End or in addition to it. 90 | '@ 91 | Finally = @' 92 | The script invoked for each opened pipeline before its closing, even on 93 | terminating errors or stopping (Ctrl-C). It is normally needed in order 94 | to release resources created by Begin. Output is ignored. If Finally 95 | fails then its errors are written as warnings because it has to be 96 | called for remaining pipelines. 97 | '@ 98 | Filter = @' 99 | Either a hashtable for collecting unique input objects or a script used 100 | in order to test an input object. Input includes extra objects added in 101 | Refill mode. In fact, this filter is mostly needed for Refill. 102 | 103 | A hashtable is used in order to collect and enqueue unique objects. In 104 | Refill mode it may be useful for avoiding infinite loops. 105 | 106 | A script is invoked in a child scope of the scope where the cmdlet is 107 | invoked. 
The first argument is an object being tested. Returned $true 108 | tells to add an object to the input queue. 109 | '@ 110 | Count = @' 111 | Specifies the parallel pipeline count. The default value is the number 112 | or processors. For intensive jobs use the default or decreased value, 113 | especially if there are other tasks working at the same time. But for 114 | jobs not consuming much processor resources increasing the number may 115 | improve performance. 116 | 117 | The parameter accepts an array of one or two integers. A single value 118 | specifies the recommended number of pipelines. Two arguments specify 119 | the minimum and maximum numbers and the recommended value is set to 120 | Max(Count[0], Min(Count[1], ProcessorCount)). 121 | '@ 122 | Load = @' 123 | Enables processing of partially collected input and specifies input 124 | part limits. If it is omitted then the whole input is collected and 125 | split evenly between pipelines. 126 | 127 | The parameter accepts an array of one or two integers. The first is the 128 | minimum number of objects per pipeline. If it is less than 1 then Load 129 | is treated as omitted. The second number is the optional maximum. 130 | 131 | If processing is fast then it is important to specify a proper minimum. 132 | Otherwise Split-Pipeline may work even slower than a standard pipeline. 133 | 134 | Setting the maximum causes more frequent output. For example, this may 135 | be important for feeding simultaneously working downstream pipelines. 136 | 137 | Setting the maximum number is also needed for potentially large input 138 | in order to limit the input queue size and avoid out of memory issues. 139 | The maximum queue size is set internally to Load[1] * Count. 140 | 141 | Use the switch Verbose in order to get some statistics which may help 142 | to choose suitable load limits. 143 | 144 | CAUTION: The queue limit may be ignored and exceeded if Refill is used. 
145 | Any number of objects written via [ref] go straight to the input queue. 146 | Thus, depending on data Refill scenarios may fail due to out of memory. 147 | '@ 148 | Variable = @' 149 | Variables imported from the current runspace to parallel. 150 | '@ 151 | Function = @' 152 | Functions imported from the current runspace to parallel. 153 | '@ 154 | Module = @' 155 | Modules imported to parallel runspaces. 156 | '@ 157 | Order = @' 158 | Tells to output part results in the same order as input parts arrive. 159 | The algorithm may work slower. 160 | '@ 161 | Refill = @' 162 | Tells to refill the input by [ref] objects from output. Other objects 163 | go to output as usual. This convention is used for processing items of 164 | hierarchical data structures: child container items come back to input, 165 | leaf items or other data produced by processing go to output. 166 | 167 | NOTE: Refilled input makes infinite loops possible for some data. Use 168 | Filter in order to exclude already processed objects and avoid loops. 169 | '@ 170 | ApartmentState = @' 171 | Specify either "MTA" (multi-threaded ) or "STA" (single-threaded) for 172 | the apartment states of the threads used to run commands in pipelines. 173 | '@ 174 | } 175 | inputs = @( 176 | @{ 177 | type = 'Object' 178 | description = @' 179 | Input objects processed by parallel pipelines. 180 | '@ 181 | } 182 | ) 183 | outputs = @( 184 | @{ 185 | type = 'Object' 186 | description = @' 187 | Output of the Begin, Script, and End script blocks. The scripts Begin 188 | and End are invoked once for each pipeline before and after processing. 189 | The script Script is invoked repeatedly with input parts piped to it. 190 | '@ 191 | } 192 | ) 193 | examples = @( 194 | @{ 195 | code = { 196 | 1..10 | . 
{process{ $_; sleep 1 }} 197 | 1..10 | Split-Pipeline -Count 10 {process{ $_; sleep 1 }} 198 | } 199 | remarks = @' 200 | Two commands perform the same job simulating long but not processor 201 | consuming operations on each item. The first command takes about 10 202 | seconds. The second takes about 2 seconds due to Split-Pipeline. 203 | '@ 204 | test = { . $args[0] } 205 | } 206 | @{ 207 | code = { 208 | $PSHOME | Split-Pipeline -Refill {process{ 209 | foreach($item in Get-ChildItem -LiteralPath $_ -Force) { 210 | if ($item.PSIsContainer) { 211 | [ref]$item.FullName 212 | } 213 | else { 214 | $item.Length 215 | } 216 | } 217 | }} | Measure-Object -Sum 218 | } 219 | remarks = @' 220 | This is an example of Split-Pipeline with refilled input. By the convention 221 | output [ref] objects refill the input, other objects go to output as usual. 222 | 223 | The code calculates the number and size of files in $PSHOME. It is a "how 224 | to" sample, performance gain is not expected because the code is trivial 225 | and works relatively fast. 226 | 227 | See also another example with simulated slow data requests: 228 | https://github.com/nightroman/SplitPipeline/blob/main/Tests/Test-Refill.ps1 229 | '@ 230 | test = { . $args[0] } 231 | } 232 | @{ 233 | remarks = @' 234 | Because each pipeline works in its own runspace variables, functions, and 235 | modules from the main script are not automatically available for pipeline 236 | scripts. Such items should be specified by Variable, Function, and Module 237 | parameters in order to be available. 238 | 239 | > $arr = @('one', 'two', 'three'); 0..2 | . {process{ $arr[$_] }} 240 | one 241 | two 242 | three 243 | 244 | > $arr = @('one', 'two', 'three'); 0..2 | Split-Pipeline {process{ $arr[$_] }} 245 | Split-Pipeline : Cannot index into a null array. 246 | ... 
247 | 248 | > $arr = @('one', 'two', 'three'); 0..2 | Split-Pipeline -Variable arr {process{ $arr[$_] }} 249 | one 250 | two 251 | three 252 | '@ 253 | } 254 | ) 255 | links = @( 256 | @{ text = 'Project site:'; URI = 'https://github.com/nightroman/SplitPipeline' } 257 | ) 258 | } 259 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Roman Kuzmin 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 
31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 
63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 
// NOTE(review): repo-dump chunk — tail of LICENSE, then the Src/SplitPipelineCommand.cs header:
// usings, file-scoped namespace, [Cmdlet] attribute, the pipeline/script parameters, and the
// Count parameter setter. Note: setter values with value[0] < 1 are silently ignored (deliberate
// "use default" convention — TODO confirm), and Count is capped by Environment.ProcessorCount.
177 | 178 | END OF TERMS AND CONDITIONS 179 | -------------------------------------------------------------------------------- /Src/SplitPipelineCommand.cs: -------------------------------------------------------------------------------- 1 | using System.Collections; 2 | using System.Diagnostics; 3 | using System.Management.Automation; 4 | using System.Management.Automation.Runspaces; 5 | using System.Reflection; 6 | 7 | namespace SplitPipeline; 8 | 9 | [Cmdlet(VerbsCommon.Split, "Pipeline")] 10 | public sealed class SplitPipelineCommand : PSCmdlet, IDisposable 11 | { 12 | [Parameter(Position = 0, Mandatory = true)] 13 | public ScriptBlock Script { get; set; } 14 | 15 | [Parameter(Position = 1, ValueFromPipeline = true)] 16 | public PSObject InputObject { get; set; } 17 | 18 | [Parameter] 19 | public ScriptBlock Begin { get; set; } 20 | 21 | [Parameter] 22 | public ScriptBlock End { get; set; } 23 | 24 | [Parameter] 25 | public ScriptBlock Finally { get; set; } 26 | 27 | [Parameter] 28 | public string[] Variable { get; set; } 29 | 30 | [Parameter] 31 | public string[] Function { get; set; } 32 | 33 | [Parameter] 34 | public string[] Module { get; set; } 35 | 36 | [Parameter] 37 | [ValidateCount(1, 2)] 38 | public int[] Count 39 | { 40 | get { return null; } 41 | set 42 | { 43 | if (value[0] < 1) 44 | return; 45 | 46 | if (value.Length == 1) 47 | _Count = value[0]; 48 | else if (value[0] > value[1]) 49 | throw new PSArgumentException("Count maximum must be greater or equal to minimum."); 50 | else 51 | _Count = Math.Max(value[0], Math.Min(value[1], Environment.ProcessorCount)); 52 | } 53 | } 54 | int _Count; 55 | 56 | [Parameter] 57 | public SwitchParameter Order { get; set; } 58 | 59 | [Parameter] 60 | public SwitchParameter Refill { get; set; } 61 | 62 | [Parameter] 63 | [ValidateCount(1, 2)] 64 | public int[] Load 65 | { 66 | get { return _Load; } 67 | set 68 | { 69 | if (value[0] < 1) 70 | return; 71 | 72 | if (value.Length == 2 && value[0] > value[1]) 73 |
// NOTE(review): Load setter tail (validates min <= max), the Filter parameter (a hashtable
// acts as a seen-set dedup filter — items already present are dropped and new ones recorded;
// a script block acts as a predicate), ApartmentState (stored, applied later via reflection),
// the private scheduling state (_queue/_done/_work, info counters), and the start of
// BeginProcessing (scripts captured as strings so each runspace re-parses them).
throw new PSArgumentException("Load maximum must be greater than or equal to minimum."); 74 | 75 | _Load = value; 76 | MinLoad = value[0]; 77 | if (value.Length == 2) 78 | MaxLoad = value[1]; 79 | } 80 | } 81 | int[] _Load; 82 | int MinLoad = 1; 83 | int MaxLoad = int.MaxValue; 84 | int MaxQueue = int.MaxValue; 85 | 86 | [Parameter] 87 | public PSObject Filter 88 | { 89 | get { return _Filter; } 90 | set 91 | { 92 | if (value != null) 93 | { 94 | _Filter = value; 95 | _FilterHash = value.BaseObject as IDictionary; 96 | if (_FilterHash == null) 97 | { 98 | _FilterScript = value.BaseObject as ScriptBlock; 99 | if (_FilterScript == null) 100 | throw new PSArgumentException("Expected a hashtable or a script block."); 101 | } 102 | } 103 | } 104 | } 105 | PSObject _Filter; 106 | IDictionary _FilterHash; 107 | ScriptBlock _FilterScript; 108 | 109 | [Parameter] 110 | public ApartmentState ApartmentState 111 | { 112 | set => _ApartmentState = value; 113 | } 114 | ApartmentState? _ApartmentState; 115 | 116 | readonly InitialSessionState _iss = InitialSessionState.CreateDefault(); 117 | readonly Queue _queue = new Queue(); 118 | readonly LinkedList _done = new LinkedList(); 119 | readonly LinkedList _work = new LinkedList(); 120 | readonly Stopwatch _infoTimeTotal = Stopwatch.StartNew(); 121 | readonly object _syncObject = new object(); 122 | string _Script, _Begin, _End, _Finally; 123 | bool xStop; 124 | bool _closed; 125 | bool _verbose; 126 | bool _isExpectingInput; 127 | int _infoItemCount; 128 | int _infoPartCount; 129 | int _infoWaitCount; 130 | int _infoMaxQueue; 131 | 132 | protected override void BeginProcessing() 133 | { 134 | // convert scripts to strings 135 | _Script = Script.ToString(); 136 | if (Begin != null) 137 | _Begin = Begin.ToString(); 138 | if (End != null) 139 | _End = End.ToString(); 140 | if (Finally != null) 141 | _Finally = Finally.ToString(); 142 | 143 | // Count 144 | if (_Count <= 0) 145 | _Count = Environment.ProcessorCount; 146 | 147 | //
// NOTE(review): BeginProcessing continued — MaxQueue = Count * MaxLoad (guarded against int
// overflow), InitialSessionState setup (modules, logging-suppression variables, the shared
// $Pipeline Helper, user variables/functions), verbose detection honoring -Verbose over
// $VerbosePreference (#12), and eager enqueue when input came via -InputObject instead of
// the pipeline (a non-enumerable value switches back to pipeline mode).
MaxQueue after Count 148 | if (MaxLoad < int.MaxValue / _Count) 149 | MaxQueue = _Count * MaxLoad; 150 | 151 | // to import modules 152 | if (Module != null) 153 | _iss.ImportPSModule(Module); 154 | 155 | // import variables 156 | _iss.Variables.Add(new SessionStateVariableEntry("LogEngineLifeCycleEvent", false, string.Empty)); // whole log disabled 157 | _iss.Variables.Add(new SessionStateVariableEntry("LogProviderLifeCycleEvent", false, string.Empty)); // start is still logged 158 | _iss.Variables.Add(new SessionStateVariableEntry("Pipeline", new Helper(), "Pipeline helper")); 159 | if (Variable != null) 160 | { 161 | foreach (var name in Variable) 162 | _iss.Variables.Add(new SessionStateVariableEntry(name, GetVariableValue(name), string.Empty)); 163 | } 164 | 165 | // import functions 166 | if (Function != null) 167 | { 168 | foreach (var name in Function) 169 | { 170 | var function = (FunctionInfo)SessionState.InvokeCommand.GetCommand(name, CommandTypes.Function); 171 | _iss.Commands.Add(new SessionStateFunctionEntry(name, function.Definition)); 172 | } 173 | } 174 | 175 | // verbose state 176 | if (MyInvocation.BoundParameters.TryGetValue("Verbose", out object parameter)) 177 | { 178 | _verbose = ((SwitchParameter)parameter).ToBool(); 179 | } 180 | else 181 | { 182 | // #12 VerbosePreference value can be anything 183 | if (LanguagePrimitives.TryConvertTo(GetVariableValue("VerbosePreference"), out ActionPreference preference)) 184 | _verbose = preference != ActionPreference.SilentlyContinue; 185 | } 186 | 187 | // if items are sent as the parameter then enqueue them 188 | _isExpectingInput = MyInvocation.ExpectingInput; 189 | if (!_isExpectingInput) 190 | { 191 | var items = LanguagePrimitives.GetEnumerable(InputObject); 192 | if (items == null) 193 | { 194 | _isExpectingInput = true; 195 | } 196 | else 197 | { 198 | foreach (var it in items) 199 | if (it == null) 200 | Enqueue(null); 201 | else 202 | Enqueue(PSObject.AsPSObject(it)); 203 | } 204 | } 205 | } 206
// NOTE(review): ProcessRecord enqueues each pipeline item, then force-feeds while the queue
// exceeds MaxQueue (backpressure) and feeds normally at MinLoad; exceptions are suppressed only
// while stopping (xStop). EndProcessing drains the queue and working jobs (waiting when there is
// nothing to feed, #10), emits the verbose summary, and runs the -End script in each done job.
| protected override void ProcessRecord() 207 | { 208 | try 209 | { 210 | // add to the queue 211 | if (_isExpectingInput) 212 | Enqueue(InputObject); 213 | 214 | // simple mode or too few items for a job? 215 | if (Load == null || _queue.Count < MinLoad) 216 | return; 217 | 218 | // force feed while the queue is too large; 219 | // NB: Feed with Refill may add new items 220 | while (_queue.Count >= MaxQueue && !xStop) 221 | Feed(true); 222 | 223 | // try to feed available jobs normally 224 | if (_queue.Count >= MinLoad && !xStop) 225 | Feed(false); 226 | } 227 | catch 228 | { 229 | // ignore errors on stopping 230 | if (!xStop) 231 | throw; 232 | } 233 | } 234 | protected override void EndProcessing() 235 | { 236 | try 237 | { 238 | // force feed while there are items or working jobs 239 | // NB: jobs with Refill may add new items 240 | while (_queue.Count > 0 || _work.Count > 0) 241 | { 242 | if (xStop) 243 | return; 244 | 245 | // verbose info 246 | if (_verbose) 247 | WriteVerbose(string.Format(null, "Split-Pipeline: Jobs = {0}; Load = End; Queue = {1}", _work.Count, _queue.Count)); 248 | 249 | // #10 nothing to feed, wait 250 | if (_queue.Count == 0) 251 | Wait(); 252 | 253 | Feed(true); 254 | } 255 | 256 | // summary info 257 | if (xStop) 258 | return; 259 | if (_verbose) 260 | WriteVerbose(string.Format(null, @"Split-Pipeline: 261 | Item count = {0} 262 | Part count = {1} 263 | Pipe count = {2} 264 | Wait count = {3} 265 | Max queue = {4} 266 | Total time = {5} 267 | Items /sec = {6} 268 | ", _infoItemCount 269 | , _infoPartCount 270 | , _done.Count 271 | , _infoWaitCount 272 | , _infoMaxQueue 273 | , _infoTimeTotal.Elapsed 274 | , _infoItemCount / _infoTimeTotal.Elapsed.TotalSeconds)); 275 | 276 | // invoke the end script 277 | if (_End != null) 278 | { 279 | foreach (var job in _done) 280 | { 281 | if (xStop) 282 | return; 283 | WriteResults(job, job.InvokeEnd(_End)); 284 | } 285 | } 286 | } 287 | catch 288 | { 289 | // ignore errors on stopping 290 | if
// NOTE(review): StopProcessing (Ctrl+C) sets xStop and closes; Dispose closes once.
// Enqueue applies the Filter (hashtable seen-set or predicate script) before queueing and
// updates the item/max-queue counters; callers are responsible for the MaxQueue bound.
// Feed starts by Take()-ing finished jobs first — with -Refill that can add new input.
(!xStop) 291 | throw; 292 | } 293 | } 294 | protected override void StopProcessing() 295 | { 296 | xStop = true; 297 | Close(); 298 | } 299 | public void Dispose() 300 | { 301 | if (!_closed) 302 | Close(); 303 | } 304 | 305 | /// 306 | /// Adds the object to the queue unless it is filtered out. 307 | /// Callers check the maximum queue count. 308 | /// 309 | void Enqueue(PSObject value) 310 | { 311 | // filter 312 | if (Filter != null) 313 | { 314 | if (_FilterHash != null) 315 | { 316 | if (_FilterHash.Contains(value.BaseObject)) 317 | return; 318 | 319 | _FilterHash.Add(value, null); 320 | } 321 | else 322 | { 323 | if (!LanguagePrimitives.IsTrue(_FilterScript.InvokeReturnAsIs(value))) 324 | return; 325 | } 326 | } 327 | 328 | // enqueue 329 | _queue.Enqueue(value); 330 | 331 | // update info 332 | ++_infoItemCount; 333 | if (_infoMaxQueue < _queue.Count) 334 | _infoMaxQueue = _queue.Count; 335 | } 336 | /// 337 | /// Gets the next part of input items and feeds them to a ready job. 338 | /// If forced waits for a ready job. 339 | /// 340 | void Feed(bool force) 341 | { 342 | // try to make more jobs ready and more input available on Refill 343 | Take(); 344 | 345 | // no input? check this after taking, Refill adds input on taking 346 | if (_queue.Count == 0) 347 | return; 348 | 349 | // all busy?
// NOTE(review): Feed continued — when all jobs are busy it returns unless forced, otherwise
// waits and re-takes. The per-job load is ceil(queue / Count), clamped to [MinLoad, MaxLoad].
// Under _syncObject it hands parts to ready jobs, creating new Job/runspace pairs lazily
// (at most _Count total) when _done has no reusable job.
350 | if (_Count - _work.Count == 0) 351 | { 352 | // no ready jobs, done if not forced 353 | if (!force) 354 | return; 355 | 356 | // wait for jobs and make them ready 357 | Wait(); 358 | Take(); 359 | } 360 | 361 | // split the queue equally between all potential jobs 362 | int load = _queue.Count / _Count; 363 | if (load * _Count < _queue.Count) 364 | ++load; 365 | 366 | // check limits 367 | if (load < MinLoad) 368 | load = MinLoad; 369 | else if (load > MaxLoad) 370 | load = MaxLoad; 371 | 372 | lock (_syncObject) 373 | { 374 | int nReadyJobs = _Count - _work.Count; 375 | if (xStop || nReadyJobs == 0) 376 | return; 377 | 378 | do 379 | { 380 | // limit load by the queue 381 | if (load > _queue.Count) 382 | { 383 | load = _queue.Count; 384 | 385 | // if load is less than minimum and not forced then exit 386 | if (load < MinLoad && !force) 387 | return; 388 | } 389 | 390 | // next job node 391 | LinkedListNode node = _done.First; 392 | if (node == null) 393 | { 394 | // v1.4.2 Runspaces use the same host as the cmdlet.
// NOTE(review): new runspaces share the cmdlet's Host; ApartmentState is set via reflection
// because the property is not available on all target frameworks (netstandard2.0 build —
// TODO confirm). Take() completes finished jobs and moves them work -> done; with -Order it
// stops at the first still-working job so output order is preserved.
395 | var runspace = RunspaceFactory.CreateRunspace(Host, _iss); 396 | if (_ApartmentState.HasValue) 397 | { 398 | var info = typeof(Runspace).GetProperty("ApartmentState", BindingFlags.Public | BindingFlags.Instance); 399 | info.SetValue(runspace, _ApartmentState.Value, null); 400 | } 401 | 402 | var job = new Job(runspace); 403 | node = new LinkedListNode(job); 404 | _work.AddLast(node); 405 | WriteResults(job, job.InvokeBegin(_Begin, _Script)); 406 | } 407 | else 408 | { 409 | _done.RemoveFirst(); 410 | _work.AddLast(node); 411 | } 412 | 413 | if (xStop) 414 | return; 415 | 416 | // feed the job 417 | ++_infoPartCount; 418 | node.Value.BeginInvoke(_queue, load); 419 | 420 | // show feed info 421 | if (_verbose) 422 | WriteVerbose(string.Format(null, "Split-Pipeline: Jobs = {0}; Load = {1}; Queue = {2}", _work.Count, load, _queue.Count)); 423 | } 424 | while (!xStop && --nReadyJobs > 0 && _queue.Count > 0); 425 | } 426 | } 427 | /// 428 | /// Finds finished jobs, writes their output, moves them to done. 429 | /// If Order stops on the first found working job, it should finish. 430 | /// 431 | void Take() 432 | { 433 | lock (_syncObject) 434 | { 435 | var node = _work.First; 436 | while (node != null) 437 | { 438 | if (node.Value.IsWorking) 439 | { 440 | if (Order) 441 | break; 442 | 443 | node = node.Next; 444 | continue; 445 | } 446 | 447 | // complete the job 448 | var job = node.Value; 449 | if (xStop) 450 | return; 451 | WriteResults(job, job.EndInvoke()); 452 | 453 | // move node to done, do next 454 | var next = node.Next; 455 | _work.Remove(node); 456 | _done.AddLast(node); 457 | node = next; 458 | } 459 | } 460 | } 461 | /// 462 | /// Waits for any job to finish. If Order then it's the first job in the queue.
// NOTE(review): Wait — with -Order it blocks on the first working job (EndInvoke) to keep
// output ordered; otherwise it snapshots the wait handles under the lock and calls
// WaitHandle.WaitAny OUTSIDE the lock (issue #3: waiting inside used to hang).
// WriteResults — with -Refill, [ref]-wrapped outputs go back to the input queue; errors are
// re-propagated via WriteError, and warnings are mirrored into -WarningVariable (#29).
463 | /// 464 | void Wait() 465 | { 466 | var wait = new List(_Count); 467 | 468 | lock (_syncObject) 469 | { 470 | ++_infoWaitCount; 471 | 472 | if (Order) 473 | { 474 | var node = _work.First; 475 | var job = node.Value; 476 | WriteResults(job, job.EndInvoke()); 477 | _work.Remove(node); 478 | _done.AddLast(node); 479 | return; 480 | } 481 | 482 | foreach (var job in _work) 483 | wait.Add(job.WaitHandle); 484 | } 485 | 486 | //! issue #3: used to hang 487 | WaitHandle.WaitAny(wait.ToArray()); 488 | } 489 | /// 490 | /// Writes job output objects and propagates streams. 491 | /// Moves refilling objects from output to the queue. 492 | /// 493 | /// 494 | /// v1.4.2 Only errors are propagated, other streams are written to the host. 495 | /// 496 | void WriteResults(Job job, ICollection output) 497 | { 498 | // process output 499 | if (output != null && output.Count > 0) 500 | { 501 | if (Refill) 502 | { 503 | foreach (var it in output) 504 | { 505 | if (it != null) 506 | { 507 | if (it.BaseObject is PSReference reference) 508 | Enqueue(new PSObject(reference.Value)); 509 | else 510 | WriteObject(it); 511 | } 512 | } 513 | } 514 | else 515 | { 516 | foreach (var it in output) 517 | WriteObject(it); 518 | } 519 | } 520 | 521 | // process streams 522 | var streams = job.Streams; 523 | 524 | // v1.4.2 Even with the shared host errors must be propagated explicitly. 525 | if (streams.Error.Count > 0) 526 | { 527 | foreach (var record in streams.Error) 528 | WriteError(record); 529 | } 530 | 531 | // ensure warnings are added to the variable 532 | // https://github.com/nightroman/SplitPipeline/issues/29 533 | if (streams.Warning.Count > 0 && MyInvocation.BoundParameters.TryGetValue("WarningVariable", out var warningVariable)) 534 | { 535 | var list = (ArrayList)GetVariableValue((string)warningVariable); 536 | foreach (var record in streams.Warning) 537 | list.Add(record); 538 | } 539 | 540 | // v1.4.2 Debug, progress, verbose, and warning messages are written to the host.
// NOTE(review): WriteResults tail clears the job streams after each write. Close is idempotent
// (guarded by _closed under _syncObject): it moves every working job to done, runs the -Finally
// script in all jobs collecting exceptions (reported later as warnings, swallowed when stopping
// since the host pipeline may be gone), then closes each job's runspace.
541 | // But streams are still populated, so we clear them on writing results. 542 | // NB: It is possible to log these streams in addition. 543 | streams.ClearStreams(); 544 | } 545 | /// 546 | /// Moves all jobs to done then for each job: 547 | /// -- calls the finally script; 548 | /// -- closes the job. 549 | /// 550 | void Close() 551 | { 552 | lock (_syncObject) 553 | { 554 | // close once 555 | if (_closed) 556 | return; 557 | _closed = true; 558 | 559 | // move jobs to done 560 | while (_work.Count > 0) 561 | { 562 | var node = _work.First; 563 | _work.RemoveFirst(); 564 | _done.AddLast(node); 565 | } 566 | 567 | // done? 568 | if (_done.Count == 0) 569 | return; 570 | 571 | // invoke the finally script always, do not throw, closing is ahead 572 | if (_Finally != null) 573 | { 574 | // let them all work 575 | var exceptions = new List(); 576 | foreach (var job in _done) 577 | { 578 | try 579 | { 580 | job.InvokeFinally(_Finally); 581 | } 582 | catch (Exception e) 583 | { 584 | exceptions.Add(e); 585 | } 586 | } 587 | 588 | // then write errors as warnings 589 | if (exceptions.Count > 0 && !xStop) 590 | { 591 | try 592 | { 593 | foreach (var e in exceptions) 594 | WriteWarning("Exception in Finally: " + e.Message); 595 | } 596 | catch (RuntimeException) 597 | { } 598 | } 599 | } 600 | 601 | // close jobs 602 | foreach (var job in _done) 603 | job.Close(); 604 | } 605 | } 606 | } 607 | --------------------------------------------------------------------------------