├── .ci
    └── view_component_trigger
    │   ├── Jenkinsfile
    │   └── jobs.groovy
├── .clang-format
├── .github
    ├── CODEOWNERS
    └── workflows
    │   └── pre-commit.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CMakeLists.txt
├── License.txt
├── README.md
├── cbt.json
├── cmake
    └── Modules
    │   ├── AddGPLibrary.cmake
    │   └── Findzoltan.cmake
├── exported_symbols_linux.lds
├── exported_symbols_osx.lds
├── include
    ├── popfloat
    │   └── experimental
    │   │   ├── CastToGfloat.hpp
    │   │   ├── CastToHalf.hpp
    │   │   ├── GfloatExpr.hpp
    │   │   ├── GfloatExprUtil.hpp
    │   │   └── codelets.hpp
    ├── popfloat_mock
    │   ├── Mock.hpp
    │   └── MockFixture.hpp
    ├── poplibs_support
    │   ├── Algorithms.hpp
    │   ├── CTCInferenceDefs.hpp
    │   ├── Compiler.hpp
    │   ├── ContiguousRegionsByTile.hpp
    │   ├── ExternalCodelet.hpp
    │   ├── FlopEstimation.hpp
    │   ├── HashTuple.hpp
    │   ├── IclUtil.hpp
    │   ├── LogArithmetic.hpp
    │   ├── Memoize.hpp
    │   ├── MultiArray.hpp
    │   ├── PlanConstraints.hpp
    │   ├── StridedRegions.hpp
    │   ├── TestDevice.hpp
    │   ├── TraceChannels.hpp
    │   ├── Tracepoint.hpp
    │   ├── VectorUtils.hpp
    │   ├── Visitor.hpp
    │   ├── codelets.hpp
    │   ├── forceInterleavedEstimates.hpp
    │   ├── logging.hpp
    │   ├── print.hpp
    │   └── vv_iterator.hpp
    ├── poplibs_test
    │   ├── CTCInference.hpp
    │   ├── CTCLoss.hpp
    │   ├── CTCUtil.hpp
    │   ├── Check.hpp
    │   ├── Convolution.hpp
    │   ├── Embedding.hpp
    │   ├── FullyConnected.hpp
    │   ├── GeneralMatrixAdd.hpp
    │   ├── GeneralMatrixMultiply.hpp
    │   ├── Gru.hpp
    │   ├── Lstm.hpp
    │   ├── MatrixTransforms.hpp
    │   ├── Multirate.hpp
    │   ├── NonLinearity.hpp
    │   ├── Norms.hpp
    │   ├── Pass.hpp
    │   ├── Pooling.hpp
    │   ├── ProgressBar.hpp
    │   ├── ROIAlign.hpp
    │   ├── Reduce.hpp
    │   ├── Rnn.hpp
    │   ├── SparseMatrix.hpp
    │   ├── TempDir.hpp
    │   ├── Util.hpp
    │   └── exceptions.hpp
    ├── poplin
    │   ├── Cholesky.hpp
    │   ├── ConvParams.hpp
    │   ├── ConvPreplan.hpp
    │   ├── ConvUtil.hpp
    │   ├── Convolution.hpp
    │   ├── FullyConnected.hpp
    │   ├── MatMul.hpp
    │   ├── MeshGrid.hpp
    │   ├── MultiConvolution.hpp
    │   ├── Norms.hpp
    │   ├── TriangularSolve.hpp
    │   ├── codelets.hpp
    │   └── experimental
    │   │   ├── LuFactorization.hpp
    │   │   └── QRFactorization.hpp
    ├── poplin_mock
    │   ├── Mock.hpp
    │   └── MockFixture.hpp
    ├── popnn
    │   ├── BatchNorm.hpp
    │   ├── CTCInference.hpp
    │   ├── CTCLoss.hpp
    │   ├── CTCPlan.hpp
    │   ├── GroupNorm.hpp
    │   ├── Gru.hpp
    │   ├── GruDef.hpp
    │   ├── InstanceNorm.hpp
    │   ├── LayerNorm.hpp
    │   ├── LogSoftmax.hpp
    │   ├── Loss.hpp
    │   ├── Lstm.hpp
    │   ├── LstmDef.hpp
    │   ├── NonLinearity.hpp
    │   ├── NonLinearityDef.hpp
    │   ├── NonLinearityDefUtil.hpp
    │   ├── Norms.hpp
    │   ├── Pooling.hpp
    │   ├── PoolingDef.hpp
    │   ├── Recurrent.hpp
    │   ├── Rnn.hpp
    │   ├── SpatialSoftMax.hpp
    │   ├── codelets.hpp
    │   └── experimental
    │   │   └── ROIAlign.hpp
    ├── popnn_mock
    │   ├── Mock.hpp
    │   └── MockFixture.hpp
    ├── popops
    │   ├── AllTrue.hpp
    │   ├── Cast.hpp
    │   ├── CircBuf.hpp
    │   ├── DynamicSlice.hpp
    │   ├── ElementWise.hpp
    │   ├── ElementWiseUtil.hpp
    │   ├── Encoding.hpp
    │   ├── EncodingConstants.hpp
    │   ├── Expr.hpp
    │   ├── ExprOp.hpp
    │   ├── ExprOpUtils.hpp
    │   ├── Fill.hpp
    │   ├── Gather.hpp
    │   ├── GatherStatistics.hpp
    │   ├── HostSliceTensor.hpp
    │   ├── Loop.hpp
    │   ├── NaN.hpp
    │   ├── NormaliseImage.hpp
    │   ├── Operation.hpp
    │   ├── OperationDef.hpp
    │   ├── OperationDefUtil.hpp
    │   ├── Pad.hpp
    │   ├── PerformanceEstimation.hpp
    │   ├── Rearrange.hpp
    │   ├── Reduce.hpp
    │   ├── ScaledAdd.hpp
    │   ├── Scatter.hpp
    │   ├── SelectScalarFromRows.hpp
    │   ├── SequenceSlice.hpp
    │   ├── Sort.hpp
    │   ├── SortOrder.hpp
    │   ├── SplineBasis.hpp
    │   ├── SplineWeighting.hpp
    │   ├── TopK.hpp
    │   ├── UpdateScalarInRows.hpp
    │   ├── Zero.hpp
    │   └── codelets.hpp
    ├── popops_mock
    │   ├── Matchers.hpp
    │   ├── Mock.hpp
    │   └── MockFixture.hpp
    ├── poprand
    │   ├── RandomGen.hpp
    │   └── codelets.hpp
    ├── poprand_mock
    │   ├── Mock.hpp
    │   └── MockFixture.hpp
    ├── popsparse
    │   ├── Embedding.hpp
    │   ├── FullyConnected.hpp
    │   ├── FullyConnectedParams.hpp
    │   ├── MatMul.hpp
    │   ├── MatMulParams.hpp
    │   ├── PlanningCache.hpp
    │   ├── SparsePartitioner.hpp
    │   ├── SparseStorageFormats.hpp
    │   ├── SparseTensor.hpp
    │   ├── SparsityParams.hpp
    │   ├── codelets.hpp
    │   └── experimental
    │   │   ├── BlockSparse.hpp
    │   │   └── BlockSparseMatMul.hpp
    ├── poputil
    │   ├── Broadcast.hpp
    │   ├── DebugInfo.hpp
    │   ├── GraphFunction.hpp
    │   ├── OptionParsing.hpp
    │   ├── TensorMetaData.hpp
    │   ├── TileMapping.hpp
    │   ├── Util.hpp
    │   ├── VarStructure.hpp
    │   ├── VertexTemplates.hpp
    │   ├── cyclesTables.hpp
    │   └── exceptions.hpp
    └── poputil_mock
    │   ├── Mock.hpp
    │   └── MockFixture.hpp
├── lib
    ├── CMakeLists.txt
    ├── popfloat
    │   ├── CMakeLists.txt
    │   ├── CastToGfloat.cpp
    │   ├── CastToHalf.cpp
    │   ├── GfloatExprUtil.cpp
    │   ├── codelets.cpp
    │   ├── codelets
    │   │   ├── CastFloatToGf16.cpp
    │   │   ├── CastFloatToGf8.cpp
    │   │   ├── CastGf16ToFloat.cpp
    │   │   ├── CastGf8ToFloat.cpp
    │   │   ├── CastGf8ToHalf.cpp
    │   │   ├── CastHalfToGf8.cpp
    │   │   ├── CastToGfloat16.cpp
    │   │   ├── CastToGfloat16InPlace.cpp
    │   │   ├── CastToGfloat16Param.cpp
    │   │   ├── CastToGfloat16Sr.cpp
    │   │   ├── CastToGfloat16SrInPlace.cpp
    │   │   ├── CastToGfloat32.cpp
    │   │   ├── CastToGfloat32InPlace.cpp
    │   │   ├── CastToGfloat32Param.cpp
    │   │   ├── CastToGfloat32Sr.cpp
    │   │   ├── CastToGfloat32SrInPlace.cpp
    │   │   ├── PackedGfloatParams.cpp
    │   │   ├── asm
    │   │   │   ├── CastFloatToGF16.S
    │   │   │   ├── CastFloatToGF16.h
    │   │   │   ├── CastFloatToGF8.S
    │   │   │   ├── CastFloatToGF8.h
    │   │   │   ├── CastGF16ToFloat.S
    │   │   │   ├── CastGF16ToFloat.h
    │   │   │   ├── CastGF8ToFloat.S
    │   │   │   ├── CastGF8ToFloat.h
    │   │   │   ├── CastGF8ToHalf.S
    │   │   │   ├── CastGF8ToHalf.h
    │   │   │   ├── CastHalfToGF8.S
    │   │   │   ├── CastHalfToGF8.h
    │   │   │   ├── CastToGfloat16.S
    │   │   │   ├── CastToGfloat16.h
    │   │   │   ├── CastToGfloat16Param.S
    │   │   │   ├── CastToGfloat16Param.h
    │   │   │   ├── CastToGfloat16Sr.S
    │   │   │   ├── CastToGfloat16Sr.h
    │   │   │   ├── CastToGfloat32.S
    │   │   │   ├── CastToGfloat32.h
    │   │   │   ├── CastToGfloat32Param.S
    │   │   │   ├── CastToGfloat32Param.h
    │   │   │   ├── CastToGfloat32Sr.S
    │   │   │   ├── CastToGfloat32Sr.h
    │   │   │   ├── GfloatConst.hpp
    │   │   │   ├── PackedGfloatParams.S
    │   │   │   ├── PackedGfloatParams.h
    │   │   │   └── popfloatCommon.inc
    │   │   ├── popfloatCodelets.hpp
    │   │   ├── popfloatCycleCount.hpp
    │   │   └── popfloatUtils.hpp
    │   ├── popfloatCycleEstimators.cpp
    │   └── popfloatCycleEstimators.hpp
    ├── popfloat_mock
    │   ├── CMakeLists.txt
    │   └── codelets.cpp
    ├── poplibs-config.cmake
    ├── poplibs_mock-config.cmake
    ├── poplibs_support
    │   ├── Algorithms.cpp
    │   ├── CMakeLists.txt
    │   ├── ContiguousRegionsByTile.cpp
    │   ├── IclUtil.cpp
    │   ├── PlanConstraints.cpp
    │   ├── StridedRegions.cpp
    │   ├── TestDevice.cpp
    │   ├── TraceChannels.cpp
    │   ├── codelets.cpp
    │   ├── forceInterleavedEstimates.cpp
    │   └── logging.cpp
    ├── poplibs_test
    │   ├── CMakeLists.txt
    │   ├── CTCInference.cpp
    │   ├── CTCLoss.cpp
    │   ├── CTCUtil.cpp
    │   ├── Convolution.cpp
    │   ├── Embedding.cpp
    │   ├── FullyConnected.cpp
    │   ├── GeneralMatrixAdd.cpp
    │   ├── GeneralMatrixMultiply.cpp
    │   ├── Gru.cpp
    │   ├── Lstm.cpp
    │   ├── Multirate.cpp
    │   ├── NonLinearity.cpp
    │   ├── Norms.cpp
    │   ├── Pass.cpp
    │   ├── Pooling.cpp
    │   ├── Rnn.cpp
    │   └── Util.cpp
    ├── poplin
    │   ├── CMakeLists.txt
    │   ├── CanonicalConvParams.hpp
    │   ├── Cholesky.cpp
    │   ├── ConvModel.cpp
    │   ├── ConvModel.hpp
    │   ├── ConvOptions.cpp
    │   ├── ConvOptions.hpp
    │   ├── ConvParams.cpp
    │   ├── ConvPartialsStridesPacking.cpp
    │   ├── ConvPartialsStridesPacking.hpp
    │   ├── ConvPlan.cpp
    │   ├── ConvPlan.hpp
    │   ├── ConvPlanTypes.hpp
    │   ├── ConvPreplan.cpp
    │   ├── ConvProgramTree.hpp
    │   ├── ConvReduce.cpp
    │   ├── ConvReduce.hpp
    │   ├── ConvReducePlan.cpp
    │   ├── ConvReducePlan.hpp
    │   ├── ConvTransforms.cpp
    │   ├── ConvTransforms.hpp
    │   ├── ConvUtil.cpp
    │   ├── ConvUtilInternal.cpp
    │   ├── ConvUtilInternal.hpp
    │   ├── ConvValidation.cpp
    │   ├── ConvValidation.hpp
    │   ├── ConvVertexType.cpp
    │   ├── ConvVertexType.hpp
    │   ├── ConvVertices.cpp
    │   ├── ConvVertices.hpp
    │   ├── Convolution.cpp
    │   ├── ConvolutionInternal.hpp
    │   ├── ExchangeEstimator.hpp
    │   ├── FullyConnected.cpp
    │   ├── LuFactorization.cpp
    │   ├── MatMul.cpp
    │   ├── MatMulInternal.hpp
    │   ├── MeshGrid.cpp
    │   ├── MultiConvolution.cpp
    │   ├── MultiConvolutionInternal.hpp
    │   ├── Norms.cpp
    │   ├── PerformanceEstimation.hpp
    │   ├── PlanningCache.hpp
    │   ├── PlanningObjective.hpp
    │   ├── QRFactorization.cpp
    │   ├── TriangularSolve.cpp
    │   ├── Winograd.cpp
    │   ├── Winograd.hpp
    │   ├── codelets.cpp
    │   ├── codelets
    │   │   ├── Cholesky.cpp
    │   │   ├── ConvPartial1x1Out.cpp
    │   │   ├── ConvPartial1xNSLIC.cpp
    │   │   ├── ConvPartialHorizontalMac.cpp
    │   │   ├── ConvPartialHorizontalMac1x1.cpp
    │   │   ├── ConvPartialVerticalMac.cpp
    │   │   ├── ConvPartialnx1.cpp
    │   │   ├── Dot.hpp
    │   │   ├── InverseStdDeviation.cpp
    │   │   ├── LuFactorization.cpp
    │   │   ├── OuterProduct.cpp
    │   │   ├── QRFactorization.cpp
    │   │   ├── ReduceAdd.cpp
    │   │   ├── TriangularSolve.cpp
    │   │   ├── WgdConvComplete.cpp
    │   │   ├── WgdDataTransform.cpp
    │   │   ├── WgdInverseTransform.cpp
    │   │   ├── WgdKernelTransform.cpp
    │   │   ├── WgdPartials.cpp
    │   │   ├── WgdReduce.cpp
    │   │   ├── asm
    │   │   │   ├── ConvPartial1x4SLIC_half_float_8_and_half_half_16.S
    │   │   │   ├── ConvPartial1x4SLIC_half_float_8_and_half_half_16_cgpg_gt4.S
    │   │   │   ├── ConvPartial1x4SLIC_half_half_8.S
    │   │   │   ├── OuterProduct.S
    │   │   │   ├── ReduceAdd.S
    │   │   │   ├── ReduceAddSingleInput.S
    │   │   │   ├── conv_hzmac1x1_half_float.S
    │   │   │   ├── conv_hzmac_common.h.S
    │   │   │   ├── conv_hzmac_float_float.S
    │   │   │   ├── conv_hzmac_float_float_non_limited.S
    │   │   │   ├── conv_hzmac_half_float.S
    │   │   │   ├── conv_hzmac_half_float_non_limited.S
    │   │   │   ├── conv_hzmac_half_half.S
    │   │   │   ├── conv_hzmac_half_half_non_limited.S
    │   │   │   ├── conv_partial_1x1_float_float.S
    │   │   │   ├── conv_partial_1x1_float_float_16.S
    │   │   │   ├── conv_partial_1x1_half_float_8_and_half_half_h16.S
    │   │   │   ├── conv_partial_1x1_half_half.S
    │   │   │   ├── conv_partial_1x1_supervisor.S
    │   │   │   ├── conv_partial_nx1_float_float.S
    │   │   │   ├── conv_partial_nx1_float_float_16.S
    │   │   │   ├── conv_partial_nx1_half_float_8_and_half_half_16.S
    │   │   │   ├── conv_partial_nx1_half_half.S
    │   │   │   ├── conv_partial_nx1_supervisor.S
    │   │   │   ├── conv_partial_nx1_zero_output.S
    │   │   │   ├── conv_partial_zero_output_stack.h
    │   │   │   ├── conv_sr_ctrl.h.S
    │   │   │   ├── conv_vmac_common.h.S
    │   │   │   ├── conv_vmac_half_float.S
    │   │   │   ├── conv_vmac_half_half.S
    │   │   │   ├── conv_vmac_half_half_16.S
    │   │   │   └── conv_vmac_half_half_8.S
    │   │   ├── convCastSupport.hpp
    │   │   ├── inlineAssemblerConv.hpp
    │   │   ├── inlineAssemblerSLIC.hpp
    │   │   └── inlineAssemblerSLICStride2.hpp
    │   ├── poplinCycleEstimators.cpp
    │   └── poplinCycleEstimators.hpp
    ├── poplin_mock
    │   ├── CMakeLists.txt
    │   ├── MatMul.cpp
    │   └── codelets.cpp
    ├── popnn
    │   ├── BatchNorm.cpp
    │   ├── CMakeLists.txt
    │   ├── CTCInference.cpp
    │   ├── CTCInferenceConnection.cpp
    │   ├── CTCInferenceConnection.hpp
    │   ├── CTCInferencePlan.cpp
    │   ├── CTCInferencePlan.hpp
    │   ├── CTCLoss.cpp
    │   ├── CTCLossPlan.cpp
    │   ├── CTCLossPlan.hpp
    │   ├── CTCPlanInternal.cpp
    │   ├── CTCPlanInternal.hpp
    │   ├── CreatePoolingVertex.hpp
    │   ├── GroupNorm.cpp
    │   ├── Gru.cpp
    │   ├── HardSigmoid.cpp
    │   ├── HardSigmoid.hpp
    │   ├── LogSoftmax.cpp
    │   ├── Loss.cpp
    │   ├── Lstm.cpp
    │   ├── NonLinearity.cpp
    │   ├── NonLinearityInternal.hpp
    │   ├── Norms.cpp
    │   ├── NormsInternal.cpp
    │   ├── NormsInternal.hpp
    │   ├── PerformanceEstimation.hpp
    │   ├── PoolOptions.hpp
    │   ├── PoolPlan.cpp
    │   ├── PoolPlan.hpp
    │   ├── PoolVertices.cpp
    │   ├── PoolVertices.hpp
    │   ├── Pooling.cpp
    │   ├── PoolingDefUtil.hpp
    │   ├── ROIAlign.cpp
    │   ├── Recurrent.cpp
    │   ├── Rnn.cpp
    │   ├── RnnUtil.hpp
    │   ├── SpatialSoftMax.cpp
    │   ├── codelets.cpp
    │   ├── codelets
    │   │   ├── CTCInference.cpp
    │   │   ├── CTCLoss.cpp
    │   │   ├── CalcAccuracy.cpp
    │   │   ├── LogOps.hpp
    │   │   ├── LossCrossEntropyTransform.cpp
    │   │   ├── LossSumSquaredTransform.cpp
    │   │   ├── MaxPooling.cpp
    │   │   ├── MaxPoolingGrad.cpp
    │   │   ├── MaxPoolingGradientScale.cpp
    │   │   ├── MinHeapView.hpp
    │   │   ├── NonLinearity.hpp
    │   │   ├── NonLinearity1D.cpp
    │   │   ├── NonLinearity2D.cpp
    │   │   ├── NonLinearityGrad1D.cpp
    │   │   ├── NonLinearityGrad2D.cpp
    │   │   ├── ROIAlign.cpp
    │   │   ├── ReduceMaxClassGather.cpp
    │   │   ├── ReduceMaxClassSparse.cpp
    │   │   ├── ReduceMaxNClassGather.cpp
    │   │   ├── ReduceMaxNClassSparse.cpp
    │   │   ├── ReduceMinClassGather.cpp
    │   │   ├── ReduceMinClassSparse.cpp
    │   │   ├── SelectiveScaling.cpp
    │   │   ├── SumPooling.cpp
    │   │   └── asm
    │   │   │   ├── CTCInferenceSort.S
    │   │   │   ├── CTCInferenceUpdate.S
    │   │   │   ├── LossTransform.S
    │   │   │   ├── NonLinearity1DSwish.S
    │   │   │   ├── NonLinearity1D_gelu.S
    │   │   │   ├── NonLinearity2DSwish.S
    │   │   │   ├── NonLinearity2D_gelu.S
    │   │   │   ├── NonLinearityGrad1D.S
    │   │   │   ├── NonLinearityGrad1DSwish.S
    │   │   │   ├── NonLinearityGrad1D_gelu.S
    │   │   │   ├── NonLinearityGrad2D.S
    │   │   │   ├── NonLinearityGrad2DSwish.S
    │   │   │   ├── NonLinearityGrad2D_gelu.S
    │   │   │   ├── NonLinearityGradLoop_gelu.S
    │   │   │   ├── NonLinearityLoop_gelu.S
    │   │   │   ├── NonLinearitySwishCommon.S
    │   │   │   ├── Pooling.S
    │   │   │   ├── ReduceClassGather.S
    │   │   │   └── ReduceClassSparse.S
    │   ├── popnnCycleEstimators.cpp
    │   └── popnnCycleEstimators.hpp
    ├── popnn_mock
    │   ├── CMakeLists.txt
    │   └── codelets.cpp
    ├── popops
    │   ├── AllTrue.cpp
    │   ├── BitonicTopK.cpp
    │   ├── BitonicTopK.hpp
    │   ├── CMakeLists.txt
    │   ├── Cast.cpp
    │   ├── CastModelling.cpp
    │   ├── CastModelling.hpp
    │   ├── CircBuf.cpp
    │   ├── DynamicSlice.cpp
    │   ├── DynamicSliceInternal.hpp
    │   ├── ElementWise.cpp
    │   ├── ElementWiseInternal.cpp
    │   ├── ElementWiseInternal.hpp
    │   ├── ElementWiseUtil.cpp
    │   ├── ElementWiseUtilInternal.hpp
    │   ├── Encoding.cpp
    │   ├── ExchangeEstimator.cpp
    │   ├── ExchangeEstimator.hpp
    │   ├── Expr.cpp
    │   ├── ExprOpUtil.cpp
    │   ├── ExprOpUtil.hpp
    │   ├── ExprOpUtils.cpp
    │   ├── ExpressionGenerator.cpp
    │   ├── ExpressionGenerator.hpp
    │   ├── Fill.cpp
    │   ├── FillModelling.cpp
    │   ├── FillModelling.hpp
    │   ├── Gather.cpp
    │   ├── GatherInternal.cpp
    │   ├── GatherInternal.hpp
    │   ├── GatherStatistics.cpp
    │   ├── HistogramPerformanceEstimation.cpp
    │   ├── HistogramPerformanceEstimation.hpp
    │   ├── HostSliceTensor.cpp
    │   ├── Loop.cpp
    │   ├── NaN.cpp
    │   ├── NormaliseImage.cpp
    │   ├── Operation.cpp
    │   ├── Pad.cpp
    │   ├── Padder.cpp
    │   ├── Padder.hpp
    │   ├── PerformanceEstimation.cpp
    │   ├── Rearrange.cpp
    │   ├── RearrangeUtil.cpp
    │   ├── RearrangeUtil.hpp
    │   ├── ScalarMultiply.cpp
    │   ├── ScalarMultiply.hpp
    │   ├── ScaledAdd.cpp
    │   ├── ScaledAddModelling.cpp
    │   ├── ScaledAddModelling.hpp
    │   ├── Scatter.cpp
    │   ├── SelectScalarFromRows.cpp
    │   ├── SequenceSlice.cpp
    │   ├── Sort.cpp
    │   ├── SortOrder.cpp
    │   ├── SparseUtils.cpp
    │   ├── SparseUtils.hpp
    │   ├── SplineBasis.cpp
    │   ├── SplineWeighting.cpp
    │   ├── TopK.cpp
    │   ├── UpdateScalarInRows.cpp
    │   ├── VarianceToOrFromInvStdDev.cpp
    │   ├── Zero.cpp
    │   ├── codelets.cpp
    │   ├── codelets
    │   │   ├── BroadcastVectorInner1D.cpp
    │   │   ├── BroadcastVectorInner1DInPlace.cpp
    │   │   ├── BroadcastVectorInner2D.cpp
    │   │   ├── BroadcastVectorInner2DInPlace.cpp
    │   │   ├── CheckAccuracyWhenCast.cpp
    │   │   ├── CheckAccuracyWhenCast.hpp
    │   │   ├── CircBufIncrIndex.cpp
    │   │   ├── CircOffset.cpp
    │   │   ├── CompareAndSwapAtDistance.cpp
    │   │   ├── ContinuousReduce.cpp
    │   │   ├── ContinuousReduce.hpp
    │   │   ├── DynamicSlice1D.cpp
    │   │   ├── DynamicSlice2D.cpp
    │   │   ├── DynamicUpdateSlice1D.cpp
    │   │   ├── DynamicUpdateSlice2D.cpp
    │   │   ├── EncodeOneHot.cpp
    │   │   ├── EncodeOneHotCustomValues.cpp
    │   │   ├── HasNaN.cpp
    │   │   ├── HeapSort.hpp
    │   │   ├── HeapSortVertex.cpp
    │   │   ├── HeapSortVertexKV.cpp
    │   │   ├── Iota.cpp
    │   │   ├── MultiSlice.cpp
    │   │   ├── MultiSliceUpdateCommon.cpp
    │   │   ├── MultiSliceUpdateCommon.hpp
    │   │   ├── MultiUpdate.cpp
    │   │   ├── MultiUpdateOp.cpp
    │   │   ├── MultiUpdateOp.hpp
    │   │   ├── NormaliseImage.cpp
    │   │   ├── Reduce.cpp
    │   │   ├── ReduceCodelets.hpp
    │   │   ├── ScalarMultiply.cpp
    │   │   ├── ScaledContinuousReduce.cpp
    │   │   ├── ScaledReduce.cpp
    │   │   ├── SelectFromInterval.cpp
    │   │   ├── SelectFromIntervals.cpp
    │   │   ├── SelectFromRowsInColumns.cpp
    │   │   ├── SelectScalarFromRows.hpp
    │   │   ├── SeqSlice.cpp
    │   │   ├── SplineBasis.cpp
    │   │   ├── SplineWeighting.cpp
    │   │   ├── UpdateColumnsDEC.cpp
    │   │   ├── UpdateIntervalDEC.cpp
    │   │   ├── UpdateIntervalsDEC.cpp
    │   │   ├── asm
    │   │   │   ├── BinarySearch.S
    │   │   │   ├── BinarySearch.h.S
    │   │   │   ├── BroadcastSelect.S
    │   │   │   ├── Clamp.S
    │   │   │   ├── CommonPoplibsMacros.h.S
    │   │   │   ├── CompareAndSwapAtDistanceKeyVal_float_unsigned.S
    │   │   │   ├── ContinuousReductionAcc.S
    │   │   │   ├── ContinuousReductionNoAcc.S
    │   │   │   ├── EncodeOneHot.S
    │   │   │   ├── Fill.S
    │   │   │   ├── ForLoopCounter.S
    │   │   │   ├── HasNaN.S
    │   │   │   ├── MathConstants.S
    │   │   │   ├── MultiSlice.S
    │   │   │   ├── MultiSliceUpdateCommon.h.S
    │   │   │   ├── MultiUpdateOp.S
    │   │   │   ├── NormaliseImage.S
    │   │   │   ├── ReductionSpecial01Acc.S
    │   │   │   ├── ReductionSpecial01NoAcc.S
    │   │   │   ├── ReductionSpecial2.S
    │   │   │   ├── ReductionSpecial3.S
    │   │   │   ├── ReductionsCommon.h.S
    │   │   │   ├── ScalarMultiply.S
    │   │   │   ├── ScaledAdd2DXminusaXPlusbY_half.S
    │   │   │   ├── ScaledAdd2D_float.S
    │   │   │   ├── ScaledAdd2D_float_half.S
    │   │   │   ├── ScaledAdd2D_half.S
    │   │   │   ├── ScaledAdd2D_half_float.S
    │   │   │   ├── ScaledAdd2D_integral.S
    │   │   │   ├── ScaledAddSupervisor.inc
    │   │   │   ├── ScaledAddSupervisor_float_half.S
    │   │   │   ├── ScaledAddSupervisor_fp.S
    │   │   │   ├── ScaledAdd_half_half_float.S
    │   │   │   ├── ScaledAddaXPlusbY_mixed.S
    │   │   │   ├── SliceCopyFunction.S
    │   │   │   ├── SliceCopyFunction_8bit.inc
    │   │   │   ├── VarianceConversion1D.S
    │   │   │   ├── VarianceConversion2D.S
    │   │   │   ├── binaryOps1D.S
    │   │   │   ├── binaryOps2D.S
    │   │   │   ├── dynamicSlice.S
    │   │   │   ├── dynamicSlice.inc
    │   │   │   ├── dynamicSlice1D.S
    │   │   │   ├── dynamicSlice1D_8bit.S
    │   │   │   ├── histogram1DByData.S
    │   │   │   ├── histogram1DByLimit.S
    │   │   │   ├── histogram2D.S
    │   │   │   ├── histogramCommon.S
    │   │   │   ├── select_bool.S
    │   │   │   ├── select_half.S
    │   │   │   ├── select_int_float.S
    │   │   │   ├── unary2DNonLinearity.S
    │   │   │   ├── unaryOps1D.S
    │   │   │   ├── unaryOps1DNonLinearity.S
    │   │   │   ├── unaryOps2D.S
    │   │   │   ├── unaryOpsOps.h.S
    │   │   │   ├── vectorInnerAdd_float.S
    │   │   │   ├── vectorInnerAdd_half.S
    │   │   │   ├── vectorInnerCommon.h.S
    │   │   │   ├── vectorInnerDiv_float.S
    │   │   │   ├── vectorInnerDiv_half.S
    │   │   │   ├── vectorInnerMul_float.S
    │   │   │   ├── vectorInnerMul_half.S
    │   │   │   ├── vectorOuterOps1D.S
    │   │   │   └── workDivision.h.S
    │   │   ├── broadcastCodelets.cpp
    │   │   ├── elementwiseCodelets.hpp
    │   │   ├── elemwiseBinaryCodelets.cpp
    │   │   ├── elemwiseBinaryOps.hpp
    │   │   ├── elemwiseMiscCodelets.cpp
    │   │   ├── elemwiseScaledAddCodelets.cpp
    │   │   ├── elemwiseUnaryCodelets.cpp
    │   │   ├── inlineAssembler.hpp
    │   │   ├── inlineAssemblerCast.hpp
    │   │   ├── inlineAssemblerUnaryOps.hpp
    │   │   └── util.hpp
    │   ├── popopsCycleEstimators.cpp
    │   ├── popopsCycleEstimators.hpp
    │   └── reduction
    │   │   ├── ComputeSetList.cpp
    │   │   ├── ComputeSetList.hpp
    │   │   ├── CycleEstimationFunctions.cpp
    │   │   ├── CycleEstimationFunctions.hpp
    │   │   ├── IntermediatePartials.cpp
    │   │   ├── IntermediatePartials.hpp
    │   │   ├── IntermediatePartialsUtil.cpp
    │   │   ├── IntermediatePartialsUtil.hpp
    │   │   ├── Modelling.cpp
    │   │   ├── Modelling.hpp
    │   │   ├── Reduction.cpp
    │   │   ├── ReductionConnection.cpp
    │   │   ├── ReductionConnection.hpp
    │   │   ├── ReductionIntrospection.cpp
    │   │   ├── ReductionIntrospection.hpp
    │   │   ├── ReductionPlan.cpp
    │   │   ├── ReductionPlan.hpp
    │   │   ├── ReductionStages.cpp
    │   │   ├── ReductionStages.hpp
    │   │   ├── ReductionVertex.hpp
    │   │   ├── ReductionVertexDefs.hpp
    │   │   ├── RegionWrapping.cpp
    │   │   └── RegionWrapping.hpp
    ├── popops_mock
    │   ├── CMakeLists.txt
    │   ├── ElementWise.cpp
    │   ├── ElementWiseUtil.cpp
    │   └── codelets.cpp
    ├── poprand
    │   ├── CMakeLists.txt
    │   ├── RandomGen.cpp
    │   ├── codelets.cpp
    │   ├── codelets
    │   │   ├── Bernoulli.cpp
    │   │   ├── Dropout.cpp
    │   │   ├── Normal.cpp
    │   │   ├── RandomUtils.hpp
    │   │   ├── SetSeed.cpp
    │   │   ├── TruncatedNormal.cpp
    │   │   ├── Uniform.cpp
    │   │   └── asm
    │   │   │   ├── Bernoulli.S
    │   │   │   ├── Dropout.S
    │   │   │   ├── Normal.S
    │   │   │   ├── Seeds.S
    │   │   │   ├── TruncatedNormal.S
    │   │   │   ├── Uniform.S
    │   │   │   └── poprandCommon.inc
    │   ├── poprandCycleEstimators.cpp
    │   └── poprandCycleEstimators.hpp
    ├── poprand_mock
    │   ├── CMakeLists.txt
    │   └── codelets.cpp
    ├── popsparse
    │   ├── BSMatrix.cpp
    │   ├── BSMatrix.hpp
    │   ├── BSNonLinearity.cpp
    │   ├── BSNonLinearity.hpp
    │   ├── BSOps.cpp
    │   ├── BSOps.hpp
    │   ├── BSUtils.cpp
    │   ├── BSUtils.hpp
    │   ├── BalancedPartitioner.cpp
    │   ├── BalancedPartitioner.hpp
    │   ├── BlockSparseMatMul.cpp
    │   ├── CMakeLists.txt
    │   ├── Embedding.cpp
    │   ├── FullyConnected.cpp
    │   ├── FullyConnectedOnTile.cpp
    │   ├── FullyConnectedOnTile.hpp
    │   ├── FullyConnectedOptions.cpp
    │   ├── FullyConnectedOptions.hpp
    │   ├── FullyConnectedPNMapping.cpp
    │   ├── FullyConnectedPNMapping.hpp
    │   ├── FullyConnectedParams.cpp
    │   ├── FullyConnectedPlan.cpp
    │   ├── FullyConnectedPlan.hpp
    │   ├── FullyConnectedTensorMetaData.hpp
    │   ├── FullyConnectedUtils.cpp
    │   ├── FullyConnectedUtils.hpp
    │   ├── FullyConnectedVector.hpp
    │   ├── HyperGraph.cpp
    │   ├── HyperGraph.hpp
    │   ├── HyperGraphBlock.cpp
    │   ├── HyperGraphBlock.hpp
    │   ├── HyperGraphBlockGroup.cpp
    │   ├── HyperGraphBlockGroup.hpp
    │   ├── HyperGraphBlockGroup2.cpp
    │   ├── HyperGraphBlockGroup2.hpp
    │   ├── HyperGraphBlockNaive.cpp
    │   ├── HyperGraphBlockNaive.hpp
    │   ├── HyperGraphBlockZoltan.cpp
    │   ├── HyperGraphBlockZoltan.hpp
    │   ├── HyperGraphPartitioner.cpp
    │   ├── HyperGraphPartitioner.hpp
    │   ├── HyperGraphStrip.cpp
    │   ├── HyperGraphStrip.hpp
    │   ├── HyperGraphStripV0.cpp
    │   ├── HyperGraphStripV0.hpp
    │   ├── MatMul.cpp
    │   ├── MatMulOptions.cpp
    │   ├── MatMulOptions.hpp
    │   ├── MatMulParams.cpp
    │   ├── MatMulTensorMetaData.hpp
    │   ├── MatMulUtils.cpp
    │   ├── MatMulUtils.hpp
    │   ├── PerformanceEstimation.hpp
    │   ├── PlanningCache.cpp
    │   ├── PlanningCacheImpl.hpp
    │   ├── SparseCodeletMetaInfoScale.hpp
    │   ├── SparseFormatsValidate.cpp
    │   ├── SparseFormatsValidate.hpp
    │   ├── SparseMetaInfo.hpp
    │   ├── SparsePartitioner.cpp
    │   ├── SparsePartitionerImpl.cpp
    │   ├── SparsePartitionerImpl.hpp
    │   ├── SparsePartitionerOptions.cpp
    │   ├── SparsePartitionerOptions.hpp
    │   ├── SparseStorageInternal.hpp
    │   ├── SparseTensor.cpp
    │   ├── SparsityParams.cpp
    │   ├── StaticMatMul.cpp
    │   ├── StaticMatMulPartitioner.cpp
    │   ├── StaticMatMulPartitioner.hpp
    │   ├── ZoltanPartitioner.cpp
    │   ├── ZoltanPartitioner.hpp
    │   ├── codelets.cpp
    │   ├── codelets
    │   │   ├── BlockTransposeGradW.cpp
    │   │   ├── SparseDenseMatMulBlock.cpp
    │   │   ├── SparseDenseMatMulBlockAmpGradW.cpp
    │   │   ├── SparseDenseMatMulBlockGradA.cpp
    │   │   ├── SparseDenseMatMulBlockGradW.cpp
    │   │   ├── SparseDenseMatMulElementWise.cpp
    │   │   ├── SparseDenseMatMulElementWiseTranspose.cpp
    │   │   ├── SparseDenseMatMulGradAElementWise.cpp
    │   │   ├── SparseDenseMatMulGradWElementWise.cpp
    │   │   ├── SparseDenseMultiSliceBlock.cpp
    │   │   ├── SparseDenseMultiSliceElementWise.cpp
    │   │   ├── SparseGather.cpp
    │   │   ├── StaticSparseDenseElementWise.cpp
    │   │   ├── StaticSparseDenseMatMulBlock.cpp
    │   │   ├── Utils.cpp
    │   │   └── asm
    │   │   │   ├── Block16x16SparseDenseMatMulAmpGradW_half_float.S
    │   │   │   ├── Block16x16SparseDenseMatMulAmpGradW_half_half.S
    │   │   │   ├── Block16x16SparseDenseMatMulAmpGradW_half_half_2ampsets.S
    │   │   │   ├── Block16x16SparseDenseMatMulGradA_half_float.S
    │   │   │   ├── Block16x16SparseDenseMatMulGradA_half_half.S
    │   │   │   ├── Block16x16SparseDenseMatMulGradA_half_half_2ampsets.S
    │   │   │   ├── Block16x16SparseDenseMatMulGradW_half_float.S
    │   │   │   ├── Block16x16SparseDenseMatMulGradW_half_half.S
    │   │   │   ├── Block16x16SparseDenseMatMul_half_float.S
    │   │   │   ├── Block16x16SparseDenseMatMul_half_half.S
    │   │   │   ├── Block16x16SparseDenseMatMul_half_half_2ampsets.S
    │   │   │   ├── Block4x4SparseDenseMatMulAmpGradW_float_float.S
    │   │   │   ├── Block4x4SparseDenseMatMulAmpGradW_half_float.S
    │   │   │   ├── Block4x4SparseDenseMatMulAmpGradW_half_half.S
    │   │   │   ├── Block4x4SparseDenseMatMulGradA_float_float.S
    │   │   │   ├── Block4x4SparseDenseMatMulGradA_half_float.S
    │   │   │   ├── Block4x4SparseDenseMatMulGradA_half_half.S
    │   │   │   ├── Block4x4SparseDenseMatMulGradW_float_float.S
    │   │   │   ├── Block4x4SparseDenseMatMulGradW_half_float.S
    │   │   │   ├── Block4x4SparseDenseMatMulGradW_half_half.S
    │   │   │   ├── Block4x4SparseDenseMatMul_float_float.S
    │   │   │   ├── Block4x4SparseDenseMatMul_half_float.S
    │   │   │   ├── Block4x4SparseDenseMatMul_half_half.S
    │   │   │   ├── Block8x8SparseDenseMatMulAmpGradW_float_float.S
    │   │   │   ├── Block8x8SparseDenseMatMulAmpGradW_half_float.S
    │   │   │   ├── Block8x8SparseDenseMatMulAmpGradW_half_half.S
    │   │   │   ├── Block8x8SparseDenseMatMulGradA_float_float.S
    │   │   │   ├── Block8x8SparseDenseMatMulGradA_half_float.S
    │   │   │   ├── Block8x8SparseDenseMatMulGradA_half_half.S
    │   │   │   ├── Block8x8SparseDenseMatMulGradW_float_float.S
    │   │   │   ├── Block8x8SparseDenseMatMulGradW_half_float.S
    │   │   │   ├── Block8x8SparseDenseMatMulGradW_half_half.S
    │   │   │   ├── Block8x8SparseDenseMatMul_float_float.S
    │   │   │   ├── Block8x8SparseDenseMatMul_half_float.S
    │   │   │   ├── Block8x8SparseDenseMatMul_half_half.S
    │   │   │   ├── BlockMatMulGradWCommon.S
    │   │   │   ├── BlockSparseDenseMatMul.h.S
    │   │   │   ├── BlockSparseMatMulAmpGradW.h.S
    │   │   │   ├── BlockSparseMatMulGradW.h.S
    │   │   │   ├── BlockSparseMatMulStructs.h.S
    │   │   │   ├── BlockTransposeGradW.h.S
    │   │   │   ├── BlockTransposeGradW_float.S
    │   │   │   ├── BlockTransposeGradW_half.S
    │   │   │   ├── SparseDenseMatMulElementWise.h.S
    │   │   │   ├── SparseDenseMatMulElementWise_float_float.S
    │   │   │   ├── SparseDenseMatMulElementWise_half_float.S
    │   │   │   ├── SparseDenseMatMulGradAElementWise.h.S
    │   │   │   ├── SparseDenseMatMulGradAElementWise_float_float.S
    │   │   │   ├── SparseDenseMatMulGradAElementWise_half_float.S
    │   │   │   ├── SparseDenseMatMulGradWElementWise.h.S
    │   │   │   ├── SparseDenseMatMulGradWElementWise_float_float.S
    │   │   │   ├── SparseDenseMatMulGradWElementWise_half_float.S
    │   │   │   ├── SparseDenseMatMulStructs.h.S
    │   │   │   ├── SparseDenseMatMulTranspElementWise.h.S
    │   │   │   ├── SparseDenseMatMulTranspElementWise_float_float.S
    │   │   │   ├── SparseDenseMatMulTranspElementWise_half_float.S
    │   │   │   ├── SparseDenseMultiSliceBlock.S
    │   │   │   ├── SparseDenseMultiSliceElementWise.S
    │   │   │   ├── SparseDenseMultiUpdateAddBlock.S
    │   │   │   ├── SparseDenseMultiUpdateAddElementWise.S
    │   │   │   ├── SparseGatherElementWise.S
    │   │   │   ├── StaticBlock16x16SparseDenseMatMul_half_half.S
    │   │   │   ├── StaticBlock16x8SparseDenseMatMul_float_float.S
    │   │   │   ├── StaticBlock4x4SparseDenseMatMul_float_float.S
    │   │   │   ├── StaticBlock4x4SparseDenseMatMul_half_half.S
    │   │   │   ├── StaticBlock8x8SparseDenseMatMul_float_float.S
    │   │   │   ├── StaticBlock8x8SparseDenseMatMul_half_half.S
    │   │   │   ├── StaticBlockSparseDenseMatMul.h.S
    │   │   │   ├── StaticSparseDenseElementWise_float_float.S
    │   │   │   └── StaticSparseDenseElementWise_half_half.S
    │   ├── popsparseCycleEstimators.cpp
    │   └── popsparseCycleEstimators.hpp
    ├── poputil
    │   ├── Broadcast.cpp
    │   ├── CMakeLists.txt
    │   ├── DebugInfo.cpp
    │   ├── GraphFunction.cpp
    │   ├── TensorMetaData.cpp
    │   ├── TensorMetaDataBase.hpp
    │   ├── TensorUseTracker.cpp
    │   ├── TileMapping.cpp
    │   ├── Util.cpp
    │   ├── VarStructure.cpp
    │   └── exceptions.cpp
    └── poputil_mock
    │   ├── CMakeLists.txt
    │   └── TileMapping.cpp
├── lsan.supp
├── packaging_files
    └── enable.sh.in
├── requirements.txt
├── tests
    ├── CMakeLists.txt
    ├── popfloat
    │   ├── CMakeLists.txt
    │   └── DebugInfoTest.cpp
    ├── poplibs_support
    │   ├── CMakeLists.txt
    │   ├── LoggingTest.cpp
    │   ├── MultiArrayTest.cpp
    │   ├── PlanConstraintsTest.cpp
    │   └── StridedRegionsTest.cpp
    ├── poplibs_test
    │   ├── CMakeLists.txt
    │   └── IdenticalLayoutTest.cpp
    ├── poplin
    │   ├── CMakeLists.txt
    │   ├── CholeskyTest.cpp
    │   ├── ConvExpandDimsVertexTest.cpp
    │   ├── ConvOptionsTest.cpp
    │   ├── ConvPlanTest.cpp
    │   ├── ConvTest.cpp
    │   ├── ConvUtilTest.cpp
    │   ├── LuFactorizationTest.cpp
    │   ├── MeshGridTest.cpp
    │   ├── MultiConvolutionPlanTest.cpp
    │   ├── MultiConvolutionTest.cpp
    │   ├── QRFactorizationTest.cpp
    │   ├── RangeTest.cpp
    │   ├── TriangularSolveTest.cpp
    │   ├── WinogradConv.cpp
    │   ├── codelets
    │   │   ├── CMakeLists.txt
    │   │   ├── ConvPartial1x1Out.cpp
    │   │   ├── ConvPartial1xNSLIC.cpp
    │   │   └── OuterProductTest.cpp
    │   ├── json
    │   │   ├── depthwise_conv_half.json
    │   │   ├── depthwise_conv_quarter1.json
    │   │   ├── depthwise_conv_quarter2.json
    │   │   ├── method_amp.json
    │   │   ├── pc_T10392.json
    │   │   ├── pc_conv7x7_stride_2_1024_in_512_out_serial_fail_case.json
    │   │   ├── pc_serial_split_ocx2.json
    │   │   ├── pc_serial_split_ocx3.json
    │   │   ├── pc_serial_split_ocx4.json
    │   │   ├── pointwise_conv_float.json
    │   │   ├── pointwise_reusable_conv_float.json
    │   │   ├── simple_depthwise_conv.json
    │   │   ├── slic144.json
    │   │   ├── slic1611.json
    │   │   ├── slic222.json
    │   │   ├── slic411.json
    │   │   └── slic811.json
    │   └── vertexVerifier
    │   │   ├── CMakeLists.txt
    │   │   ├── ConvPartialHalfHalfnx1KernelShapes.vv
    │   │   ├── ConvPartialQuarterHalf1x1Out.vv
    │   │   ├── ConvPartialQuarterHalf1x1OutConvGroups.vv
    │   │   ├── ConvPartialQuarterHalf1xnStride1.vv
    │   │   ├── ConvPartialQuarterHalf1xnStride2.vv
    │   │   ├── ConvPartialQuarterHalfnx1.vv
    │   │   ├── ConvPartialQuarterHalfnx1ConvGroups.vv
    │   │   └── ConvPartialQuarterHalfnx1KernelShapes.vv
    ├── popnn
    │   ├── BigNLVertices.cpp
    │   ├── CMakeLists.txt
    │   ├── CTCLossPlanTest.cpp
    │   ├── GraphProgLocationTest.cpp
    │   ├── LogSoftmaxTest.cpp
    │   ├── LossTest.cpp
    │   ├── NonLinearityGradSweepTest.cpp
    │   ├── NonLinearitySweepTest.cpp
    │   ├── NonLinearityTest.cpp
    │   ├── NormStatisticsTest.cpp
    │   ├── ROIAlignTest.cpp
    │   ├── SpatialSoftmaxTest.cpp
    │   └── codelets
    │   │   ├── CMakeLists.txt
    │   │   ├── CTCInferenceCodeletTest.cpp
    │   │   ├── CTCInferenceCodeletTestConnection.cpp
    │   │   ├── CTCInferenceCodeletTestConnection.hpp
    │   │   ├── CTCInferenceGenerateCandidates.cpp
    │   │   ├── CTCInferenceGenerateOutput.cpp
    │   │   ├── CTCInferenceMergeCandidates.cpp
    │   │   ├── CTCInferenceRankAndReduceCandidates.cpp
    │   │   ├── CTCInferenceUpdate.cpp
    │   │   ├── CTCLossCodeletTest.cpp
    │   │   ├── LossTransform.cpp.in
    │   │   ├── NonLinearity1D.cpp
    │   │   ├── NonLinearity2D.cpp
    │   │   └── PoolingCodeletTest.cpp
    ├── popops
    │   ├── AllTrueTest.cpp
    │   ├── BertSlicing.cpp
    │   ├── BertSlicingRefSmall.hpp
    │   ├── BinaryOpTest.cpp
    │   ├── BroadcastGeneratePatterns.cpp
    │   ├── BroadcastOptimiseTest.cpp
    │   ├── CMakeLists.txt
    │   ├── CircBufTests.cpp
    │   ├── DynamicSliceCreation.cpp
    │   ├── DynamicSliceTest.cpp
    │   ├── ElementWiseUtilTest.cpp
    │   ├── EncodingTest.cpp
    │   ├── ExprAPI.cpp
    │   ├── ExprName.cpp
    │   ├── GatherSimpleTest.cpp
    │   ├── GatherTest.cpp
    │   ├── HistogramTest.cpp
    │   ├── HostSliceTensorTest.cpp
    │   ├── LoopTest.cpp
    │   ├── MapExprMultiVertex.cpp
    │   ├── MapExprOptimisations.cpp
    │   ├── MapExprRemAndDivideOptimisations.cpp
    │   ├── MapExprScalar.cpp
    │   ├── MapFusionTest.cpp
    │   ├── MapMultipleOuts.cpp
    │   ├── MapWithOutputTest.cpp
    │   ├── NaNTest.cpp
    │   ├── NormaliseImageTest.cpp
    │   ├── PaddingTest.cpp
    │   ├── PlanMultipleCorrectnessTest.cpp
    │   ├── PlannedMultiSliceUpdateTest.cpp
    │   ├── QuarterTypeArithmeticTests.cpp
    │   ├── ReduceEdgeCases.cpp
    │   ├── ReductionPatternsTest.cpp
    │   ├── ReductionTests.cpp
    │   ├── RegroupTest.cpp
    │   ├── ScalarInFromRowsCommon.hpp
    │   ├── ScaledAddTest.cpp
    │   ├── ScatterTest.cpp
    │   ├── ScatterUpdateTest.cpp
    │   ├── SelectScalarFromRowsTest.cpp
    │   ├── SeqSliceTest.cpp
    │   ├── SortTest.cpp
    │   ├── SplineBasisTest.cpp
    │   ├── SplineWeightingTest.cpp
    │   ├── StdArithmeticTests.cpp
    │   ├── StdOperatorsTest.cpp
    │   ├── UnaryOpTest.cpp
    │   ├── UpdateScalarInRowsTest.cpp
    │   ├── codelets
    │   │   ├── BinaryCodeletsTest.cpp
    │   │   ├── BinaryCodeletsTest.hpp
    │   │   ├── BinaryOpRptLoopTest.vv
    │   │   ├── BroadcastSelect.cpp.in
    │   │   ├── CMakeLists.txt
    │   │   ├── CastCodeletsTest.cpp
    │   │   ├── CastTest.cpp
    │   │   ├── ClampTest.cpp.in
    │   │   ├── CodeletsTestsCommon.hpp
    │   │   ├── ContinuousReduce.cpp
    │   │   ├── DynamicSlice1DCodeletTest.cpp
    │   │   ├── DynamicSliceCodeletTest.cpp
    │   │   ├── FillCodeletsTest.cpp
    │   │   ├── FloatPointBehaviour.cpp
    │   │   ├── MultiSliceCodeletTest.cpp
    │   │   ├── PartialsEqualSizeReduce.cpp
    │   │   ├── Reduce.cpp
    │   │   ├── ReduceAdd.cpp
    │   │   ├── ReduceMaxClassGather.cpp
    │   │   ├── ReduceMaxClassSparse.cpp
    │   │   ├── ReduceNMaxClassGather.cpp
    │   │   ├── ReduceNMaxClassSparse.cpp
    │   │   ├── ReduceSpecial23.cpp
    │   │   ├── ScalarMultiplyCodeletsTest.cpp
    │   │   ├── ScaledAddCodeletsTest.cpp
    │   │   ├── Select.cpp.in
    │   │   ├── UnaryCodeletsTest.cpp
    │   │   ├── UnaryCodeletsTest.hpp
    │   │   ├── histogramCodeletTest.cpp
    │   │   └── select
    │   │   │   ├── bool.hpp
    │   │   │   ├── float.hpp
    │   │   │   ├── half.hpp
    │   │   │   ├── int.hpp
    │   │   │   └── unsigned_int.hpp
    │   └── infiles
    │   │   └── reduceInT59445.tensor
    ├── poprand
    │   └── CMakeLists.txt
    ├── popsparse
    │   ├── BlockSparseOpsTest.cpp
    │   ├── BlockSparseTest.cpp
    │   ├── CMakeLists.txt
    │   ├── PopsparseFullyConnectedPlan.cpp
    │   ├── ShardedSparseMatMul.cpp
    │   ├── SparseFormatsTest.cpp
    │   ├── SparseFormatsValidateTest.cpp
    │   ├── SparsePartitionerTests.cpp
    │   ├── StaticSparsePartitionerTest.cpp
    │   ├── bs-m8x8_0.8_nr.txt
    │   ├── codelets
    │   │   ├── BlockTransposeGradWTest.cpp
    │   │   ├── CMakeLists.txt
    │   │   ├── SparseDenseMatMulBlock.cpp
    │   │   ├── SparseDenseMatMulElementWise.cpp
    │   │   ├── SparseDenseMultiSlice.cpp
    │   │   ├── SparseDensePartitionBlock.cpp
    │   │   ├── SparseDensePartitionBlock.hpp
    │   │   ├── SparseDensePartitionElementWise.cpp
    │   │   ├── SparseDensePartitionElementWise.hpp
    │   │   ├── SparseDenseUtils.cpp
    │   │   ├── SparseDenseUtils.hpp
    │   │   └── SparseGatherTest.cpp
    │   └── static_sparse_T71209_mask.txt
    ├── poputil
    │   ├── BroadcastToMatchTest.cpp
    │   ├── CMakeLists.txt
    │   ├── CopyToIpu.cpp
    │   ├── DuplicateTensor.cpp
    │   ├── GraphFunctionTest.cpp
    │   ├── GraphReplication.cpp
    │   ├── LargeSplitRegionsTest.cpp
    │   ├── OptionParsingTest.cpp
    │   ├── TileMappingTest.cpp
    │   ├── UtilTest.cpp
    │   └── VarStructureTest.cpp
    ├── sanity
    │   ├── ApiCppVersionTest.cpp
    │   ├── CMakeLists.txt
    │   ├── ConsistentExecutableTest.cpp
    │   ├── EnumerateDevices.cpp
    │   ├── HangTest.cpp
    │   ├── ParallelTest.cpp
    │   └── dependencies
    │   │   ├── CMakeLists.txt
    │   │   └── check_cmake_link_include.py
    ├── soak-hw.json
    └── soak.json
└── tools
    ├── CMakeLists.txt
    ├── bs_matmul_test.cpp
    ├── cast_to_gfloat.cpp
    ├── cast_to_gfloat.hpp
    ├── cast_to_gfloat_sr.cpp
    ├── consistent_executable_tool.cpp
    ├── ctc_beam_search.cpp
    ├── ctc_inference_model_validate.py
    ├── ctc_loss.cpp
    ├── ctc_model_validate.cpp
    ├── embedding_layer.cpp
    ├── fully_connected_layer.cpp
    ├── general_matrix_multiply.cpp
    ├── generate_cycle_estimate.py
    ├── gru_layer.cpp
    ├── lstm_layer.cpp
    ├── matrix_solver.cpp
    ├── multi_conv_layer.cpp
    ├── norm_layer.cpp
    ├── oct_conv_layer.cpp
    ├── pooling_layer.cpp
    ├── random_generator.cpp
    ├── reduce_op.cpp
    ├── rnn_layer.cpp
    ├── single_conv_layer.cpp
    ├── single_conv_layer_random.py
    ├── single_conv_soak.py
    ├── sparse_embedding_tied_matmul.cpp
    ├── sparse_fc_layer.cpp
    ├── sparse_matmul.cpp
    ├── src
        ├── conv_analysis.cpp
        └── conv_analysis.hpp
    ├── stalls_highlighter.py
    ├── static_sparse_matmul.cpp
    ├── test_determinism.py
    ├── topk.cpp
    └── transforms.py


/.ci/view_component_trigger/Jenkinsfile:
--------------------------------------------------------------------------------
1 | @Library('sw-jenkins-library@view-component-trigger') _
2 | 
3 | viewComponentTrigger(jobsFilepath: '.ci/view_component_trigger/jobs.groovy')
4 | 


--------------------------------------------------------------------------------
/.ci/view_component_trigger/jobs.groovy:
--------------------------------------------------------------------------------
1 | [
2 |     [job: '/poplar/poplar_pr', parameters: [string(name: 'GCCI_BRANCH', value: 'mk2-main')]],
3 | ]
4 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: llvm
2 | Language: Cpp
3 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @Software-GCAI/poplar
2 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yaml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   pull_request:
 5 | 
 6 | concurrency:
 7 |   group: ${{ github.workflow }}-${{ github.ref }}
 8 |   cancel-in-progress: true
 9 | 
10 | jobs:
11 |   pre-commit:
12 |     runs-on: ["self-hosted", "linux"]
13 |     steps:
14 |     - uses: actions/checkout@v3
15 |       with:
16 |         fetch-depth: 0  # Required for us to have a valid ref to the target branch
17 |     - uses: actions/setup-python@v3
18 |     - uses: actions/cache@v3
19 |       with:
20 |         path: ~/.cache/pre-commit/
21 |         key: pre-commit-4|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
22 |     - uses: webfactory/ssh-agent@v0.8.0
23 |       with:
24 |         ssh-private-key: ${{ secrets.PRE_COMMIT_SCRIPTS_DEPLOY_KEY }}
25 |     - name: SSH Keys and known_hosts
26 |       run: |
27 |         mkdir -p ~/.ssh/ && touch ~/.ssh/known_hosts
28 |         ssh-keyscan github.com >> ~/.ssh/known_hosts
29 |     - uses: pre-commit/action@v3.0.0
30 |       with:
31 |         extra_args: "--from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }}"
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | tools/__pycache__
 3 | build/
 4 | CMakeLists.txt.user
 5 | *.DS_Store
 6 | cmake-build-debug
 7 | 
 8 | .vscode
 9 | compile_commands.json
10 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 5 |     rev: v3.2.0
 6 |     hooks:
 7 |     -   id: trailing-whitespace
 8 |     -   id: end-of-file-fixer
 9 |     -   id: check-json
10 |     -   id: check-added-large-files
11 | -   repo: https://github.com/cheshirekow/cmake-format-precommit
12 |     rev: v0.6.10
13 |     hooks:
14 |     -   id: cmake-lint
15 |         additional_dependencies: [pyyaml]
16 |         args: [--config=.cmake-lint.yaml]
17 |         types: [cmake]
18 | -   repo: ssh://git@github.com/Software-GCAI/precommit-scripts
19 |     rev: v1.0.1
20 |     hooks:
21 |     -   id: gc-copyright
22 | 


--------------------------------------------------------------------------------
/License.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2020 Graphcore Limited
 2 | 
 3 | Licensed under terms of MIT license.
 4 | You may obtain a copy of the license at https://opensource.org/licenses/MIT
 5 | 
 6 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 7 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 8 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 9 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
10 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
11 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12 | 


--------------------------------------------------------------------------------
/cbt.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dependencies" : [
 3 |     "boost",
 4 |     "libpva",
 5 |     "libpvti",
 6 |     "gccs",
 7 |     "poplar",
 8 |     "spdlog",
 9 |     "tbb",
10 |     "zoltan"
11 |   ],
12 |   "optional_dependencies": [
13 |     "googletest"
14 |   ]
15 | }
16 | 


--------------------------------------------------------------------------------
/cmake/Modules/Findzoltan.cmake:
--------------------------------------------------------------------------------
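 1 | # Search for the zoltan include directory and libraries.
 2 | # This module defines the variables ZOLTAN_INCLUDE_DIR, ZOLTAN_LIBRARY,
 3 | # ZOLTAN_SIMPI_LIBRARY and ZOLTAN_LIBRARIES (the two libraries as a list).
 4 | # When all three paths are found it also creates the imported targets
 5 | # ZOLTAN::zoltan and ZOLTAN::simpi. zoltan_FOUND is set at the end.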
 6 | 
 7 | find_path(ZOLTAN_INCLUDE_DIR zoltan.h
 8 |   HINTS ${ZOLTAN_ROOT}/include $ENV{ZOLTAN_ROOT}/include)
 9 | 
10 | find_library(ZOLTAN_LIBRARY zoltan
11 |   HINTS ${ZOLTAN_ROOT}/lib $ENV{ZOLTAN_ROOT}/lib)
12 | 
13 | find_library(ZOLTAN_SIMPI_LIBRARY simpi
14 |   HINTS ${ZOLTAN_ROOT}/lib $ENV{ZOLTAN_ROOT}/lib)
15 | 
16 | set(ZOLTAN_LIBRARIES ${ZOLTAN_LIBRARY} ${ZOLTAN_SIMPI_LIBRARY})
17 | 
18 | if(ZOLTAN_INCLUDE_DIR AND ZOLTAN_LIBRARY AND ZOLTAN_SIMPI_LIBRARY)
19 |   if(NOT TARGET ZOLTAN::zoltan)
20 |     add_library(ZOLTAN::zoltan STATIC IMPORTED)
21 |     set_target_properties(ZOLTAN::zoltan PROPERTIES
22 |       INTERFACE_INCLUDE_DIRECTORIES "${ZOLTAN_INCLUDE_DIR}"
23 |       IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
24 |       IMPORTED_LOCATION "${ZOLTAN_LIBRARY}")
25 |     add_library(ZOLTAN::simpi STATIC IMPORTED)
26 |     set_target_properties(ZOLTAN::simpi PROPERTIES
27 |       INTERFACE_INCLUDE_DIRECTORIES "${ZOLTAN_INCLUDE_DIR}"
28 |       IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
29 |       IMPORTED_LOCATION "${ZOLTAN_SIMPI_LIBRARY}")
30 |   endif()
31 | endif()
32 | 
33 | include(FindPackageHandleStandardArgs)
34 | # Sets zoltan_FOUND
35 | find_package_handle_standard_args(zoltan DEFAULT_MSG
36 |   ZOLTAN_INCLUDE_DIR ZOLTAN_LIBRARY ZOLTAN_SIMPI_LIBRARY)
37 | 


--------------------------------------------------------------------------------
/exported_symbols_linux.lds:
--------------------------------------------------------------------------------
 1 | V0 {
 2 |   global:
 3 |     _ZN?pop*;
 4 |     _ZN??pop*;
 5 |     _ZNK?pop*;
 6 |     _ZNK??pop*;
 7 |     _ZTSN?pop*;
 8 |     _ZTSN??pop*;
 9 |     _ZTIN?pop*;
10 |     _ZTIN??pop*;
11 |     _ZTVN?pop*;
12 |     _ZTVN??pop*;
13 |     _ZStrs*6poplar4TypeE;
14 |     _ZN?experimental*;
15 |     _ZN??experimental*;
16 |   local:
17 |     *;
18 | };
19 | 


--------------------------------------------------------------------------------
/exported_symbols_osx.lds:
--------------------------------------------------------------------------------
 1 | __ZN?pop*
 2 | __ZN??pop*
 3 | __ZNK?pop*
 4 | __ZNK??pop*
 5 | __ZTSN?pop*
 6 | __ZTSN??pop*
 7 | __ZTIN?pop*
 8 | __ZTIN??pop*
 9 | __ZTVN?pop*
10 | __ZTVN??pop*
11 | __ZStrs*6poplar4TypeE
12 | __ZN?experimental*
13 | __ZN??experimental*
14 | 


--------------------------------------------------------------------------------
/include/popfloat/experimental/CastToHalf.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef POPFLOAT_HALF_UTILS_H
 4 | #define POPFLOAT_HALF_UTILS_H
 5 | 
 6 | // uint16_t is used in the declarations below, so include its header here
 7 | // rather than relying on whoever includes this file.
 8 | #include <cstdint>
 9 | 
10 | namespace popfloat {
11 | namespace experimental {
12 | 
13 | /** Cast a single precision input to half precision
14 |  *
15 |  * \param value          Single precision input
16 |  * \param enNanoo        Enable NaN on overflow (defaults to false)
17 |  * \return               The 16-bit representation of the half precision output
18 |  */
19 | uint16_t singleToHalf(float value, bool enNanoo = false);
20 | 
21 | /** Cast a half precision input to single precision
22 |  *
23 |  * \param ihalf          The 16-bit representation of the half precision input
24 |  * \return               Single precision output
25 |  */
26 | float halfToSingle(uint16_t ihalf);
27 | 
28 | } // end namespace experimental
29 | } // end namespace popfloat
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/include/popfloat/experimental/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popfloat_codelets_hpp
 4 | #define popfloat_codelets_hpp
 5 | #include <poplar/Graph.hpp>
 6 | 
 7 | namespace popfloat {
 8 | namespace experimental {
 9 | void addCodelets(poplar::Graph &graph);
10 | } // end namespace experimental
11 | } // end namespace popfloat
12 | 
13 | #endif // popfloat_codelets_hpp
14 | 


--------------------------------------------------------------------------------
/include/popfloat_mock/Mock.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef popfloat_Mock_hpp
 3 | #define popfloat_Mock_hpp
 4 | 
 5 | #include <gmock/gmock.h>
 6 | #include <popfloat/experimental/codelets.hpp>
 7 | 
 8 | namespace popfloat_mock {
 9 | // gMock class that holds the mock methods for the popfloat API.
10 | class MockPopfloat {
11 | public:
12 |   // Mocks for popfloat/experimental/codelets.hpp
13 |   // NOTE(review): presumably stands in for experimental::addCodelets - confirm.
14 |   MOCK_METHOD(void, experimental_addCodelets, (::poplar::Graph &));
15 | };
16 | // Active mock instance; MockPopfloatFixture sets it and resets it to nullptr.
17 | extern MockPopfloat *mockPopfloat_;
18 | 
19 | } // namespace popfloat_mock
20 | 
21 | #endif // popfloat_Mock_hpp
22 | 


--------------------------------------------------------------------------------
/include/popfloat_mock/MockFixture.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef popfloat_Mock_Fixture_hpp
 3 | #define popfloat_Mock_Fixture_hpp
 4 | 
 5 | #include <popfloat_mock/Mock.hpp>
 6 | 
 7 | namespace popfloat_mock {
 8 | // Owns a Mock<MockPopfloat> (StrictMock by default) and exposes it via mockPopfloat_.
 9 | template <template <typename> typename Mock = ::testing::StrictMock>
10 | class MockPopfloatFixture {
11 | public:
12 |   MockPopfloatFixture() {
13 |     mockPopfloat_ = static_cast<popfloat_mock::MockPopfloat *>(&mockPopfloat);
14 |   }
15 |   // Clear the global pointer so it does not dangle once this fixture is gone.
16 |   ~MockPopfloatFixture() { mockPopfloat_ = nullptr; }
17 | 
18 | protected:
19 |   Mock<MockPopfloat> mockPopfloat;
20 | };
21 | 
22 | } // namespace popfloat_mock
23 | 
24 | #endif // popfloat_Mock_Fixture_hpp
25 | 


--------------------------------------------------------------------------------
/include/poplibs_support/CTCInferenceDefs.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifndef popnn_CTCInferenceDefs_hpp
 3 | #define popnn_CTCInferenceDefs_hpp
 4 | 
 5 | #ifndef INCLUDE_IN_ASSEMBLER
 6 | #include <limits>
 7 | 
 8 | namespace popnn {
 9 | namespace ctc_infer {
10 | // A reserved class index representing "nothing" in beam search decoding
11 | inline constexpr auto voidSymbol = std::numeric_limits<unsigned>::max();
12 | // A reserved class index representing an invalid candidate or beam addend
13 | // in beam search decoding
14 | inline constexpr auto invalidSymbol = std::numeric_limits<unsigned>::max() - 1;
15 | } // namespace ctc_infer
16 | } // namespace popnn
17 | 
18 | #endif
19 | 
20 | // Equivalent definitions for assembler inclusion; keep in sync with the constants above (values assume a 32-bit unsigned)
21 | #define VOID_SYMBOL 0xffffffff
22 | #define INVALID_SYMBOL 0xfffffffe
23 | 
24 | #endif // popnn_CTCInferenceDefs_hpp
25 | 


--------------------------------------------------------------------------------
/include/poplibs_support/Compiler.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_support_Compiler_hpp
 4 | #define poplibs_support_Compiler_hpp
 5 | 
 6 | // This file provides useful macros to use in the Poplar libraries
 7 | // POPLIB_UNREACHABLE(): __builtin_unreachable() when NDEBUG is defined, __builtin_trap() otherwise.
 8 | #ifdef NDEBUG
 9 | #define POPLIB_UNREACHABLE() __builtin_unreachable()
10 | #else
11 | #define POPLIB_UNREACHABLE() __builtin_trap()
12 | #endif
13 | 
14 | #endif // poplibs_support_Compiler_hpp
15 | 


--------------------------------------------------------------------------------
/include/poplibs_support/ContiguousRegionsByTile.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_support_ContiguousRegionsByTile_hpp
 4 | #define poplibs_support_ContiguousRegionsByTile_hpp
 5 | 
 6 | #include <poplar/Graph.hpp>
 7 | #include <poplar/Interval.hpp>
 8 | #include <poplar/Tensor.hpp>
 9 | 
10 | #include <cstdint>
11 | #include <vector>
12 | 
13 | namespace poplibs {
14 | 
15 | /// For the given tensor return a list for each tile of contiguous memory
16 | /// regions on that tile. Each contiguous memory region is made up of a list
17 | /// of intervals in the flattened tensor A.
18 | ///
19 | /// Equivalent to the following code, but should be faster in some cases:
20 | ///
21 | ///     vector<vector<vector<Interval>>> contiguousRegionsByTile;
22 | ///
23 | ///     for (const auto &m : mapping) {
24 | ///       contiguousRegionsByTile.emplace_back(
25 | ///         graph.getSortedContiguousRegions(A, m)
26 | ///       );
27 | ///     }
28 | ///
29 | /// \param graph    The compute graph
30 | /// \param A        The tensor
31 | /// \param mapping  Must be the result of graph.getTileMapping(A). This is
32 | ///                 passed as a parameter because getTileMapping can be slow
33 | ///                 and you may already have the data.
34 | std::vector<std::vector<std::vector<poplar::Interval>>>
35 | getSortedContiguousRegionsByTile(
36 |     const poplar::Graph &graph, const poplar::Tensor &A,
37 |     const poplar::Graph::TileToTensorMapping &mapping);
38 | 
39 | } // namespace poplibs
40 | 
41 | #endif // poplibs_support_ContiguousRegionsByTile_hpp
42 | 


--------------------------------------------------------------------------------
/include/poplibs_support/ExternalCodelet.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplibs_support_ExternalCodelet_hpp
 3 | #define poplibs_support_ExternalCodelet_hpp
 4 | // ASM_CODELETS_ENABLED is true unless POPLIBS_DISABLE_ASM_CODELETS is defined.
 5 | #if defined(POPLIBS_DISABLE_ASM_CODELETS)
 6 | #define ASM_CODELETS_ENABLED false
 7 | #else
 8 | #define ASM_CODELETS_ENABLED true
 9 | #endif
10 | // IS_EXTERNAL_CODELET(pred) declares isExternalCodelet: pred when __IPU__ is defined and asm codelets are not disabled, else false.
11 | #if defined(__IPU__) && !defined(POPLIBS_DISABLE_ASM_CODELETS)
12 | #define EXTERNAL_CODELET true
13 | #define IS_EXTERNAL_CODELET(pred) static const bool isExternalCodelet = pred
14 | #else
15 | #define EXTERNAL_CODELET false
16 | #define IS_EXTERNAL_CODELET(pred) static const bool isExternalCodelet = false
17 | #endif
18 | 
19 | #endif // poplibs_support_ExternalCodelet_hpp
20 | 


--------------------------------------------------------------------------------
/include/poplibs_support/IclUtil.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplibs_support_IclUtil_hpp
 3 | #define poplibs_support_IclUtil_hpp
 4 | 
 5 | #include <boost/icl/interval_map.hpp>
 6 | #include <boost/icl/split_interval_set.hpp>
 7 | #include <cstddef>
 8 | #include <poplar/Graph.hpp>
 9 | #include <poplar/Interval.hpp>
10 | #include <vector>
11 | 
12 | // Utility functions for converting to/from boost::icl classes.
13 | 
14 | namespace poplibs {
15 | 
16 | /// Convert an ICL split_interval_set to a vector of poplar::Interval's.
17 | std::vector<poplar::Interval> splitIntervalSetToPoplar(
18 |     const boost::icl::split_interval_set<std::size_t> &intervals);
19 | 
20 | /// Convert a vector of poplar::Interval's to an ICL split_interval_set.
21 | boost::icl::split_interval_set<std::size_t>
22 | poplarToSplitIntervalSet(const std::vector<poplar::Interval> &intervals);
23 | 
24 | /// Convert a tile mapping from poplar's vector<vector<Interval>> format
25 | /// to an ICL interval_map. partial_enricher ensures tile 0 is stored.
26 | boost::icl::interval_map<std::size_t, unsigned, boost::icl::partial_enricher>
27 | tileMappingToIntervalMap(const poplar::Graph::TileToTensorMapping &mapping);
28 | 
29 | } // namespace poplibs
30 | 
31 | #endif // poplibs_support_IclUtil_hpp
32 | 


--------------------------------------------------------------------------------
/include/poplibs_support/Memoize.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplibs_support_Memoize_hpp
 3 | #define poplibs_support_Memoize_hpp
 4 | 
 5 | #include "poplibs_support/HashTuple.hpp"
 6 | #include <tbb/concurrent_unordered_map.h>
 7 | 
 8 | #include <cassert>
 9 | #include <tuple>
10 | #include <type_traits>
11 | 
12 | namespace poplibs_support {
13 | 
14 | // A simple class to memoize other functions: results are cached in a
15 | // concurrent map keyed on the tuple of argument values. Any recursive calls
16 | // the wrapped function makes to itself are not memoized.
17 | template <typename Ret, typename... Args> class Memo {
18 |   // Arguments are stored in the key with any reference stripped.
19 |   using Key = std::tuple<typename std::remove_reference<Args>::type...>;
20 | 
21 | public:
22 |   // Cache of results computed so far, keyed on the argument tuple.
23 |   tbb::concurrent_unordered_map<Key, Ret, hash_tuple::hash<Key>> table;
24 |   // The function being memoized.
25 |   Ret (*fn)(Args...);
26 | 
27 | public:
28 |   Memo(Ret (*fn)(Args...)) : fn(fn) {}
29 | 
30 |   // Return the cached result for `args`, calling fn and caching its result
31 |   // when there is no entry yet.
32 |   Ret operator()(Args... args) {
33 |     const auto key = std::make_tuple(args...);
34 |     const auto match = table.find(key);
35 |     if (match == table.end()) {
36 |       auto result = fn(args...);
37 |       auto insertRes = table.insert({key, result});
38 |       // another thread may have updated with the same key - in which case
39 |       // it should be with the same value
40 |       if (insertRes.second == false)
41 |         assert(insertRes.first->second == result);
42 |       return result;
43 |     } else {
44 |       return match->second;
45 |     }
46 |   }
47 | 
48 |   // Drop every cached result.
49 |   void clearTable() { table.clear(); }
50 | };
51 | 
52 | // Convenience wrapper that deduces Ret and Args from the function pointer.
53 | template <typename Ret, typename... Args>
54 | static Memo<Ret, Args...> memoize(Ret (*fn)(Args...)) {
55 |   return Memo<Ret, Args...>(fn);
56 | }
57 | 
58 | } // namespace poplibs_support
59 | 
60 | #endif // poplibs_support_Memoize_hpp
61 | 


--------------------------------------------------------------------------------
/include/poplibs_support/TraceChannels.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_support_TraceChannels_hpp
 4 | #define poplibs_support_TraceChannels_hpp
 5 | 
 6 | #include <pvti/pvti.hpp>
 7 | 
 8 | namespace poplibs_support {
 9 | 
10 | extern pvti::TraceChannel tracePoplin;
11 | extern pvti::TraceChannel tracePopnn;
12 | extern pvti::TraceChannel tracePopops;
13 | extern pvti::TraceChannel tracePoprand;
14 | extern pvti::TraceChannel tracePopsparse;
15 | extern pvti::TraceChannel tracePoputil;
16 | 
17 | } // end namespace poplibs_support
18 | 
19 | #endif // poplibs_support_TraceChannels_hpp
20 | 


--------------------------------------------------------------------------------
/include/poplibs_support/VectorUtils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_support_VectorUtils_hpp
 4 | #define poplibs_support_VectorUtils_hpp
 5 | 
 6 | #include <functional>
 7 | #include <numeric>
 8 | #include <type_traits>
 9 | #include <vector>
10 | // product(v) / sum(v): fold v with * starting from T(1), or with + starting from T(0); an empty v yields that start value.
11 | template <class T> inline T product(const std::vector<T> &v) {
12 |   return std::accumulate(v.begin(), v.end(), T(1), std::multiplies<T>());
13 | }
14 | template <class T> inline T sum(const std::vector<T> &v) {
15 |   return std::accumulate(v.begin(), v.end(), T(0), std::plus<T>());
16 | }
17 | // Returns the inverse of the permutation v, i.e. result[v[i]] == i for every i.
18 | template <class T>
19 | inline std::vector<T> inversePermutation(const std::vector<T> &v) {
20 |   static_assert(
21 |       std::is_unsigned<T>(),
22 |       "Data type must be unsigned integer as data represents indices");
23 |   std::vector<T> result(v.size());
24 |   for (std::size_t i = 0; i < v.size(); ++i) {
25 |     result[v[i]] = i; // no range check: every v[i] must be < v.size()
26 |   }
27 |   return result;
28 | }
29 | // Builds a std::vector<To> by constructing one To from each element of `in`, in order.
30 | template <class To, class From>
31 | std::vector<To> vectorConvert(const std::vector<From> &in) {
32 |   std::vector<To> out;
33 |   out.reserve(in.size());
34 |   for (const auto &x : in) {
35 |     out.emplace_back(x); // constructs To in place from x
36 |   }
37 |   return out;
38 | }
39 | // Returns a copy of v with every element equal to 1 removed; the order of the rest is kept.
40 | template <class T>
41 | std::vector<T> removeSingletonDimensions(const std::vector<T> &v) {
42 |   static_assert(std::is_integral<T>::value, "Integral required.");
43 |   std::vector<T> out;
44 |   for (auto e : v) {
45 |     if (e != 1)
46 |       out.emplace_back(e);
47 |   }
48 |   return out;
49 | }
50 | 
51 | #endif // poplibs_support_VectorUtils_hpp
52 | 


--------------------------------------------------------------------------------
/include/poplibs_support/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplibs_support_codelets_hpp_
 3 | #define poplibs_support_codelets_hpp_
 4 | #include <string>
 5 | 
 6 | namespace poplibs {
 7 | 
 8 | struct CurrentLibLocator {
 9 |   void *dummy;
10 | };
11 | 
12 | std::string getCodeletsPath(const std::string &libName,
13 |                             const std::string &codeletsFile,
14 |                             const CurrentLibLocator &locator);
15 | 
16 | } // end namespace poplibs
17 | 
18 | #endif // poplibs_support_codelets_hpp_
19 | 


--------------------------------------------------------------------------------
/include/poplibs_support/forceInterleavedEstimates.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplibs_support_forceInterleavedEstimates_hpp
 3 | #define poplibs_support_forceInterleavedEstimates_hpp
 4 | 
 5 | // This file contains the function that is called to check the environment
 6 | // variable that is set when we want the cycle estimators to be forced to
 7 | // account for interleaved memory
 8 | 
 9 | namespace poplibs_support {
10 | 
11 | bool getForceInterleavedEstimates();
12 | 
13 | } // namespace poplibs_support
14 | 
15 | #endif // poplibs_support_forceInterleavedEstimates_hpp
16 | 


--------------------------------------------------------------------------------
/include/poplibs_test/Embedding.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include <boost/multi_array.hpp>
 4 | 
 5 | namespace poplibs_test {
 6 | namespace embedding {
 7 | 
 8 | template <typename FPType>
 9 | void multiSlice(const boost::multi_array<FPType, 2> &embeddingMatrix,
10 |                 const std::vector<unsigned> &indices,
11 |                 boost::multi_array<FPType, 2> &result);
12 | 
13 | template <typename FPType>
14 | void multiUpdateAdd(const boost::multi_array<FPType, 2> &deltas,
15 |                     const std::vector<unsigned> &indices, const FPType scale,
16 |                     boost::multi_array<FPType, 2> &embeddingMatrix);
17 | 
18 | } // namespace embedding
19 | } // namespace poplibs_test
20 | 


--------------------------------------------------------------------------------
/include/poplibs_test/GeneralMatrixAdd.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_test_GeneralMatrixAdd_hpp
 4 | #define poplibs_test_GeneralMatrixAdd_hpp
 5 | 
 6 | #include <boost/multi_array.hpp>
 7 | 
 8 | namespace poplibs_test {
 9 | namespace axpby {
10 | 
11 | /*
12 |  * Computes matC = alpha * op(matA) + beta * op(matB)
13 |  *
14 |  * where op(matA) = A     if transposeA = false
15 |  *       op(matA) = A'    if transposeA = true
16 |  *
17 |  *
18 |  *       op(matB) = B     if transposeB = false
19 |  *       op(matB) = B'    if transposeB = true
20 |  *
21 |  * Matrix dimensions of op(A) must be equal to op(B)
22 |  */
23 | 
24 | void add(const boost::multi_array_ref<double, 3> matA,
25 |          const boost::multi_array_ref<double, 3> matB,
26 |          boost::multi_array_ref<double, 3> matC, float alpha = 1.0,
27 |          float beta = 1.0);
28 | 
29 | void add(const boost::multi_array_ref<double, 2> matA,
30 |          const boost::multi_array_ref<double, 2> matB,
31 |          boost::multi_array_ref<double, 2> matC, float alpha = 1.0,
32 |          float beta = 1.0, bool transposeA = false, bool transposeB = false);
33 | 
34 | void add(const boost::multi_array_ref<double, 1> matA,
35 |          const boost::multi_array_ref<double, 1> matB,
36 |          boost::multi_array_ref<double, 1> matC, float alpha = 1.0,
37 |          float beta = 1.0);
38 | 
39 | } // End namespace axpby.
40 | } // End namespace poplibs_test.
41 | 
42 | #endif // poplibs_test_GeneralMatrixAdd_hpp
43 | 


--------------------------------------------------------------------------------
/include/poplibs_test/MatrixTransforms.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_test_MatrixTransforms_hpp
 4 | #define poplibs_test_MatrixTransforms_hpp
 5 | 
 6 | #include <boost/multi_array.hpp>
 7 | 
 8 | namespace poplibs_test {
 9 | namespace matrix {
10 | 
11 | // Return the transpose of the 2D array `in`: out[c][r] == in[r][c].
12 | template <typename FPType>
13 | boost::multi_array<FPType, 2>
14 | transpose(const boost::multi_array<FPType, 2> &in) {
15 |   const std::size_t inRows = in.shape()[0];
16 |   const std::size_t inColumns = in.shape()[1];
17 |   boost::multi_array<FPType, 2> out(boost::extents[inColumns][inRows]);
18 |   // Index with std::size_t so the counters match the width of the extents.
19 |   for (std::size_t inRow = 0; inRow < inRows; inRow++)
20 |     for (std::size_t inColumn = 0; inColumn < inColumns; inColumn++)
21 |       out[inColumn][inRow] = in[inRow][inColumn];
22 |   return out;
23 | }
24 | 
25 | } // End namespace matrix
26 | } // End namespace poplibs_test
27 | 
28 | #endif // poplibs_test_MatrixTransforms_hpp
29 | 


--------------------------------------------------------------------------------
/include/poplibs_test/Multirate.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_test_Multirate_hpp
 4 | #define poplibs_test_Multirate_hpp
 5 | 
 6 | #include <boost/multi_array.hpp>
 7 | 
 8 | namespace poplibs_test {
 9 | 
10 | //
11 | // Upsample multi-array by a given factor
12 | //
13 | // The flattened field dimensions are located in the innermost multi-array
14 | // dimension
15 | //
16 | void upsample(const std::vector<std::size_t> &inFieldShape,
17 |               const unsigned samplingRate,
18 |               const boost::multi_array<double, 3> &input,
19 |               boost::multi_array<double, 3> &output);
20 | 
21 | //
22 | // Downsample multi-array by a given factor
23 | //
24 | // The flattened field dimensions are located in the innermost multi-array
25 | // dimension
26 | //
27 | void downsample(const std::vector<std::size_t> &outFieldShape,
28 |                 const unsigned samplingRate,
29 |                 const boost::multi_array<double, 3> &input,
30 |                 boost::multi_array<double, 3> &output);
31 | 
32 | } // namespace poplibs_test
33 | 
34 | #endif // poplibs_test_Multirate_hpp
35 | 


--------------------------------------------------------------------------------
/include/poplibs_test/Pass.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_test_Pass_hpp
 4 | #define poplibs_test_Pass_hpp
 5 | 
 6 | #include <iosfwd>
 7 | 
 8 | namespace poplibs_test {
 9 | 
10 | // class to allow the training pass to be specified
11 | enum class Pass { FWD, BWD, WU, ALL };
12 | 
13 | const char *asString(const Pass &pass);
14 | 
15 | std::istream &operator>>(std::istream &is, Pass &pass);
16 | 
17 | std::ostream &operator<<(std::ostream &os, const Pass &pass);
18 | 
19 | } // End namespace poplibs_test.
20 | 
21 | #endif // poplibs_test_Pass_hpp
22 | 


--------------------------------------------------------------------------------
/include/poplibs_test/Pooling.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_test_Pooling_hpp
 4 | #define poplibs_test_Pooling_hpp
 5 | 
 6 | #include "poplibs_support/MultiArray.hpp"
 7 | #include "popnn/PoolingDef.hpp"
 8 | 
 9 | #include <vector>
10 | 
11 | namespace poplibs_test {
12 | namespace pooling {
13 | 
14 | void pooling(popnn::PoolingType pType, const std::vector<unsigned> &stride,
15 |              const std::vector<std::size_t> &kernel,
16 |              const std::vector<int> &paddingLower,
17 |              const std::vector<int> &paddingUpper,
18 |              const poplibs_support::MultiArray<double> &in,
19 |              poplibs_support::MultiArray<double> &out);
20 | 
21 | void poolingBackward(popnn::PoolingType pType, bool useScaledGradForMaxPool,
22 |                      const std::vector<unsigned> &stride,
23 |                      const std::vector<std::size_t> &kernel,
24 |                      const std::vector<int> &paddingLower,
25 |                      const std::vector<int> &paddingUpper,
26 |                      const poplibs_support::MultiArray<double> &prevAct,
27 |                      const poplibs_support::MultiArray<double> &nextAct,
28 |                      const poplibs_support::MultiArray<double> &in,
29 |                      poplibs_support::MultiArray<double> &out);
30 | 
31 | } // namespace pooling
32 | } // namespace poplibs_test
33 | 
34 | #endif // poplibs_test_Pooling_hpp
35 | 


--------------------------------------------------------------------------------
/include/poplibs_test/TempDir.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef test_helper_hpp
 3 | #define test_helper_hpp
 4 | 
 5 | #include <boost/filesystem.hpp>
 6 | 
 7 | /// Representation of temporary directory that removes it on destruction.
 8 | class TempDir {
 9 |   std::string path; // non-const so the move constructor can empty the source
10 | 
11 | public:
12 |   TempDir(std::string path) : path{std::move(path)} {}
13 |   static TempDir create() {
14 |     using namespace boost::filesystem;
15 |     const auto path = unique_path("poplibs_%%%%%%%%%%%%");
16 |     if (!create_directories(path)) {
17 |       throw std::runtime_error("Error creating temporary directory " +
18 |                                path.string());
19 |     }
20 |     return TempDir(path.string());
21 |   }
22 |   TempDir(const TempDir &) = delete;
23 |   TempDir &operator=(const TempDir &) = delete;
24 |   // Take over the directory; `other` is emptied so its destructor is a no-op.
25 |   TempDir(TempDir &&other) : path{std::move(other.path)} { other.path.clear(); }
26 |   ~TempDir() {
27 |     // Use the non-throwing overloads: a destructor must not throw.
28 |     boost::system::error_code ec;
29 |     if (!path.empty() && boost::filesystem::exists(path, ec)) {
30 |       boost::filesystem::remove_all(path, ec);
31 |     }
32 |   }
33 |   std::string getPath() const { return path; }
34 | };
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/include/poplibs_test/exceptions.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplibs_test_exceptions_hpp
 4 | #define poplibs_test_exceptions_hpp
 5 | 
 6 | #include <stdexcept>
 7 | #include <string>
 8 | 
 9 | namespace poplibs_test {
10 | 
11 | struct poplibs_test_error : std::runtime_error {
12 |   std::string type; // set from __FUNCTION__ inside the constructor
13 |   explicit poplibs_test_error(const std::string &s) : std::runtime_error(s) {
14 |     type.assign(__FUNCTION__);
15 |   }
16 |   // Forward C strings to the std::string overload above.
17 |   explicit poplibs_test_error(const char *s)
18 |       : poplibs_test_error(std::string(s)) {}
19 | };
20 | 
21 | } // End namespace poplibs_test.
22 | 
23 | #endif // poplibs_test_exceptions_hpp
24 | 


--------------------------------------------------------------------------------
/include/poplin/ConvPreplan.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *
 4 |  * Functions and data types to support performing convolution preplanning
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef poplin_ConvPreplan_hpp
 9 | #define poplin_ConvPreplan_hpp
10 | #include "Convolution.hpp"
11 | #include "MatMul.hpp"
12 | 
13 | #include <poplar/Graph.hpp>
14 | #include <poplar/OptionFlags.hpp>
15 | #include <set>
16 | #include <tuple>
17 | 
18 | namespace poplin {
19 | 
20 | /**
21 |  * Plan the specified convolutions & matmuls.
22 |  *
23 |  * \param convs   A set of tuples of:
24 |  *                  - conv-specific target for tile / IPU sizing
25 |  *                  - convolution parameters
26 |  *                  - implementation options. See createWeights().
27 |  *
28 |  *                All entries must have matching machine parameters.
29 |  * \param matmuls A set of tuples of:
30 |  *                  - matmul-specific target for tile / IPU sizing
31 |  *                  - matmul parameters
32 |  *                  - implementation options. See createWeights().
33 |  *
34 |  *                All entries must have matching machine parameters.
35 |  * \param cache   The planning cache to update.
36 |  */
37 | void preplan(const std::set<ConvPlanParams> &convs,
38 |              const std::set<MatMulPlanParams> &matmuls, PlanningCache &cache);
39 | 
40 | } // namespace poplin
41 | 
42 | #endif // poplin_ConvPreplan_hpp
43 | 


--------------------------------------------------------------------------------
/include/poplin/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplin_codelets_hpp
 4 | #define poplin_codelets_hpp
 5 | #include <poplar/Graph.hpp>
 6 | 
 7 | /// Linear algebra functions.
 8 | namespace poplin {
 9 | void addCodelets(poplar::Graph &graph);
10 | }
11 | 
12 | #endif // poplin_codelets_hpp
13 | 


--------------------------------------------------------------------------------
/include/poplin/experimental/LuFactorization.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 Graphcore Ltd. All rights reserved.
 2 | /**
 3 |  *
 4 |  * Decomposition of a matrix into a lower triangular matrix L and an upper
 5 |  * triangular matrix U.
 6 |  *
 7 |  */
 8 | 
 9 | #ifndef poplin_LuFactorization_hpp
10 | #define poplin_LuFactorization_hpp
11 | #include <poplar/Graph.hpp>
12 | #include <poplar/Program.hpp>
13 | #include <poplar/Tensor.hpp>
14 | 
15 | namespace poplin {
16 | 
17 | namespace experimental {
18 | 
19 | /**
20 |  * Calculates the LU factorization for the given matrix.
21 |  *
22 |  *
23 |  *  \param graph          The Poplar graph.
24 |  *  \param input          Input Tensor of floating-point type [M, N].
25 |  *  \param seq            A reference to a program sequence to which the code
26 |  *                        to perform the factorization will be appended.
27 |  *  \param debugContext   Optional debug information.
28 |  *
29 |  *  \returns              The matrices L and U, where ->
30 |  *                        L = Lower triangular fp32 matrix [M, M].
31 |  *                        U = Upper triangular fp32 matrix [M, N].
32 |  */
33 | std::pair<poplar::Tensor, poplar::Tensor>
34 | LUFactorization(poplar::Graph &graph, poplar::Tensor &input,
35 |                 poplar::program::Sequence &seq,
36 |                 const poplar::DebugContext &debugContext = {});
37 | 
38 | } // namespace experimental
39 | 
40 | } // namespace poplin
41 | 
42 | #endif // poplin_LuFactorization_hpp
43 | 


--------------------------------------------------------------------------------
/include/poplin_mock/MockFixture.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplin_Mock_Fixture_hpp
 3 | #define poplin_Mock_Fixture_hpp
 4 | 
 5 | #include <poplin_mock/Mock.hpp>
 6 | 
 7 | namespace poplin_mock {
 8 | 
 9 | template <template <typename> typename Mock = ::testing::StrictMock>
10 | class MockPoplinFixture {
11 | public:
12 |   MockPoplinFixture() {
13 |     mockPoplin_ = static_cast<poplin_mock::MockPoplin *>(&mockPoplin);
14 |   }
15 |   // Reset mockPoplin_ on destruction so it stops pointing at this mock.
16 |   ~MockPoplinFixture() { mockPoplin_ = nullptr; }
17 | 
18 | protected:
19 |   Mock<MockPoplin> mockPoplin; // wrapper chosen by the template argument
20 | };
21 | 
22 | } // namespace poplin_mock
23 | 
24 | #endif // poplin_Mock_Fixture_hpp
25 | 


--------------------------------------------------------------------------------
/include/popnn/CTCPlan.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *
 4 |  * Support for planning Connectionist Temporal Classification (CTC) Operations.
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef popnn_CTCPlan_hpp
 9 | #define popnn_CTCPlan_hpp
10 | 
11 | #include <poputil/DebugInfo.hpp>
12 | 
13 | namespace popnn {
14 | namespace ctc {
15 | 
16 | /** An object representing a plan that describes how to map tensors and
17 |  *  implement the CTC Loss or CTC Inference functions.
18 |  */
19 | class Plan {
20 | public:
21 |   Plan();
22 |   ~Plan();
23 |   Plan(const Plan &other);
24 |   Plan(Plan &&other);
25 |   Plan &operator=(const Plan &other);
26 |   Plan &operator=(Plan &&other);
27 |   // Comparison operators; declared again at namespace scope after the class.
28 |   friend bool operator<(const Plan &a, const Plan &b);
29 |   friend bool operator==(const Plan &a, const Plan &b);
30 |   // Stream output and profile-value conversion are granted friend access.
31 |   friend std::ostream &operator<<(std::ostream &o, const Plan &p);
32 |   friend poplar::ProfileValue poputil::toProfileValue<>(const Plan &p);
33 | 
34 |   // Internal implementation
35 |   class Impl;
36 |   Impl &getImpl() const { return *impl; } // dereferences impl without a check
37 |   Plan(std::unique_ptr<Impl> impl);
38 | 
39 | private:
40 |   std::unique_ptr<Impl> impl; // owning pointer to the Impl object
41 | };
42 | 
43 | bool operator<(const Plan &a, const Plan &b);
44 | bool operator==(const Plan &a, const Plan &b);
45 | bool operator!=(const Plan &a, const Plan &b);
46 | 
47 | } // namespace ctc
48 | } // namespace popnn
49 | 
50 | #endif // popnn_CTCPlan_hpp
51 | 


--------------------------------------------------------------------------------
/include/popnn/GruDef.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Definitions for GRU cell operations.
 4 |  */
 5 | 
 6 | #ifndef popnn_GruDef_hpp
 7 | #define popnn_GruDef_hpp
 8 | 
 9 | /**
10 |  * The units within a basic GRU cell. In general all of these
11 |  * require a weight matrix, a bias and a non-linearity. Typically,
12 |  * a fixed type of non-linearity is associated with each type of unit.
13 |  */
14 | enum BasicGruCellUnit {
15 |   BASIC_GRU_CELL_RESET_GATE = 0,
16 |   BASIC_GRU_CELL_UPDATE_GATE = 1,
17 |   BASIC_GRU_CELL_CANDIDATE = 2,
18 |   BASIC_GRU_CELL_NUM_UNITS = 3 // equals the number of unit enumerators above
19 | };
20 | 
21 | #endif // popnn_GruDef_hpp
22 | 


--------------------------------------------------------------------------------
/include/popnn/LogSoftmax.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Log of softmax functions.
 4 |  */
 5 | 
 6 | #ifndef popnn_LogSoftmax_hpp
 7 | #define popnn_LogSoftmax_hpp
 8 | 
 9 | #include <poplar/Graph.hpp>
10 | #include <poplar/Program.hpp>
11 | 
12 | namespace popnn {
13 | 
14 | /** Update tensor \p t by computing log of softmax in-place.
15 |  *
16 |  * \param graph             The graph to add the operation to.
17 |  * \param t                 The tensor to apply the log of softmax to.
18 |  * \param prog              The sequence to add the operation to.
19 |  * \param debugContext      Optional debug information.
20 |  */
21 | void logSoftmaxInPlace(poplar::Graph &graph, poplar::Tensor t,
22 |                        poplar::program::Sequence &prog,
23 |                        const poplar::DebugContext &debugContext = {});
24 | 
25 | /** Compute the log of the softmax of tensor \p t and return the result.
26 |  *
27 |  * \param graph             The graph to add the operation to.
28 |  * \param t                 The tensor to apply the non-linearity to.
29 |  * \param prog              The sequence to add the operation to.
30 |  * \param debugContext      Optional debug information.
31 |  *
32 |  * \returns A new tensor containing the contents of \p t with the
33 |  *          log of the softmax applied.
34 |  */
35 | poplar::Tensor logSoftmax(poplar::Graph &graph, poplar::Tensor t,
36 |                           poplar::program::Sequence &prog,
37 |                           const poplar::DebugContext &debugContext = {});
38 | 
39 | } // end namespace popnn
40 | 
41 | #endif // popnn_LogSoftmax_hpp
42 | 


--------------------------------------------------------------------------------
/include/popnn/LstmDef.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Definitions for LSTM cell operations.
 4 |  */
 5 | 
 6 | #ifndef popnn_LstmDef_hpp
 7 | #define popnn_LstmDef_hpp
 8 | 
 9 | /**
10 |  * The units within a basic LSTM cell.
11 |  *
12 |  * The term unit is used to refer to either
13 |  * a gate, or a cell state vector computation. In general all of these
14 |  * require a weight matrix, a bias and a non-linearity. Typically, a fixed
15 |  * type of non-linearity is associated with each type of unit.
16 |  */
17 | enum BasicLstmCellUnit {
18 |   BASIC_LSTM_CELL_FORGET_GATE = 0,
19 |   BASIC_LSTM_CELL_INPUT_GATE = 1,
20 |   BASIC_LSTM_CELL_CANDIDATE = 2,
21 |   BASIC_LSTM_CELL_OUTPUT_GATE = 3,
22 |   BASIC_LSTM_CELL_NUM_UNITS = 4 // equals the number of unit enumerators above
23 | };
24 | 
25 | #endif // popnn_LstmDef_hpp
26 | 


--------------------------------------------------------------------------------
/include/popnn/NonLinearityDef.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2016 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Definitions for non-linearity operations.
 4 |  */
 5 | 
 6 | #ifndef popnn_NonLinearityDef_hpp
 7 | #define popnn_NonLinearityDef_hpp
 8 | 
 9 | namespace popnn {
10 | 
11 | enum class NonLinearityType {
12 |   /// Sigmoid:
13 |   ///  * y = 1 / (1 + e^(-x))
14 |   SIGMOID,
15 |   /// Hard Sigmoid:
16 |   ///  * y = max(0, min(1, 0.2*x + 0.5))
17 |   HARD_SIGMOID,
18 |   /// Rectified Linear Unit:
19 |   ///  * x >= 0 -> y = x
20 |   ///  * x < 0 -> y = 0
21 |   RELU,
22 |   /// Hyperbolic tangent:
23 |   ///  * y = tanh(x)
24 |   TANH,
25 |   /// Gaussian Error Linear Unit:
26 |   ///  * y = x * Phi(x)
27 |   /// where Phi(x) is the cumulative distribution function of the standard
28 |   /// normal (Gaussian) distribution. Phi(x) is approximated as:
29 |   ///  * Phi(x) = 0.5 * (1 + (tanh(x * 0.7978845608 * (1 + 0.044715 * x * x))))
30 |   GELU,
31 |   /// Gaussian Error Linear Unit:
32 |   /// More precise version that uses erf instead of tanh.
33 |   GELU_ERF,
34 |   /// Swish:
35 |   ///  * y = x / (1 + e^(-x))
36 |   SWISH,
37 |   /// Softmax:
38 |   ///  * Always applied over the innermost dimension of the given tensor.
39 |   ///    Outer dimensions are independent of one another.
40 |   SOFTMAX,
41 |   /// Same as SOFTMAX, but a slower, more numerically stable algorithm is used.
42 |   SOFTMAX_STABLE,
43 |   /// Same as SOFTMAX, but a slower, more numerically stable algorithm is used.
44 |   /// Outputs are scaled to allow use of greater dynamic range in outputs.
45 |   SOFTMAX_SCALED
46 | };
47 | 
48 | } // end namespace popnn
49 | 
50 | #endif // popnn_NonLinearityDef_hpp
51 | 


--------------------------------------------------------------------------------
/include/popnn/PoolingDef.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Definitions for pooling operations.
 4 |  */
 5 | 
 6 | #ifndef popnn_PoolingDef_hpp
 7 | #define popnn_PoolingDef_hpp
 8 | 
 9 | namespace popnn {
10 | 
11 | /// Pooling types
12 | enum class PoolingType { MAX, AVG, SUM };
13 | 
14 | } // End namespace popnn.
15 | 
16 | #endif // popnn_PoolingDef_hpp
17 | 


--------------------------------------------------------------------------------
/include/popnn/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popnn_codelets_hpp
 4 | #define popnn_codelets_hpp
 5 | #include <poplar/Graph.hpp>
 6 | 
 7 | /// Functions used in neural networks.
 8 | namespace popnn {
 9 | void addCodelets(poplar::Graph &graph);
10 | }
11 | 
12 | #endif // popnn_codelets_hpp
13 | 


--------------------------------------------------------------------------------
/include/popnn_mock/Mock.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef popnn_Mock_hpp
 3 | #define popnn_Mock_hpp
 4 | 
 5 | #include <gmock/gmock.h>
 6 | #include <popnn/codelets.hpp>
 7 | 
 8 | namespace popnn_mock {
 9 | 
10 | class MockPopnn {
11 | public:
12 |   // codelets.hpp
13 |   // Mock method with the same signature as addCodelets(poplar::Graph &).
14 |   MOCK_METHOD(void, addCodelets, (::poplar::Graph &));
15 | };
16 | 
17 | extern MockPopnn *mockPopnn_;
18 | 
19 | } // namespace popnn_mock
20 | 
21 | #endif // popnn_Mock_hpp
22 | 


--------------------------------------------------------------------------------
/include/popnn_mock/MockFixture.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef popnn_Mock_Fixture_hpp
 3 | #define popnn_Mock_Fixture_hpp
 4 | 
 5 | #include <popnn_mock/Mock.hpp>
 6 | 
 7 | namespace popnn_mock {
 8 | 
 9 | template <template <typename> typename Mock = ::testing::StrictMock>
10 | class MockPopnnFixture {
11 | public:
12 |   MockPopnnFixture() {
13 |     mockPopnn_ = static_cast<popnn_mock::MockPopnn *>(&mockPopnn);
14 |   }
15 |   // Reset mockPopnn_ on destruction so it stops pointing at this mock.
16 |   ~MockPopnnFixture() { mockPopnn_ = nullptr; }
17 | 
18 | protected:
19 |   Mock<MockPopnn> mockPopnn; // wrapper chosen by the template argument
20 | };
21 | 
22 | } // namespace popnn_mock
23 | 
24 | #endif // popnn_Mock_Fixture_hpp
25 | 


--------------------------------------------------------------------------------
/include/popops/AllTrue.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *
 4 |  * Perform logical AND of tensor elements.
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef popops_AllTrue_hpp
 9 | #define popops_AllTrue_hpp
10 | 
11 | #include <poplar/Graph.hpp>
12 | #include <poplar/Program.hpp>
13 | #include <string>
14 | 
15 | namespace popops {
16 | 
17 | /**
18 |  * Given a boolean tensor, compute the logical AND of all its elements.
19 |  * A new variable is created to store the result.
20 |  * \param graph         The Poplar graph.
21 |  * \param A             The boolean tensor.
22 |  * \param prog          The program sequence to add this operation to.
23 |  * \param debugContext  Optional debug information.
24 |  * \returns             A variable that holds the result of the operation.
25 |  * \throw poputil::poplibs_error If the elements of \p A are not booleans.
26 |  */
27 | poplar::Tensor allTrue(poplar::Graph &graph, poplar::Tensor A,
28 |                        poplar::program::Sequence &prog,
29 |                        const poplar::DebugContext &debugContext = {});
30 | 
31 | } // namespace popops
32 | 
33 | #endif // popops_AllTrue_hpp
34 | 


--------------------------------------------------------------------------------
/include/popops/EncodingConstants.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *
 4 |  * Constants used by encoding functions.
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef popops_EncodingConstants_hpp
 9 | #define popops_EncodingConstants_hpp
10 | 
11 | /// Code point for masked index (an index to be ignored).
12 | #define MASKED_LABEL_CODE 0xFFFFFFFFU
13 | 
14 | /// Small constant used in natural logarithm computation.
15 | /// @{
16 | #define EPS_LOG_N_FLOAT (1.17549435e-38F)
17 | #define EPS_LOG_N_HALF (0.000000059605F)
18 | /// @}
19 | 
20 | #endif // popops_EncodingConstants_hpp
21 | 


--------------------------------------------------------------------------------
/include/popops/ExprOpUtils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef _popops_ExprOpUtils_hpp_
 4 | #define _popops_ExprOpUtils_hpp_
 5 | 
 6 | #include <iosfwd>
 7 | 
 8 | #include <popops/ExprOp.hpp>
 9 | 
10 | namespace popops::expr {
11 | namespace ostream_ext {
12 | 
13 | std::ostream &operator<<(std::ostream &os, const UnaryOpType &t);
14 | std::ostream &operator<<(std::ostream &os, const BinaryOpType &t);
15 | std::ostream &operator<<(std::ostream &os, const TernaryOpType &t);
16 | 
17 | } // end namespace ostream_ext
18 | } // end namespace popops::expr
19 | 
20 | #endif // _popops_ExprOpUtils_hpp_
21 | 


--------------------------------------------------------------------------------
/include/popops/Operation.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *
 4 |  * Read/write types of operations used in a reduce.
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef popops_Operation_hpp
 9 | #define popops_Operation_hpp
10 | 
11 | #include "popops/OperationDef.hpp"
12 | #include <iosfwd>
13 | 
14 | namespace popops {
15 | 
16 | /// Parse token from input stream \p is to \p op. Valid input values are the
17 | /// stringified enumerations, for example "ADD" or "MUL".
18 | /// \return The original input stream.
19 | std::istream &operator>>(std::istream &is, Operation &op);
20 | 
21 | /// Write \p op to output stream \p os. The value written is the stringified
22 | /// enumeration, for example "ADD" or "MUL".
23 | /// \return The original output stream.
24 | std::ostream &operator<<(std::ostream &os, const Operation &op);
25 | 
26 | } // End namespace popops
27 | 
28 | #endif // popops_Operation_hpp
29 | 


--------------------------------------------------------------------------------
/include/popops/OperationDef.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *
 4 |  * Define types of operations used in Reduce/MultiUpdate
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef popops_OperationDef_hpp
 9 | #define popops_OperationDef_hpp
10 | 
11 | namespace popops {
12 | 
13 | /// Type of operation to use in a reduction.
14 | /// See reduce() for example use.
15 | enum class Operation {
16 |   ADD,
17 |   MUL,
18 |   MIN,
19 |   MAX,
20 |   LOGICAL_AND, ///< Only supports boolean operands.
21 |   LOGICAL_OR,  ///< Only supports boolean operands.
22 |   SQUARE_ADD,  ///< Squares each element before applying ADD reduction.
23 |   LOG_ADD,     ///< Reduce using acc = a+log(1+exp(b-a))
24 | };
25 | 
26 | } // End namespace popops
27 | 
28 | #endif // popops_OperationDef_hpp
29 | 


--------------------------------------------------------------------------------
/include/popops/SortOrder.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Definitions of sort ordering.
 4 |  */
 5 | 
 6 | #ifndef _popops_SortOrder_hpp_
 7 | #define _popops_SortOrder_hpp_
 8 | 
 9 | #include <iosfwd>
10 | 
11 | namespace popops {
12 | 
13 | /// Defines a required order for sorting operations.
14 | enum class SortOrder {
15 |   /// No ordering is required.
16 |   NONE,
17 |   /// Sort in ascending order.
18 |   ASCENDING,
19 |   /// Sort in descending order.
20 |   DESCENDING
21 | };
22 | 
23 | std::ostream &operator<<(std::ostream &os, const SortOrder &o);
24 | 
25 | } // end namespace popops
26 | 
27 | #endif // _popops_SortOrder_hpp_
28 | 


--------------------------------------------------------------------------------
/include/popops/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popops_codelets_hpp
 4 | #define popops_codelets_hpp
 5 | #include <poplar/Graph.hpp>
 6 | 
 7 | /// Common functions, such as elementwise and reductions.
 8 | namespace popops {
 9 | void addCodelets(poplar::Graph &graph);
10 | }
11 | 
12 | #endif // popops_codelets_hpp
13 | 


--------------------------------------------------------------------------------
/include/popops_mock/Matchers.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef popops_Matchers_hpp
 3 | #define popops_Matchers_hpp
 4 | 
 5 | #include <gmock/gmock.h>
 6 | 
 7 | #include <popops/Expr.hpp>
 8 | 
 9 | namespace popops_mock {
10 | 
11 | MATCHER_P(IsExpr, e, "") { return arg.deepEquals(e); }
12 | 
13 | } // end namespace popops_mock
14 | 
15 | #endif // popops_Matchers_hpp
16 | 


--------------------------------------------------------------------------------
/include/popops_mock/MockFixture.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef popops_Mock_Fixture_hpp
 3 | #define popops_Mock_Fixture_hpp
 4 | 
 5 | #include <gmock/gmock.h>
 6 | #include <popops_mock/Mock.hpp>
 7 | 
 8 | namespace popops_mock {
 9 | 
10 | template <template <typename> typename Mock = ::testing::StrictMock>
11 | class MockPopopsFixture {
12 | public:
13 |   // Publish this fixture's mock through the non-member mockPopops_ pointer.
14 |   MockPopopsFixture() { mockPopops_ = &mockPopops; }
15 | 
16 |   // Reset the pointer when the fixture is destroyed.
17 |   ~MockPopopsFixture() { mockPopops_ = nullptr; }
18 | 
19 | protected:
20 |   Mock<MockPopops> mockPopops;
21 | };
22 | 
23 | } // namespace popops_mock
24 | 
25 | #endif // popops_Mock_Fixture_hpp
26 | 


--------------------------------------------------------------------------------
/include/poprand/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poprand_codelets_hpp
 4 | #define poprand_codelets_hpp
 5 | #include <poplar/Graph.hpp>
 6 | 
 7 | /// Pseudo-random number generator (PRNG) functions.
 8 | namespace poprand {
 9 | void addCodelets(poplar::Graph &graph);
10 | }
11 | 
12 | #endif // poprand_codelets_hpp
13 | 


--------------------------------------------------------------------------------
/include/poprand_mock/Mock.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef poprand_Mock_hpp
 3 | #define poprand_Mock_hpp
 4 | 
 5 | #include <gmock/gmock.h>
 6 | #include <poprand/codelets.hpp>
 7 | 
 8 | namespace poprand_mock {
 9 | 
10 | class MockPoprand {
11 | public:
12 |   // codelets.hpp
13 | 
14 |   MOCK_METHOD(void, addCodelets, (::poplar::Graph &));
15 | };
16 | 
17 | extern MockPoprand *mockPoprand_;
18 | 
19 | } // namespace poprand_mock
20 | 
21 | #endif // poprand_Mock_hpp
22 | 


--------------------------------------------------------------------------------
/include/poprand_mock/MockFixture.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef poprand_Mock_Fixture_hpp
 3 | #define poprand_Mock_Fixture_hpp
 4 | 
 5 | #include <poprand_mock/Mock.hpp>
 6 | 
 7 | namespace poprand_mock {
 8 | 
 9 | template <template <typename> typename Mock = ::testing::StrictMock>
10 | class MockPoprandFixture {
11 | public:
12 |   // Publish this fixture's mock through the extern mockPoprand_ pointer.
13 |   MockPoprandFixture() { mockPoprand_ = &mockPoprand; }
14 | 
15 |   // Reset the pointer when the fixture is destroyed.
16 |   ~MockPoprandFixture() { mockPoprand_ = nullptr; }
17 | 
18 | protected:
19 |   Mock<MockPoprand> mockPoprand;
20 | };
21 | 
22 | } // namespace poprand_mock
23 | 
24 | #endif // poprand_Mock_Fixture_hpp
25 | 


--------------------------------------------------------------------------------
/include/popsparse/PlanningCache.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | /** \file
 3 |  *  Caching of plans for dynamically sparse operations.
 4 |  */
 5 | 
 6 | #ifndef popsparse_PlanningCache_hpp
 7 | #define popsparse_PlanningCache_hpp
 8 | 
 9 | #include <memory>
10 | 
11 | namespace poplin {
12 | 
13 | class PlanningCache;
14 | 
15 | } // namespace poplin
16 | 
17 | namespace popsparse {
18 | namespace dynamic {
19 | 
20 | class PlanningCacheImpl;
21 | 
22 | /** Class used to cache the calculation of plans for dynamically sparse
23 |  *  operations. This is optional and speeds up graph construction for these
24 |  *  operations.
25 |  */
26 | class PlanningCache {
27 | public:
28 |   PlanningCache();
29 |   PlanningCache(poplin::PlanningCache *planningCache);
30 |   ~PlanningCache();
31 |   std::unique_ptr<PlanningCacheImpl> impl;
32 | };
33 | 
34 | } // end namespace dynamic
35 | 
36 | namespace static_ {
37 | 
38 | class PlanningCacheImpl;
39 | 
40 | /** Class used to cache the calculation of plans for statically sparse
41 |  *  operations. This is optional and speeds up graph construction for these
42 |  *  operations.
43 |  */
44 | class PlanningCache {
45 | public:
46 |   PlanningCache();
47 |   PlanningCache(poplin::PlanningCache *planningCache);
48 |   ~PlanningCache();
49 |   std::unique_ptr<PlanningCacheImpl> impl;
50 | };
51 | 
52 | } // end namespace static_
53 | 
54 | } // end namespace popsparse
55 | 
56 | #endif // popsparse_PlanningCache_hpp
57 | 


--------------------------------------------------------------------------------
/include/popsparse/codelets.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef popsparse_codelets_hpp
 3 | #define popsparse_codelets_hpp
 4 | 
 5 | #include <poplar/Graph.hpp>
 6 | 
 7 | /// Support for sparse matrices
 8 | namespace popsparse {
 9 | 
10 | void addCodelets(poplar::Graph &graph);
11 | 
12 | } // end namespace popsparse
13 | 
14 | #endif // popsparse_codelets_hpp
15 | 


--------------------------------------------------------------------------------
/include/poputil/TensorMetaData.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | /** \file TensorMetaData.hpp
 3 |  *
 4 |  * Class to allow extra data to be associated with a tensor.
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef poputil_TensorMetaData_hpp
 9 | #define poputil_TensorMetaData_hpp
10 | 
11 | #include <memory>
12 | 
13 | namespace poputil {
14 | 
15 | class TensorMetaDataBase;
16 | 
17 | /** Class used to represent some unspecified form of meta-data for a tensor.
18 |  */
19 | class TensorMetaData {
20 |   std::unique_ptr<TensorMetaDataBase> data;
21 | 
22 | public:
23 |   TensorMetaData();
24 |   TensorMetaData(const TensorMetaData &other);
25 |   TensorMetaData(TensorMetaData &&other);
26 |   TensorMetaData &operator=(const TensorMetaData &other);
27 |   TensorMetaData &operator=(TensorMetaData &&other);
28 | 
29 |   // Implementation details
30 |   TensorMetaData(std::unique_ptr<TensorMetaDataBase> data);
31 |   ~TensorMetaData();
32 |   const TensorMetaDataBase *getData() const { return data.get(); }
33 | };
34 | 
35 | } // end namespace poputil
36 | 
37 | #endif // poputil_TensorMetaData_hpp
38 | 


--------------------------------------------------------------------------------
/include/poputil/exceptions.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | /** \file exceptions.hpp
 3 |  *
 4 |  *  Define a PopLibs exception.
 5 |  *
 6 |  */
 7 | 
 8 | #ifndef poputil_exceptions_hpp
 9 | #define poputil_exceptions_hpp
10 | 
11 | #include <stdexcept>
12 | #include <string>
13 | 
14 | namespace poputil {
15 | 
16 | /** Class for PopLibs exceptions */
17 | struct poplibs_error : std::runtime_error {
18 |   std::string type;
19 |   explicit poplibs_error(const std::string &s);
20 |   explicit poplibs_error(const char *s);
21 | };
22 | 
23 | } // namespace poputil
24 | 
25 | #endif // poputil_exceptions_hpp
26 | 


--------------------------------------------------------------------------------
/include/poputil_mock/Mock.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef poputil_Mock_hpp
 3 | #define poputil_Mock_hpp
 4 | 
 5 | #include <gmock/gmock.h>
 6 | #include <poputil/TileMapping.hpp>
 7 | 
 8 | namespace poputil_mock {
 9 | 
10 | class MockPoputil {
11 | public:
12 |   MOCK_METHOD(void, mapTensorLinearly,
13 |               (::poplar::Graph &, const ::poplar::Tensor &, unsigned,
14 |                unsigned));
15 | 
16 |   MOCK_METHOD(void, mapTensorLinearly,
17 |               (::poplar::Graph &, const ::poplar::Tensor &));
18 | 
19 |   MOCK_METHOD(unsigned, getTileImbalance,
20 |               (const ::poplar::Graph &, const ::poplar::Tensor &, unsigned,
21 |                unsigned));
22 | 
23 |   MOCK_METHOD((std::pair<::poplar::Tensor, unsigned>), cloneAndExpandAliasing,
24 |               (poplar::Graph &, const poplar::Tensor &, unsigned,
25 |                const poplar::DebugContext &));
26 | };
27 | 
28 | extern MockPoputil *mockPoputil_;
29 | 
30 | } // namespace poputil_mock
31 | 
32 | #endif // poputil_Mock_hpp
33 | 


--------------------------------------------------------------------------------
/include/poputil_mock/MockFixture.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #ifndef poputil_Mock_Fixture_hpp
 3 | #define poputil_Mock_Fixture_hpp
 4 | 
 5 | #include <poputil_mock/Mock.hpp>
 6 | 
 7 | namespace poputil_mock {
 8 | 
 9 | template <template <typename> typename Mock = ::testing::StrictMock>
10 | class MockPoputilFixture {
11 | public:
12 |   // Publish this fixture's mock through the extern mockPoputil_ pointer.
13 |   MockPoputilFixture() { mockPoputil_ = &mockPoputil; }
14 | 
15 |   // Reset the pointer when the fixture is destroyed.
16 |   ~MockPoputilFixture() { mockPoputil_ = nullptr; }
17 | 
18 | protected:
19 |   Mock<MockPoputil> mockPoputil;
20 | };
21 | 
22 | } // namespace poputil_mock
23 | 
24 | #endif // poputil_Mock_Fixture_hpp
25 | 


--------------------------------------------------------------------------------
/lib/popfloat/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/codelets.hpp"
 3 | #include "popfloatCycleEstimators.hpp"
 4 | #include <popfloat/experimental/codelets.hpp>
 5 | 
 6 | namespace popfloat {
 7 | namespace experimental {
 8 | 
 9 | void addCodelets(poplar::Graph &graph) {
10 |   static poplibs::CurrentLibLocator loc;
11 |   graph.addCodelets(poplibs::getCodeletsPath("popfloat", "popfloat.gp", loc));
12 |   poputil::internal::registerPerfFunctions(graph, makePerfFunctionTable());
13 | }
14 | 
15 | } // end namespace experimental
16 | } // end namespace popfloat
17 | 


--------------------------------------------------------------------------------
/lib/popfloat/codelets/CastFloatToGf8.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/ExternalCodelet.hpp"
 3 | #include <poplar/Vertex.hpp>
 4 | 
 5 | static constexpr auto COMPACT_PTR = poplar::VectorLayout::COMPACT_PTR;
 6 | 
 7 | using namespace poplar;
 8 | 
 9 | namespace popfloat {
10 | namespace experimental {
11 | 
12 | class CastFloatToGf8Supervisor
13 |     : public SupervisorVertexIf<ASM_CODELETS_ENABLED> {
14 | public:
15 |   CastFloatToGf8Supervisor();
16 | 
17 |   Input<Vector<int, COMPACT_PTR, 8>> param;
18 |   Input<Vector<float, COMPACT_PTR, 8>> in;
19 |   Output<Vector<signed char, COMPACT_PTR, 4>> out;
20 |   unsigned short elementsPerWorker;
21 |   unsigned short lastWorkerParams;
22 | 
23 |   IS_EXTERNAL_CODELET(EXTERNAL_CODELET);
24 | 
25 |   void compute() {}
26 | };
27 | 
28 | } // end namespace experimental
29 | } // end namespace popfloat
30 | 


--------------------------------------------------------------------------------
/lib/popfloat/codelets/CastGf8ToFloat.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "popfloatCodelets.hpp"
 3 | #include "popfloatUtils.hpp"
 4 | #include "poplibs_support/ExternalCodelet.hpp"
 5 | #include <array>
 6 | #include <cmath>
 7 | #include <popfloat/experimental/GfloatExpr.hpp>
 8 | #include <poplar/HalfFloat.hpp>
 9 | #include <poplar/Vertex.hpp>
10 | #include <print.h>
11 | 
12 | static constexpr auto COMPACT_PTR = poplar::VectorLayout::COMPACT_PTR;
13 | 
14 | using namespace poplar;
15 | 
16 | namespace popfloat {
17 | namespace experimental {
18 | 
19 | class CastGf8ToFloatSupervisor
20 |     : public SupervisorVertexIf<ASM_CODELETS_ENABLED> {
21 | public:
22 |   CastGf8ToFloatSupervisor();
23 | 
24 |   Input<Vector<int, COMPACT_PTR, 8>> param;
25 |   Input<Vector<signed char, COMPACT_PTR, 4>> in;
26 |   Output<Vector<float, COMPACT_PTR, 8>> out;
27 |   unsigned short elementsPerWorker;
28 |   unsigned short lastWorkerParams;
29 | 
30 |   IS_EXTERNAL_CODELET(EXTERNAL_CODELET);
31 | 
32 |   void compute() {}
33 | };
34 | 
35 | } // end namespace experimental
36 | } // end namespace popfloat
37 | 


--------------------------------------------------------------------------------
/lib/popfloat/codelets/PackedGfloatParams.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "asm/GfloatConst.hpp"
 3 | #include "popfloatUtils.hpp"
 4 | #include "poplibs_support/ExternalCodelet.hpp"
 5 | #include <array>
 6 | #include <cmath>
 7 | #include <poplar/HalfFloat.hpp>
 8 | #include <poplar/Vertex.hpp>
 9 | 
10 | static constexpr auto SPAN = poplar::VectorLayout::SPAN;
11 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
12 | 
13 | using namespace poplar;
14 | 
15 | namespace popfloat {
16 | namespace experimental {
17 | 
18 | class PackedGfloatParams : public Vertex { // packs format fields into a word
19 | public:
20 |   Vector<Output<int>, ONE_PTR> gfStruct; // element 0 receives the packed word
21 |   unsigned manBits;
22 |   unsigned expBits;
23 |   int expBias;
24 |   unsigned enDenorm;
25 |   unsigned enInf;
26 | 
27 |   IS_EXTERNAL_CODELET(EXTERNAL_CODELET);
28 | 
29 |   void compute() {
30 |     uint32_t param = 0; // option flags, stored below as a single byte
31 |     param += enDenorm << POPFLOAT_GF_STRUCT_ENDENORM_BIT_OFFSET;
32 |     param += enInf << POPFLOAT_GF_STRUCT_ENINF_BIT_OFFSET;
33 | 
34 |     uint32_t gfPacked;
35 |     char packed[4]; // one byte per field; byte index given by *_OFFSET
36 |     packed[POPFLOAT_GF_STRUCT_MANTISSA_SIZE_OFFSET] = manBits;
37 |     packed[POPFLOAT_GF_STRUCT_EXPONENT_SIZE_OFFSET] = expBits;
38 |     packed[POPFLOAT_GF_STRUCT_EXP_BIAS_OFFSET] = expBias;
39 |     packed[POPFLOAT_GF_STRUCT_PARAMS_OFFSET] = param;
40 | 
41 |     std::memcpy(&gfPacked, &packed, sizeof(gfPacked)); // bytes -> one word
42 |     gfStruct[0] = gfPacked;
43 |   }
44 | };
45 | 
46 | } // end namespace experimental
47 | } // end namespace popfloat
48 | 


--------------------------------------------------------------------------------
/lib/popfloat/codelets/asm/PackedGfloatParams.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #ifndef PACKED_GFLOAT_PARAMS_H
 3 | #define PACKED_GFLOAT_PARAMS_H
 4 | 
 5 | #define mGfPackedParam m0
 6 | 
 7 | #define mGfManSize m1
 8 | #define mGfPackedStruct m1
 9 | 
10 | #define mGfExpSize m2
11 | #define mShortSr m2
12 | #define mFp32Man m2
13 | 
14 | #define mExpBias m3
15 | 
16 | #define mFp16Exp m4
17 | 
18 | #define mFp16Man m5
19 | 
20 | #define mGf32Exp m6
21 | 
22 | #define mGf32Man m7
23 | #define mEnDnrm m7
24 | #define mGfOpts m7
25 | #define mSrBits m7
26 | 
27 | #define mEnInf m9
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/lib/popfloat/codelets/asm/popfloatCommon.inc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifdef __IPU__
 3 | #ifndef POPFLOAT_COMMON_H
 4 | #define POPFLOAT_COMMON_H
 5 | 
 6 | #include "poplar/AvailableVTypes.h"
 7 | #include "poplar/TileConstants.hpp"
 8 | 
 9 | // supervisor variables
10 | #define mWorkerEntry              m6
11 | 
12 | .macro POPFLOAT_SUPERVISOR_CAST_OP wrkEntry
13 |   setzi   $mWorkerEntry  , \wrkEntry
14 |   runall  $mWorkerEntry  , $m0                   , 0
15 |   sync    TEXCH_SYNCZONE_LOCAL
16 |   br      $lr
17 | .endm
18 | 
19 | .macro POPFLOAT_GET_WORKER_INDEX workerIdx
20 |   get     \workerIdx, $WSR
21 |   and     \workerIdx, \workerIdx, CSR_W_WSR__CTXTID_M1__MASK
22 | .endm
23 | 
24 | .macro POPFLOAT_MAYBE_LOAD_SCALED_PTR params baseAddr offset
25 | #if defined(VECTOR_AVAIL_SCALED_PTR64)
26 |   ldz16   \params, \baseAddr, $mzero, \offset
27 | #else
28 |   ld32    \params, \baseAddr, $mzero, \offset
29 | #endif
30 | .endm
31 | 
32 | .macro POPFLOAT_CONVERT_SCALED_PTR64_TO_PTR params
33 | #if defined(VECTOR_AVAIL_SCALED_PTR64)
34 |   shl     \params, \params, 3
35 | #endif
36 | .endm
37 | 
38 | .macro POPFLOAT_CONVERT_SCALED_PTR32_TO_PTR params tmem_base
39 | #if defined(VECTOR_AVAIL_SCALED_PTR32)
40 |   add     \params, \params, \tmem_base
41 |   shl     \params, \params, 2
42 | #endif
43 | .endm
44 | 
45 | 
46 | #endif
47 | #endif
48 | 


--------------------------------------------------------------------------------
/lib/popfloat/popfloatCycleEstimators.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #ifndef __popfloatCycleEstimators_hpp__
 3 | #define __popfloatCycleEstimators_hpp__
 4 | 
 5 | #include <poputil/cyclesTables.hpp>
 6 | 
 7 | namespace popfloat {
 8 | namespace experimental {
 9 | 
10 | poputil::internal::PerfEstimatorTable makePerfFunctionTable();
11 | 
12 | } // end namespace experimental
13 | } // end namespace popfloat
14 | #endif
15 | 


--------------------------------------------------------------------------------
/lib/popfloat_mock/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | add_library(popfloat_mock SHARED
 3 |   codelets.cpp
 4 | )
 5 | 
 6 | target_link_libraries(popfloat_mock
 7 |   PRIVATE
 8 |     gccs
 9 |     GTest::gtest
10 |     GTest::gmock
11 |     Boost::boost
12 | )
13 | 
14 | target_include_directories(popfloat_mock
15 |   PUBLIC
16 |     $<TARGET_PROPERTY:popfloat,INTERFACE_INCLUDE_DIRECTORIES>
17 | )
18 | 
19 | install(TARGETS popfloat_mock
20 |         COMPONENT popfloat_mock
21 |         EXPORT popfloat_mock
22 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}
23 |         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
24 | )
25 | 
26 | install(EXPORT popfloat_mock
27 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/popfloat_mock
28 |         FILE popfloat_mock-targets.cmake
29 |         COMPONENT popfloat_mock)
30 | 
31 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/popfloat_mock
32 |         COMPONENT popfloat_mock
33 |         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
34 | )
35 | 


--------------------------------------------------------------------------------
/lib/popfloat_mock/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <popfloat_mock/Mock.hpp>
 3 | 
 4 | namespace popfloat_mock {
 5 | MockPopfloat *mockPopfloat_ = nullptr;
 6 | } // namespace popfloat_mock
 7 | 
 8 | namespace popfloat::experimental {
 9 | 
10 | void addCodelets(poplar::Graph &graph) {
11 |   popfloat_mock::mockPopfloat_->experimental_addCodelets(graph);
12 | }
13 | 
14 | } // namespace popfloat::experimental
15 | 


--------------------------------------------------------------------------------
/lib/poplibs-config.cmake:
--------------------------------------------------------------------------------
 1 | # Find dependencies
 2 | find_package(poplar REQUIRED)
 3 | 
 4 | # Compute paths
 5 | get_filename_component(POPLIBS_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
 6 | 
 7 | # Our library dependencies (contains definitions for IMPORTED targets).
 8 | # These should be in the same order as the add_subdirectory()'s in the
 9 | # CMakeLists.txt so dependencies are handled correctly
10 | foreach(t
11 |       poputil popops poprand poplin popnn popfloat
12 |     )
13 |   if(NOT TARGET ${t} AND NOT ${t}_BINARY_DIR)
14 |     include("${POPLIBS_CMAKE_DIR}/../${t}/${t}-targets.cmake")
15 |   endif()
16 | endforeach()
17 | 


--------------------------------------------------------------------------------
/lib/poplibs_mock-config.cmake:
--------------------------------------------------------------------------------
 1 | # Find dependencies
 2 | find_package(poplibs REQUIRED)
 3 | find_package(GTest REQUIRED CONFIG)
 4 | 
 5 | # Compute paths
 6 | get_filename_component(POPLIBS_MOCK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
 7 | 
 8 | # Our library dependencies (contains definitions for IMPORTED targets).
 9 | # These should be in the same order as the add_subdirectory()'s in the
10 | # CMakeLists.txt so dependencies are handled correctly
11 | foreach(t
12 |       poputil_mock popops_mock poplin_mock popnn_mock poprand_mock popfloat_mock
13 |     )
14 |   if(NOT TARGET ${t} AND NOT ${t}_BINARY_DIR)
15 |     include("${POPLIBS_MOCK_CMAKE_DIR}/../${t}/${t}-targets.cmake")
16 |   endif()
17 | endforeach()
18 | 


--------------------------------------------------------------------------------
/lib/poplibs_support/Algorithms.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/Algorithms.hpp"
 3 | 
 4 | namespace poplibs {
 5 | 
 6 | template <>
 7 | std::size_t ival_begin<boost::icl::interval<std::size_t>::type>(
 8 |     const boost::icl::interval<std::size_t>::type &ival) {
 9 |   return ival.lower();
10 | }
11 | template <>
12 | std::size_t ival_end<boost::icl::interval<std::size_t>::type>(
13 |     const boost::icl::interval<std::size_t>::type &ival) {
14 |   return ival.upper();
15 | }
16 | 
17 | // Walk the per-tile intervals in order, assigning each one the next slot in
18 | // a flattened index space, then return those slots ordered by source interval.
19 | std::vector<poplar::Interval>
20 | getInverseMapping(const std::vector<std::vector<poplar::Interval>> &mapping) {
21 |   std::map<poplar::Interval, poplar::Interval> slotBySource;
22 | 
23 |   std::size_t nextFree = 0;
24 |   for (const auto &tileIntervals : mapping) {
25 |     for (const auto &src : tileIntervals) {
26 |       const std::size_t end = nextFree + src.size();
27 |       slotBySource.emplace(src, poplar::Interval(nextFree, end));
28 |       nextFree = end;
29 |     }
30 |   }
31 |   std::vector<poplar::Interval> result;
32 |   result.reserve(slotBySource.size());
33 |   for (const auto &kv : slotBySource)
34 |     result.push_back(kv.second);
35 |   return result;
36 | }
37 | 
38 | } // namespace poplibs
39 | 


--------------------------------------------------------------------------------
/lib/poplibs_support/TraceChannels.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "poplibs_support/TraceChannels.hpp"
 4 | 
 5 | namespace poplibs_support {
 6 | 
 7 | pvti::TraceChannel tracePoplin{"graphConstruction/poplin"};
 8 | pvti::TraceChannel tracePopnn{"graphConstruction/popnn"};
 9 | pvti::TraceChannel tracePopops{"graphConstruction/popops"};
10 | pvti::TraceChannel tracePoprand{"graphConstruction/poprand"};
11 | pvti::TraceChannel tracePopsparse{"graphConstruction/popsparse"};
12 | pvti::TraceChannel tracePoputil{"graphConstruction/poputil"};
13 | 
14 | } // end namespace poplibs_support
15 | 


--------------------------------------------------------------------------------
/lib/poplibs_support/forceInterleavedEstimates.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/forceInterleavedEstimates.hpp"
 3 | #include <cstdlib>
 4 | 
 5 | namespace poplibs_support {
 6 | 
 7 | bool getForceInterleavedEstimates() { // true iff the variable is set
 8 |   static const bool envVarPresent = // looked up once, then cached
 9 |       std::getenv("POPLIBS_FORCE_INTERLEAVED_ESTIMATES") != nullptr;
10 |   return envVarPresent;
11 | }
12 | 
13 | } // namespace poplibs_support
14 | 


--------------------------------------------------------------------------------
/lib/poplibs_test/Pass.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #include <poplibs_test/Pass.hpp>
 3 | 
 4 | #include <iostream>
 5 | #include <poplibs_support/Compiler.hpp>
 6 | #include <poputil/exceptions.hpp>
 7 | 
 8 | const char *poplibs_test::asString(const Pass &pass) {
 9 |   switch (pass) {
10 |   case Pass::ALL:
11 |     return "all";
12 |   case Pass::FWD:
13 |     return "fwd";
14 |   case Pass::BWD:
15 |     return "bwd";
16 |   case Pass::WU:
17 |     return "wu";
18 |   }
19 |   POPLIB_UNREACHABLE();
20 | }
21 | 
22 | std::istream &poplibs_test::operator>>(std::istream &is, Pass &pass) {
23 |   std::string token;
24 |   is >> token;
25 |   if (token == "all")
26 |     pass = Pass::ALL;
27 |   else if (token == "fwd")
28 |     pass = Pass::FWD;
29 |   else if (token == "bwd")
30 |     pass = Pass::BWD;
31 |   else if (token == "wu")
32 |     pass = Pass::WU;
33 |   else
34 |     throw poputil::poplibs_error("Invalid pass <" + token + ">");
35 |   return is;
36 | }
37 | 
38 | std::ostream &poplibs_test::operator<<(std::ostream &os, const Pass &pass) {
39 |   return os << asString(pass);
40 | }
41 | 


--------------------------------------------------------------------------------
/lib/poplin/CanonicalConvParams.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplin_CanonicalConvParams_hpp
 4 | #define poplin_CanonicalConvParams_hpp
 5 | 
 6 | #include "poplin/ConvParams.hpp"
 7 | #include <boost/optional.hpp>
 8 | 
 9 | namespace poplin {
10 | 
11 | class CanonicalConvParams { // ConvParams wrapper holding the canonical form
12 |   boost::optional<ConvParams> params; // engaged by the constructor
13 | 
14 | public:
15 |   CanonicalConvParams(const ConvParams &params_)
16 |       : params{params_.canonicalize()} {
17 |     // Re-canonicalising the stored value must be a no-op (assert builds only).
18 |     assert(*params == params->canonicalize() &&
19 |            "canonicalizeParams is not idempotent");
20 |   }
21 |   const ConvParams &operator*() const { return params.get(); }
22 |   const ConvParams *operator->() const { return &params.get(); }
23 | 
24 |   friend bool operator==(const CanonicalConvParams &a,
25 |                          const CanonicalConvParams &b) {
26 |     return a.params == b.params;
27 |   }
28 | 
29 |   friend bool operator<(const CanonicalConvParams &a,
30 |                         const CanonicalConvParams &b) {
31 |     return a.params < b.params;
32 |   }
33 |   // Direct access to the stored parameters (read-only and mutable).
34 |   const ConvParams &getParams() const { return params.get(); }
35 |   ConvParams &getParams() { return params.get(); }
36 |   // Hands out the stored parameters as an rvalue for the caller to move from.
37 |   ConvParams &&releaseParams() { return std::move(params.get()); }
38 | };
39 | 
40 | } // namespace poplin
41 | 
42 | #endif // poplin_CanonicalConvParams_hpp
43 | 


--------------------------------------------------------------------------------
/lib/poplin/ConvReduce.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef ConvReduce_hpp
 3 | #define ConvReduce_hpp
 4 | 
 5 | #include <string>
 6 | #include <vector>
 7 | 
 8 | #include <poplar/Graph.hpp>
 9 | #include <poplar/Tensor.hpp>
10 | #include <poplar/Type.hpp>
11 | 
12 | namespace poplin {
13 | 
14 | class ConvOptions;
15 | 
16 | poplar::Tensor
17 | multiStageGroupedReduce(poplar::Graph &graph, const poplar::Tensor &partials,
18 |                         const poplar::Type &resultType,
19 |                         std::vector<poplar::ComputeSet> &computeSets,
20 |                         const ConvOptions &options, unsigned startTile,
21 |                         bool ascendingMapping,
22 |                         const poplar::DebugNameAndId &dnai);
23 | 
24 | poplar::Tensor createMultiStageGroupedReduceOutput(
25 |     poplar::Graph &graph, const poplar::Tensor &partials,
26 |     const poplar::Type &resultType, const ConvOptions &options,
27 |     unsigned startTile, bool ascendingMapping,
28 |     const poplar::DebugNameAndId &dnai);
29 | 
30 | void multiStageGroupedReduceWithOutput(
31 |     poplar::Graph &graph, const poplar::Tensor &output,
32 |     const poplar::Tensor &partials, const poplar::Type &resultType,
33 |     std::vector<poplar::ComputeSet> &computeSets, const ConvOptions &options,
34 |     unsigned startTile, bool ascendingMapping,
35 |     const poplar::DebugNameAndId &dnai);
36 | 
37 | } // namespace poplin
38 | 
39 | #endif // ConvReduce_hpp
40 | 


--------------------------------------------------------------------------------
/lib/poplin/ConvReducePlan.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #ifndef ConvReducePlan_hpp
 3 | #define ConvReducePlan_hpp
 4 | 
 5 | #include <cassert>
 6 | #include <vector>
 7 | 
 8 | namespace poplin {
 9 | 
10 | // Given partials depth return a plan.
11 | // The plan is a vector of reduction factors of the partial depth.
12 | // The number of stages in the  reduction is equal to 1 + sizeof(return value)
13 | std::vector<unsigned> getMultiStageReducePlan(unsigned partialsDepth,
14 |                                               bool enableMultiStageReduce);
15 | 
16 | bool inline checkPartialsSizeForSingleInputReduce(
17 |     unsigned partialsBytes, const std::vector<unsigned> &memoryElementOffsets) {
18 |   // Heuristic: allow a single contiguous allocation of the partials only if
19 |   // partialsBytes, rounded up to whole memory elements, is < 1/16 of the
20 |   // tile memory spanned by memoryElementOffsets. Arithmetic is 64-bit so a
21 |   // large partialsBytes cannot wrap around and be wrongly accepted.
22 |   assert(memoryElementOffsets.size() >= 2);
23 |   const unsigned long long memoryElementSize =
24 |       memoryElementOffsets[1] - memoryElementOffsets[0];
25 |   const unsigned long long memorySize =
26 |       memoryElementOffsets.back() - memoryElementOffsets[0];
27 |   const auto occupiedElements =
28 |       (partialsBytes + memoryElementSize - 1) / memoryElementSize;
29 | 
30 |   return occupiedElements * memoryElementSize < memorySize / 16;
31 | }
32 | 
33 | } // namespace poplin
34 | 
35 | #endif // ConvReducePlan_hpp
36 | 


--------------------------------------------------------------------------------
/lib/poplin/ConvValidation.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2016 Graphcore Ltd. All rights reserved.
 2 | #ifndef _ConvValidation_hpp_
 3 | #define _ConvValidation_hpp_
 4 | 
 5 | #include <poplin/Convolution.hpp>
 6 | #include <string>
 7 | #include <vector>
 8 | 
 9 | namespace poplin {
10 | 
11 | class ConvOptions;
12 | 
13 | // Check the parameters and the options for a layer. The options may be
14 | // updated as a side effect.
15 | void validateLayerParams(const ConvParams &params, const poplar::Target &target,
16 |                          ConvOptions &options,
17 |                          const std::string &callingFnName);
18 | 
19 | } // namespace poplin
20 | 
21 | #endif // _ConvValidation_hpp_
22 | 


--------------------------------------------------------------------------------
/lib/poplin/ConvVertices.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "ConvPlan.hpp"
 4 | #include "ConvProgramTree.hpp"
 5 | #include <map>
 6 | #include <poplar/Graph.hpp>
 7 | #include <poplar/Program.hpp>
 8 | #include <poplar/Tensor.hpp>
 9 | #include <poplin/ConvParams.hpp>
10 | #include <string>
11 | #include <utility>
12 | 
13 | namespace poplin {
14 | 
15 | void calcPartialConvOutput(poplar::Graph &graph, const Plan &plan,
16 |                            unsigned tile, ConvParams params,
17 |                            std::vector<poplar::program::Copy> &transformPre,
18 |                            std::vector<poplar::Tensor> &copyWritten,
19 |                            ConvProgramTree::ComputeSetsGroup &convolveCS,
20 |                            const poplar::Tensor &in,
21 |                            const poplar::Tensor &weights, poplar::Tensor out,
22 |                            bool use128BitConvUnitLoad,
23 |                            bool disableSRForAMPVertices,
24 |                            const poplar::DebugNameAndId &dnai);
25 | 
26 | void createConvPartialSlicVertex(
27 |     poplar::Graph &graph, const Plan &plan, unsigned tile, ConvParams params,
28 |     std::vector<poplar::program::Copy> &transformPre,
29 |     std::vector<poplar::Tensor> &copyWritten, poplar::ComputeSet fwdCS,
30 |     std::map<poplar::Type, std::pair<std::vector<poplar::Tensor>,
31 |                                      std::vector<poplar::Tensor>>> &postProg,
32 |     poplar::Tensor in, poplar::Tensor weights, poplar::Tensor out,
33 |     bool disableSRForAMPVertices, const poplar::DebugNameAndId &dnai);
34 | 
35 | } // namespace poplin
36 | 


--------------------------------------------------------------------------------
/lib/poplin/MultiConvolutionInternal.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef poplin_MultiConvolutionInternal_hpp
 4 | #define poplin_MultiConvolutionInternal_hpp
 5 | 
 6 | #include "CanonicalConvParams.hpp"
 7 | #include "poplin/MultiConvolution.hpp"
 8 | 
 9 | namespace poplin {
10 | namespace multiconv {
11 | namespace internal {
12 | 
13 | // These structures mirror the public structures in MultiConvolution.hpp except
14 | // public objects (like OptionFlags) have been replaced with internal objects
15 | // (like ConvOptions).
16 | 
17 | struct CreateTensorArgs {
18 |   CanonicalConvParams params;
19 |   ConvOptions options; // internal object used in place of OptionFlags
20 |   std::string name;
21 | };
22 | 
23 | struct ConvolutionArgs {
24 |   poplar::Tensor inputs;
25 |   poplar::Tensor weights;
26 |   CanonicalConvParams params;
27 |   ConvOptions options; // internal object used in place of OptionFlags
28 | };
29 | 
30 | struct CalculateWeightDeltasArgs {
31 |   poplar::Tensor zDeltas;
32 |   poplar::Tensor activations;
33 |   CanonicalConvParams params;
34 |   ConvOptions options; // internal object used in place of OptionFlags
35 | };
36 | 
37 | template <typename ScaleType> struct ConvWeightUpdateArgs {
38 |   poplar::Tensor zDeltas;
39 |   poplar::Tensor weights;
40 |   poplar::Tensor activations;
41 |   ScaleType scale; // type is selected by the template parameter
42 |   CanonicalConvParams params;
43 |   ConvOptions options; // internal object used in place of OptionFlags
44 | };
45 | 
46 | } // namespace internal
47 | } // namespace multiconv
48 | } // namespace poplin
49 | 
50 | #endif // poplin_MultiConvolutionInternal_hpp
51 | 


--------------------------------------------------------------------------------
/lib/poplin/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/Tracepoint.hpp"
 3 | #include "poplinCycleEstimators.hpp"
 4 | #include <poplibs_support/Tracepoint.hpp>
 5 | #include <poplibs_support/codelets.hpp>
 6 | #include <poplin/codelets.hpp>
 7 | 
 8 | namespace poplin {
 9 | 
10 | // Winograd codelet names; addCodelets() gives each a zero-cycle estimator.
11 | const std::vector<std::string> winogradCodelets = {
12 |     "poplin::WgdKernelTransform<float,4,4,3,3>",
13 |     "poplin::WgdKernelTransform<half,4,4,3,3>", "poplin::WgdPartials<float>",
14 |     "poplin::WgdPartials<half>", "poplin::WgdReduce<float,4,4>",
15 |     "poplin::WgdReduce<half,4,4>", "poplin::WgdInverseTransform<float,4,4,3,3>",
16 |     "poplin::WgdInverseTransform<half,4,4,3,3>",
17 |     "poplin::WgdConvComplete<float>", "poplin::WgdConvComplete<half>"};
18 | 
19 | void addCodelets(poplar::Graph &graph) {
20 |   POPLIN_TRACEPOINT();
21 |   // Add the poplin codelet file to the graph and register its perf functions.
22 |   static poplibs::CurrentLibLocator loc;
23 |   graph.addCodelets(poplibs::getCodeletsPath("poplin", "poplin.gp", loc));
24 |   poputil::internal::registerPerfFunctions(graph, makePerfFunctionTable());
25 | 
26 |   // The winograd codelets are unsupported at present and lack correct cycle
27 |   // estimators, so each one gets an estimator that always reports zero.
28 |   auto reportZeroCycles = [](const poplar::VertexIntrospector &,
29 |                              const poplar::Target &) -> std::uint64_t {
30 |     return 0;
31 |   };
32 |   for (const auto &vertexName : winogradCodelets) {
33 |     graph.registerPerfEstimator(vertexName, reportZeroCycles);
34 |   }
35 | }
36 | 
37 | } // namespace poplin
38 | 


--------------------------------------------------------------------------------
/lib/poplin/codelets/OuterProduct.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include <cassert>
 3 | #include <cmath>
 4 | #include <poplar/HalfFloat.hpp>
 5 | #include <poplar/Vertex.hpp>
 6 | #include <type_traits>
 7 | 
 8 | #include "poplar/TileConstants.hpp"
 9 | #include "poplibs_support/ExternalCodelet.hpp"
10 | 
11 | using namespace poplar;
12 | 
13 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
14 | 
15 | namespace poplin {
16 | 
17 | template <class T>
18 | class [[poplar::constraint("elem(*weights) != elem(**out)")]] OuterProduct
19 |     : public Vertex {
20 | public:
21 |   OuterProduct();
22 | 
23 |   Input<Vector<T>> in;
24 |   Input<Vector<T, ONE_PTR, 8>> weights;
25 |   Vector<Output<Vector<T, ONE_PTR, 8>>> out;
26 |   const unsigned chansPerGroup;
27 | 
28 |   IS_EXTERNAL_CODELET(true);
29 | 
30 |   // out[g][c + x * chansPerGroup] = in[x] * weights[g * chansPerGroup + c].
31 |   void compute() {
32 |     const unsigned groupSize = chansPerGroup;
33 |     const auto numPositions = in.size();
34 |     for (unsigned group = 0; group != out.size(); ++group) {
35 |       const auto firstWeight = group * groupSize;
36 |       for (unsigned chan = 0; chan != groupSize; ++chan) {
37 |         const auto weightIdx = firstWeight + chan;
38 |         for (unsigned pos = 0; pos != numPositions; ++pos) {
39 |           out[group][chan + pos * groupSize] = in[pos] * weights[weightIdx];
40 |         }
41 |       }
42 |     }
43 |   }
44 | };
45 | 
46 | template class OuterProduct<float>;
47 | template class OuterProduct<half>;
48 | 
49 | } // end namespace poplin
50 | 


--------------------------------------------------------------------------------
/lib/poplin/codelets/WgdConvComplete.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include <poplar/HalfFloat.hpp>
 3 | #include <poplar/Vertex.hpp>
 4 | 
 5 | #include <cassert>
 6 | 
 7 | using namespace poplar;
 8 | 
 9 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
10 | 
11 | namespace poplin {
12 | 
13 | template <class FPType> class WgdConvComplete : public Vertex {
14 | 
15 | public:
16 |   /* Input vectors, one per group; they are expected to share one length.
17 |    */
18 |   Vector<Input<Vector<FPType>>> dIn;
19 | 
20 |   /* Output vectors, one per group; act[g] receives a copy of dIn[g].
21 |    */
22 |   Vector<Output<Vector<FPType, ONE_PTR>>, ONE_PTR> act;
23 | 
24 |   void compute() {
25 |     const unsigned nGroups = dIn.size();
26 |     // Take each length from its own input so dIn[0] is never read when empty.
27 |     for (unsigned gr = 0; gr < nGroups; ++gr) {
28 |       const unsigned vecLen = dIn[gr].size();
29 |       for (unsigned el = 0; el < vecLen; ++el) {
30 |         act[gr][el] = dIn[gr][el];
31 |       }
32 |     }
33 |   }
34 | };
35 | 
36 | template class WgdConvComplete<float>;
37 | template class WgdConvComplete<half>;
38 | 
39 | } // namespace poplin
40 | 


--------------------------------------------------------------------------------
/lib/poplin/codelets/asm/conv_partial_zero_output_stack.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | // Defines vertex stack state offsets shared across multiple
 3 | // codelets
 4 | //
 5 | #ifdef __IPU__
 6 | #ifndef __CONV_PARTIAL_ZERO_OUTPUT_STACK_DEF_S__
 7 | #define __CONV_PARTIAL_ZERO_OUTPUT_STACK_DEF_S__
 8 | 
 9 | // Shared stack between supervisor and workers
10 | #define WKR_ZERO_INFO 0   // word
11 | #define WKR_OUTCHAN_PTR 4 // word
12 | #define WKR_ZERO_OUTPUT_STACK (WKR_OUTCHAN_PTR + 4)
13 | 
14 | #endif // __CONV_PARTIAL_ZERO_OUTPUT_STACK_DEF_S__
15 | #endif // __IPU__
16 | 


--------------------------------------------------------------------------------
/lib/poplin/codelets/asm/conv_sr_ctrl.h.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Common header to enable/disable SR in supervisor
 4 | 
 5 | #ifndef conv_sr_ctrl_h_S_
 6 | #define conv_sr_ctrl_h_S_
 7 | #ifdef __IPU__
 8 | 
 9 | // To be used in supervisor. Save the $FP_ICTL CSR at stack offset
10 | // OFFSET_IN_STACK, then clear its ESR bits to disable stochastic rounding.
11 | .macro SAVE_FPICTL_AND_DISABLE_SR OFFSET_IN_STACK, CTRL_REG, MASK
12 |   get           $\CTRL_REG, CSR_S_FP_ICTL__INDEX
13 |   st32          $\CTRL_REG, $sp, \OFFSET_IN_STACK
14 |   setzi         $\MASK, (CSR_S_FP_ICTL__ESR__MASK << CSR_S_FP_ICTL__ESR__SHIFT)
15 |   andc          $\CTRL_REG,  $\CTRL_REG, $\MASK
16 |   put           CSR_S_FP_ICTL__INDEX, $\CTRL_REG 
17 | .endm
18 | 
19 | // Supervisor only. Restore $FP_ICTL from OFFSET_IN_STACK (uses TEMP_REG).
20 | .macro RESTORE_FPICTL OFFSET_IN_STACK, TEMP_REG
21 |   ld32          $\TEMP_REG, $sp, \OFFSET_IN_STACK
22 |   put           CSR_S_FP_ICTL__INDEX, $\TEMP_REG 
23 | .endm
24 | 
25 | 
26 | // =============================================================================
27 | 
28 | #endif // #ifdef __IPU__
29 | #endif // #ifndef conv_sr_ctrl_h_S_
30 | 
31 | // =============================================================================
32 | // =============================================================================
33 | 


--------------------------------------------------------------------------------
/lib/poplin/codelets/convCastSupport.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include <poplar/QuarterFloat.hpp>
 4 | 
 5 | #ifndef _conv_cast_support_hpp_
 6 | #define _conv_cast_support_hpp_
 7 | 
 8 | template <typename FPType, typename AccumType>
 9 | static AccumType promoteType(FPType in, quarter_metadata metadata) {
10 | #ifndef __IPU__
11 |   if constexpr (std::is_same_v<FPType, quarter>) {
12 |     // Quarter inputs are converted using their metadata, as on the IPU path.
13 |     return AccumType(poplar::toHalf(in, metadata));
14 |   } else {
15 |     return AccumType(in);
16 |   }
17 | #else
18 |   if constexpr (std::is_same_v<FPType, quarter>) {
19 | #if __IPU_ARCH_VERSION__ >= 21
20 |     return AccumType(poplar::toHalf(in, metadata));
21 | #else
22 |     // Currently unused: no quarter conversion is done for this architecture,
23 |     // so return a defined zero rather than an uninitialised value.
24 |     return AccumType(0);
25 | #endif
26 |   } else {
27 |     return AccumType(in);
28 |   }
29 | #endif
30 | }
31 | 
32 | #endif // _conv_cast_support_hpp_
33 | 


--------------------------------------------------------------------------------
/lib/poplin/poplinCycleEstimators.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #ifndef __poplinCycleEstimators_hpp__
 3 | #define __poplinCycleEstimators_hpp__
 4 | 
 5 | #include <poputil/cyclesTables.hpp>
 6 | 
 7 | namespace poplin {
 8 | 
 9 | poputil::internal::PerfEstimatorTable makePerfFunctionTable();
10 | }
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/lib/poplin_mock/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | add_library(poplin_mock SHARED # shared library built from the sources below
 3 |   MatMul.cpp
 4 |   codelets.cpp
 5 | )
 6 | 
 7 | target_link_libraries(poplin_mock # PRIVATE: not propagated to consumers
 8 |   PRIVATE
 9 |     gccs
10 |     GTest::gtest
11 |     GTest::gmock
12 |     Boost::boost
13 | )
14 | 
15 | target_include_directories(poplin_mock # reuse poplin's interface includes
16 |   PUBLIC
17 |     $<TARGET_PROPERTY:poplin,INTERFACE_INCLUDE_DIRECTORIES>
18 | )
19 | 
20 | install(TARGETS poplin_mock # the library, in the poplin_mock component
21 |         COMPONENT poplin_mock
22 |         EXPORT poplin_mock
23 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}
24 |         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
25 | )
26 | 
27 | install(EXPORT poplin_mock # targets file for the poplin_mock export set
28 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/poplin_mock
29 |         FILE poplin_mock-targets.cmake
30 |         COMPONENT poplin_mock)
31 | 
32 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/poplin_mock # mock headers
33 |         COMPONENT poplin_mock
34 |         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
35 | )
36 | 


--------------------------------------------------------------------------------
/lib/poplin_mock/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <poplin_mock/Mock.hpp>
 3 | 
 4 | namespace poplin {
 5 | 
 6 | void addCodelets(poplar::Graph &graph) {
 7 |   poplin_mock::mockPoplin_->addCodelets(graph); // delegate to the mock object
 8 | }
 9 | 
10 | } // namespace poplin
11 | 


--------------------------------------------------------------------------------
/lib/popnn/CTCPlanInternal.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popnn_CTCPlanInternal_hpp
 4 | #define popnn_CTCPlanInternal_hpp
 5 | 
 6 | #include "popnn/CTCPlan.hpp"
 7 | 
 8 | #include "CTCInferencePlan.hpp"
 9 | #include "CTCLossPlan.hpp"
10 | 
11 | #include <boost/variant.hpp>
12 | 
13 | #include <iosfwd>
14 | 
15 | namespace popnn {
16 | namespace ctc {
17 | 
18 | class Plan::Impl {
19 | public:
20 |   boost::variant<LossPlan, InferencePlan> plan; // holds one of the two kinds
21 |   // NOTE(review): presumably each accessor needs the matching kind - confirm.
22 |   const LossPlan &getAsLossPlan() const;
23 |   const InferencePlan &getAsInferencePlan() const;
24 |   std::unique_ptr<Plan::Impl> clone() const;
25 | };
26 | 
27 | bool operator<(const Plan::Impl &a, const Plan::Impl &b);
28 | bool operator==(const Plan::Impl &a, const Plan::Impl &b);
29 | 
30 | std::ostream &operator<<(std::ostream &o, const Plan::Impl &p);
31 | 
32 | } // namespace ctc
33 | } // namespace popnn
34 | 
35 | #endif // #ifndef popnn_CTCPlanInternal_hpp
36 | 


--------------------------------------------------------------------------------
/lib/popnn/CreatePoolingVertex.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "poplin/ConvParams.hpp"
 4 | #include <poplar/Graph.hpp>
 5 | #include <poplar/Program.hpp>
 6 | #include <poplar/Tensor.hpp>
 7 | #include <popnn/Pooling.hpp>
 8 | 
 9 | namespace popnn {
10 | namespace pooling {
11 | 
12 | void createPoolingVertex(poplar::Graph &graph, const PoolParams &params,
13 |                          const poplar::Tensor &prevAct,
14 |                          const poplar::Tensor &nextAct,
15 |                          poplar::program::Sequence &prog,
16 |                          const poplar::DebugNameAndId &dnai);
17 | 
18 | poplin::ConvParams makeConvParams(const PoolParams &poolParams);
19 | 
20 | } // namespace pooling
21 | } // namespace popnn
22 | 


--------------------------------------------------------------------------------
/lib/popnn/HardSigmoid.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "HardSigmoid.hpp"
 4 | #include "popops/ElementWise.hpp"
 5 | 
 6 | #include <cassert>
 7 | 
 8 | namespace {
 9 | float slopeFromDiscontinuity(float discontinuity) {
10 |   return 0.5f / discontinuity; // so that 0.5 + slope * discontinuity == 1
11 | }
12 | } // namespace
13 | 
14 | namespace popnn {
15 | 
16 | std::unique_ptr<popops::expr::Expr> HardSigmoidExpr::activation() {
17 |   namespace pe = popops::expr;
18 |   // Builds clamp(slope * _1 + 0.5, 0, 1) and returns an owned copy of it.
19 |   const float lineSlope = slopeFromDiscontinuity(discontinuity_);
20 |   auto scaled = pe::Mul(pe::_1, pe::Const(lineSlope));
21 |   auto shifted = pe::Add(scaled, pe::Const(0.5f));
22 |   auto clamped = pe::Clamp(shifted, pe::Const(0.0f), pe::Const(1.0f));
23 | 
24 |   return clamped.clone();
25 | }
26 | 
27 | std::unique_ptr<popops::expr::Expr>
28 | HardSigmoidExpr::gradient(const poplar::Type &elementType) {
29 |   namespace pe = popops::expr;
30 |   // Builds cast(|_1| <= discontinuity_) * slope * _2 as an owned expression.
31 |   auto withinLinearRegion =
32 |       pe::Lte(pe::Abs(pe::_1), pe::Const(discontinuity_));
33 |   auto indicator = pe::Cast(withinLinearRegion, elementType);
34 | 
35 |   const float lineSlope = slopeFromDiscontinuity(discontinuity_);
36 |   auto localDerivative = pe::Mul(indicator, pe::Const(lineSlope));
37 |   auto chained = pe::Mul(localDerivative, pe::_2);
38 |   return chained.clone();
39 | }
40 | 
41 | } // end namespace popnn
42 | 


--------------------------------------------------------------------------------
/lib/popnn/HardSigmoid.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popnn_HardSigmoid_hpp
 4 | #define popnn_HardSigmoid_hpp
 5 | 
 6 | #include "popops/Expr.hpp"
 7 | 
 8 | namespace popnn {
 9 | 
10 | class HardSigmoidExpr {
11 | public:
12 |   // Return a unary argument hard sigmoid expression, equivalent to max(0,
13 |   // min(1, 0.2*x + 0.5)). The argument represents the input tensor.
14 |   static std::unique_ptr<popops::expr::Expr> activation();
15 | 
16 |   // Return a binary argument hard sigmoid gradient expression: the second
17 |   // argument times 0.2 if -2.5 <= x <= 2.5 and times 0 otherwise, where x is
18 |   // the first argument. NOTE(review): confirm x is the input, not the output.
19 |   static std::unique_ptr<popops::expr::Expr>
20 |   gradient(const poplar::Type &elementType);
21 | 
22 | private:
23 |   static constexpr float discontinuity_ = 2.5f; // |x| bound used by gradient
24 | };
25 | 
26 | } // end namespace popnn
27 | 
28 | #endif // popnn_HardSigmoid_hpp


--------------------------------------------------------------------------------
/lib/popnn/NonLinearityInternal.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #ifndef popnn_NonLinearityInternal_hpp
 3 | #define popnn_NonLinearityInternal_hpp
 4 | 
 5 | // One hot / softmax scaling to improve accuracy.
 6 | // Choosing a scaling of 62000 means that accuracy is
 7 | // greatly improved compared to the default scaling of 1.0.
 8 | // The maximum we could use is 65504, which is the maximum number representable
 9 | // in IEEE FP16, but taking exp(log(65504) + e), where e is a rounding error,
10 | // could result in the number overflowing.
11 | #define SOFTMAX_SCALING (62000.0F)
12 | 
13 | #endif // popnn_NonLinearityInternal_hpp
14 | 


--------------------------------------------------------------------------------
/lib/popnn/NormsInternal.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/logging.hpp"
 3 | #include "poputil/TileMapping.hpp"
 4 | #include "poputil/exceptions.hpp"
 5 | 
 6 | #include <cassert>
 7 | 
 8 | using namespace poplar;
 9 | 
10 | namespace popnn {
11 | 
12 | void checkTensorShape(Tensor acts) {
13 |   // Tensors with fewer than two dimensions are rejected.
14 |   if (acts.rank() < 2) {
15 |     throw poputil::poplibs_error("Norm supported for tensors of rank > 1");
16 |   }
17 | }
18 | 
19 | void checkNormTensorTypes(const Type &inputType, const Target &target,
20 |                           Type &partialsType) {
21 |   if (target.getTypeSize(partialsType) >= target.getTypeSize(inputType))
22 |     return; // partials are at least as wide as the data type: keep them
23 |   poplibs_support::logging::popops::warn(
24 |       "Ignoring normalisation partialsType ({})"
25 |       " which is smaller than the input/output type ({})",
26 |       partialsType.toString(), inputType.toString());
27 |   partialsType = inputType;
28 | }
29 | 
30 | Tensor preProcessNormActs(const Tensor &acts) {
31 |   return acts.rank() == 2 ? acts.expand({2}) : acts; // only rank 2 is changed
32 | }
33 | 
34 | Tensor postProcessNormActs(const Tensor &acts, unsigned originalActsRank) {
35 |   if (originalActsRank != 2)
36 |     return acts;
37 |   // Originally rank 2: acts must now be rank 3 with a singleton dimension 2.
38 |   assert(acts.rank() == 3 && acts.dim(2) == 1);
39 |   return acts.squeeze({2});
40 | }
41 | } // namespace popnn
42 | 


--------------------------------------------------------------------------------
/lib/popnn/PoolOptions.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popnn_PoolOptions_hpp
 4 | #define popnn_PoolOptions_hpp
 5 | 
 6 | namespace popnn {
 7 | namespace pooling {
 8 | 
 9 | // Options to control the implementation of pooling.
10 | struct PoolOptions {
11 |   // Use tile introspective mapping (enabled by default).
12 |   // If disabled, a linear tile mapping is used based on the planner split.
13 |   bool poolUseIntrospectiveMapping = true;
14 |   // The pooling implementation defaults to being optimised to aid memory
15 |   // allocation. To optimise for speed instead, set this option to true.
16 |   bool optimizeForSpeed = false;
17 |   // Select the data type to use for intermediate results during the pooling
18 |   // calculation, using float where that benefits accuracy (off by default).
19 |   bool useFloatPartialsWhereBeneficial = false;
20 | };
21 | 
22 | } // namespace pooling
23 | } // namespace popnn
24 | 
25 | #endif // #ifndef popnn_PoolOptions_hpp
26 | 


--------------------------------------------------------------------------------
/lib/popnn/PoolingDefUtil.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef poplibs_PoolingDefUtil_hpp_
 3 | #define poplibs_PoolingDefUtil_hpp_
 4 | #include <poplibs_support/Compiler.hpp>
 5 | #include <popnn/PoolingDef.hpp>
 6 | #include <poputil/VertexTemplates.hpp>
 7 | 
 8 | // Specialize vertex template stringification for pooling type.
 9 | namespace poputil {
10 | 
11 | template <> struct VertexTemplateToString<popnn::PoolingType> {
12 |   static std::string to_string(const popnn::PoolingType &op) {
13 |     switch (op) {
14 |     case popnn::PoolingType::MAX:
15 |       return "popnn::PoolingType::MAX";
16 |     case popnn::PoolingType::AVG:
17 |       return "popnn::PoolingType::AVG";
18 |     case popnn::PoolingType::SUM:
19 |       return "popnn::PoolingType::SUM";
20 |     }
21 |     POPLIB_UNREACHABLE(); // only reached if op matches none of the cases
22 |   }
23 | };
24 | 
25 | } // end namespace poputil
26 | 
27 | #endif // poplibs_PoolingDefUtil_hpp_
28 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #include "popnnCycleEstimators.hpp"
 3 | #include <poplibs_support/codelets.hpp>
 4 | #include <popnn/codelets.hpp>
 5 | 
 6 | namespace popnn {
 7 | 
 8 | void addCodelets(poplar::Graph &graph) {
 9 |   static poplibs::CurrentLibLocator loc; // handed to getCodeletsPath below
10 |   graph.addCodelets(poplibs::getCodeletsPath("popnn", "popnn.gp", loc));
11 |   poputil::internal::registerPerfFunctions(graph, makePerfFunctionTable());
12 | }
13 | 
14 | } // namespace popnn
15 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/CalcAccuracy.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "popops/EncodingConstants.hpp"
 3 | #include <poplar/HalfFloat.hpp>
 4 | #include <poplar/Vertex.hpp>
 5 | 
 6 | using namespace poplar;
 7 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
 8 | 
 9 | namespace popnn {
10 | 
11 | template <typename LabelType> class CalcAccuracy : public Vertex {
12 | public:
13 |   CalcAccuracy();
14 | 
15 |   Input<Vector<LabelType>> maxPerBatch;
16 |   Input<Vector<LabelType, ONE_PTR>> expected;
17 |   InOut<unsigned> numCorrect;
18 | 
19 |   // Adds to numCorrect the count of unmasked entries where both inputs agree.
20 |   void compute() {
21 |     unsigned matches = 0;
22 |     for (std::size_t b = 0; b != maxPerBatch.size(); ++b) {
23 |       const bool counted = expected[b] != MASKED_LABEL_CODE;
24 |       matches += counted && (maxPerBatch[b] == expected[b]);
25 |     }
26 |     *numCorrect = *numCorrect + matches;
27 |   }
28 | };
29 | 
30 | template class CalcAccuracy<unsigned int>;
31 | template class CalcAccuracy<int>;
32 | 
33 | } // namespace popnn
34 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/LogOps.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #include <cmath>
 3 | 
 4 | namespace popnn {
 5 | // Log-domain multiply: log(x * y) = log(x) + log(y), so the inputs are added
 6 | template <typename FPType>
 7 | inline FPType logMul(const FPType a, const FPType b) {
 8 |   return a + b;
 9 | }
10 | #ifdef __IPU__
11 | // Log-domain add: log(exp(a) + exp(b)) = max + log(1 + exp(min - max))
12 | template <typename FPType>
13 | inline FPType logAdd(const FPType a_, const FPType b_) {
14 |   // Work in float: casting is required as exp<half>() is undefined
15 |   const auto a = static_cast<float>(a_);
16 |   const auto b = static_cast<float>(b_);
17 |   float max, min;
18 |   // Inline asm: compiled code doesn't produce optimal f32max, f32min
19 |   asm(" f32max %[asm_max], %[asm_a], %[asm_b]"
20 |       : [asm_max] "=r"(max)
21 |       : [asm_a] "r"(a), [asm_b] "r"(b));
22 |   asm(" f32min %[asm_min], %[asm_a], %[asm_b]"
23 |       : [asm_min] "=r"(min)
24 |       : [asm_a] "r"(a), [asm_b] "r"(b));
25 |   return static_cast<FPType>(max + std::log(1 + std::exp(min - max)));
26 | }
27 | #else
28 | template <typename FPType>
29 | inline FPType logAdd(const FPType a_, const FPType b_) {
30 |   // Log-domain add; casts to float are needed as exp<half>() is undefined
31 |   const FPType larger = a_ < b_ ? b_ : a_;
32 |   const FPType smaller = a_ < b_ ? a_ : b_;
33 |   const float gap = static_cast<float>(smaller - larger);
34 |   return static_cast<FPType>(static_cast<float>(larger) +
35 |                              std::log(1 + std::exp(gap)));
36 | }
37 | #endif
38 | 
39 | } // namespace popnn
40 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/LossSumSquaredTransform.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "poplar/AvailableVTypes.h"
 3 | #include "poplibs_support/ExternalCodelet.hpp"
 4 | #include <poplar/HalfFloat.hpp>
 5 | #include <poplar/Vertex.hpp>
 6 | 
 7 | static constexpr auto PTR_ALIGN32 = poplar::VectorLayout::ONE_PTR;
 8 | 
 9 | using namespace poplar;
10 | 
11 | namespace popnn {
12 | 
13 | template <typename FPType> class LossSumSquaredTransform : public Vertex {
14 | public:
15 |   LossSumSquaredTransform();
16 | 
17 |   Input<Vector<FPType, PTR_ALIGN32, 4>> probs;
18 |   Input<Vector<FPType, PTR_ALIGN32, 4>> expected;
19 |   Output<Vector<FPType, PTR_ALIGN32, 4>> deltas;
20 |   Output<Vector<FPType, PTR_ALIGN32, 4>> transformed;
21 |   const unsigned short size;
22 | 
23 |   IS_EXTERNAL_CODELET(true);
24 | 
25 |   void compute() {
26 |     // For each of the `size` elements: deltas = probs - expected and
27 |     // transformed = 0.5 * deltas * deltas.
28 |     for (std::size_t idx = 0; idx != size; ++idx) {
29 |       const FPType error = probs[idx] - expected[idx];
30 |       deltas[idx] = error;
31 |       transformed[idx] = FPType(0.5) * error * error;
32 |     }
33 |   }
34 | };
35 | 
36 | template class LossSumSquaredTransform<float>;
37 | template class LossSumSquaredTransform<half>;
38 | 
39 | } // namespace popnn
40 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/NonLinearity1D.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "NonLinearity.hpp"
 3 | 
 4 | #include "poplibs_support/ExternalCodelet.hpp"
 5 | 
 6 | using namespace poplar;
 7 | 
 8 | namespace popnn {
 9 | 
10 | template <typename FPType, NonLinearityType nlType>
11 | class NonLinearity1DInPlace : public MultiVertex {
12 | public:
13 |   NonLinearity1DInPlace();
14 | 
15 |   InOut<Vector<FPType, ONE_PTR>> data;
16 |   const unsigned short n;
17 | 
18 |   IS_EXTERNAL_CODELET(true);
19 |   void compute(unsigned wid) {
20 |     // Worker 0 updates all n elements in place; other workers do nothing.
21 |     if (wid != 0)
22 |       return;
23 |     for (unsigned idx = 0; idx != n; ++idx)
24 |       data[idx] = nonlinearity(nlType, float(data[idx]));
25 |   }
26 | };
27 | 
28 | INSTANTIATE_NL(NonLinearity1DInPlace)
29 | 
30 | template <typename FPType, NonLinearityType nlType>
31 | class NonLinearity1D : public MultiVertex {
32 | public:
33 |   NonLinearity1D();
34 | 
35 |   Input<Vector<FPType, ONE_PTR, 8>> data;
36 |   Output<Vector<FPType, ONE_PTR, 8>> out;
37 |   const unsigned short n;
38 | 
39 |   IS_EXTERNAL_CODELET(true);
40 |   void compute(unsigned wid) {
41 |     // Worker 0 writes all n results to out; other workers do nothing.
42 |     if (wid != 0)
43 |       return;
44 |     for (unsigned idx = 0; idx != n; ++idx)
45 |       out[idx] = nonlinearity(nlType, float(data[idx]));
46 |   }
47 | };
48 | 
49 | template class NonLinearity1D<float, popnn::NonLinearityType::SWISH>;
50 | template class NonLinearity1D<half, popnn::NonLinearityType::SWISH>;
51 | 
52 | } // namespace popnn
53 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/NonLinearity2D.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "NonLinearity.hpp"
 3 | 
 4 | using namespace poplar;
 5 | 
 6 | namespace popnn {
 7 | 
 8 | template <typename FPType, NonLinearityType nlType>
 9 | class NonLinearity2DInPlace : public Vertex {
10 | public:
11 |   NonLinearity2DInPlace();
12 | 
13 |   InOut<VectorList<FPType, DELTANELEMENTS>> data;
14 | 
15 |   IS_EXTERNAL_CODELET(true);
16 |   void compute() {
17 |     // Replace every element of every row with its non-linearity.
18 |     for (unsigned row = 0; row != data.size(); ++row) {
19 |       for (unsigned col = 0; col != data[row].size(); ++col)
20 |         data[row][col] = FPType(nonlinearity(nlType, float(data[row][col])));
21 |     }
22 |   }
23 | };
24 | 
25 | INSTANTIATE_NL(NonLinearity2DInPlace)
26 | 
27 | template <typename FPType, NonLinearityType nlType>
28 | class NonLinearity2D : public Vertex {
29 | public:
30 |   NonLinearity2D();
31 | 
32 |   Input<VectorList<FPType, DELTANELEMENTS, 8>> data;
33 |   Vector<Output<Vector<FPType, ONE_PTR, 8>>, ONE_PTR> out;
34 | 
35 |   IS_EXTERNAL_CODELET(true);
36 |   void compute() {
37 |     // out[row][col] receives the non-linearity of data[row][col].
38 |     for (unsigned row = 0; row != data.size(); ++row) {
39 |       for (unsigned col = 0; col != data[row].size(); ++col)
40 |         out[row][col] = FPType(nonlinearity(nlType, float(data[row][col])));
41 |     }
42 |   }
43 | };
44 | 
45 | template class NonLinearity2D<float, popnn::NonLinearityType::SWISH>;
46 | template class NonLinearity2D<half, popnn::NonLinearityType::SWISH>;
47 | 
48 | } // namespace popnn
49 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/NonLinearityGrad1D.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "NonLinearity.hpp"
 3 | 
 4 | using namespace poplar;
 5 | 
 6 | namespace popnn {
 7 | 
 8 | template <typename FPType, NonLinearityType nlType>
 9 | class NonLinearityGrad1D : public MultiVertex {
10 | public:
11 |   NonLinearityGrad1D();
12 | 
13 |   Input<Vector<FPType, ONE_PTR, 8>> outGrad;
14 |   Input<Vector<FPType, ONE_PTR, 8>> out;
15 |   Output<Vector<FPType, ONE_PTR, 8>> inGrad;
16 |   const unsigned short n;
17 | 
18 |   IS_EXTERNAL_CODELET(nlType != NonLinearityType::GELU_ERF);
19 |   void compute(unsigned wid) {
20 |     // Worker 0 sets inGrad = outGrad * derivative(out) for all n elements.
21 |     if (wid != 0)
22 |       return;
23 |     for (unsigned i = 0; i != n; ++i)
24 |       inGrad[i] =
25 |           outGrad[i] * FPType(nonlinearity_derivative(nlType, float(out[i])));
26 |   }
27 | };
28 | 
29 | INSTANTIATE_NL_GRAD(NonLinearityGrad1D)
30 | 
31 | } // namespace popnn
32 | 


--------------------------------------------------------------------------------
/lib/popnn/codelets/NonLinearityGrad2D.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "NonLinearity.hpp"
 3 | 
 4 | using namespace poplar;
 5 | 
 6 | namespace popnn {
 7 | // Vertex computing inGrad[i][j] = outGrad[i][j] * nonlinearity_derivative(nlType, out[i][j]).
 8 | template <typename FPType, NonLinearityType nlType>
 9 | class NonLinearityGrad2D : public Vertex {
10 | public:
11 |   NonLinearityGrad2D();
12 |   // outGrad and out are indexed in step with inGrad; loop bounds come from inGrad only.
13 |   Vector<Input<Vector<FPType, ONE_PTR, 8>>, ONE_PTR> outGrad;
14 |   Vector<Input<Vector<FPType, ONE_PTR, 8>>, ONE_PTR> out;
15 |   Output<VectorList<FPType, DELTANELEMENTS, 8>> inGrad;
16 |   // Flagged external for every non-linearity type except GELU_ERF.
17 |   IS_EXTERNAL_CODELET(nlType != NonLinearityType::GELU_ERF);
18 |   void compute() {
19 |     for (unsigned i = 0; i < inGrad.size(); ++i) {
20 |       for (unsigned j = 0; j < inGrad[i].size(); ++j) {
21 |         const auto derivative = // derivative is evaluated on the float conversion of out
22 |             nonlinearity_derivative(nlType, float(out[i][j]));
23 |         inGrad[i][j] = outGrad[i][j] * FPType(derivative);
24 |       }
25 |     }
26 |   }
27 | };
28 | 
29 | INSTANTIATE_NL_GRAD(NonLinearityGrad2D)
30 | 
31 | } // namespace popnn
32 | 


--------------------------------------------------------------------------------
/lib/popnn/popnnCycleEstimators.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #ifndef __popnnCycleEstimators_hpp__
 3 | #define __popnnCycleEstimators_hpp__
 4 | 
 5 | #include "poputil/exceptions.hpp"
 6 | 
 7 | #include <poputil/cyclesTables.hpp>
 8 | 
 9 | namespace popnn {
10 | // Returns a PerfEstimatorTable. NOTE(review): presumably popnn's vertex cycle estimators - confirm in the .cpp.
11 | poputil::internal::PerfEstimatorTable makePerfFunctionTable();
12 | }
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/lib/popnn_mock/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | add_library(popnn_mock SHARED
 3 |   codelets.cpp
 4 | )
 5 | # All link dependencies are PRIVATE, so none are propagated to targets linking popnn_mock.
 6 | target_link_libraries(popnn_mock
 7 |   PRIVATE
 8 |     gccs
 9 |     GTest::gtest
10 |     GTest::gmock
11 |     Boost::boost
12 | )
13 | # Expose popnn's interface include directories publicly on the mock target.
14 | target_include_directories(popnn_mock
15 |   PUBLIC
16 |     $<TARGET_PROPERTY:popnn,INTERFACE_INCLUDE_DIRECTORIES>
17 | )
18 | # Install the library and add it to the popnn_mock export set.
19 | install(TARGETS popnn_mock
20 |         COMPONENT popnn_mock
21 |         EXPORT popnn_mock
22 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}
23 |         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
24 | )
25 | # Install the export set as popnn_mock-targets.cmake under <libdir>/cmake/popnn_mock.
26 | install(EXPORT popnn_mock
27 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/popnn_mock
28 |         FILE popnn_mock-targets.cmake
29 |         COMPONENT popnn_mock)
30 | # Install the header directory include/popnn_mock from the project source tree.
31 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/popnn_mock
32 |         COMPONENT popnn_mock
33 |         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
34 | )
35 | 


--------------------------------------------------------------------------------
/lib/popnn_mock/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <popnn_mock/Mock.hpp>
 3 | 
 4 | namespace popnn_mock {
 5 | MockPopnn *mockPopnn_ = nullptr; // global mock pointer; null until assigned (NOTE(review): presumably by a test fixture)
 6 | } // namespace popnn_mock
 7 | 
 8 | namespace popnn {
 9 | // Mock definition of popnn::addCodelets: forwards the call to the global mock object.
10 | void addCodelets(poplar::Graph &graph) {
11 |   popnn_mock::mockPopnn_->addCodelets(graph); // no null check: mockPopnn_ must be set before this is called
12 | }
13 | 
14 | } // namespace popnn
15 | 


--------------------------------------------------------------------------------
/lib/popops/AllTrue.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #include "popops/AllTrue.hpp"
 3 | 
 4 | #include "poplibs_support/Tracepoint.hpp"
 5 | #include "popops/Reduce.hpp"
 6 | #include "poputil/DebugInfo.hpp"
 7 | #include "poputil/exceptions.hpp"
 8 | 
 9 | using namespace poplar;
10 | using namespace poplar::program;
11 | using namespace poputil;
12 | 
13 | namespace popops {
14 | // Reduces the boolean tensor `in` with LOGICAL_AND over all its elements; throws poplibs_error for non-BOOL input.
15 | Tensor allTrue(Graph &graph, Tensor in, Sequence &prog,
16 |                const poplar::DebugContext &debugContext) {
17 |   POPOPS_TRACEPOINT();
18 |   poputil::PoplibsOpDebugInfo di(debugContext, DI_ARGS(in));
19 |   // The element type is checked here and also reused as the reduction's output type.
20 |   const auto inType = in.elementType();
21 | 
22 |   if (inType != BOOL) {
23 |     throw poputil::poplibs_error(
24 |         "Operation allTrue only takes boolean tensors");
25 |   }
26 |   auto inFlat = in.flatten(); // flatten so that reducing dimension 0 covers every element
27 |   auto output = reduce(graph, inFlat, inType, {0},
28 |                        popops::Operation::LOGICAL_AND, prog, {di});
29 |   di.addOutput(output);
30 |   return output;
31 | }
32 | 
33 | } // end namespace popops
34 | 


--------------------------------------------------------------------------------
/lib/popops/BitonicTopK.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef _popops_BitonicTopK_hpp_
 4 | #define _popops_BitonicTopK_hpp_
 5 | 
 6 | #include <optional>
 7 | #include <vector>
 8 | 
 9 | #include <poplar/DebugContext.hpp>
10 | #include <poplar/Graph.hpp>
11 | #include <poplar/Program.hpp>
12 | #include <poplar/Tensor.hpp>
13 | 
14 | namespace popops {
15 | namespace bitonic {
16 | // NOTE(review): presumably creates an input tensor laid out for topKImpl; the policy is in the .cpp, not visible here.
17 | poplar::Tensor createTopKInputImpl(poplar::Graph &graph,
18 |                                    const poplar::Type &type,
19 |                                    const std::vector<std::size_t> &shape,
20 |                                    const poplar::DebugNameAndId &dnai = {});
21 | 
22 | /// Implementation of topK using a bitonic-sort based method.
23 | /// Returns a pair: the top \p k values of \p t, and the matching permutation
24 | /// of \p other if it was given.
25 | std::pair<poplar::Tensor, poplar::Tensor>
26 | topKImpl(poplar::Graph &graph, poplar::program::Sequence &prog,
27 |          const poplar::Tensor &t, const std::optional<poplar::Tensor> &other,
28 |          const unsigned k, const bool largest, const bool sorted,
29 |          const bool ascendingOrder, const bool otherIsSecondaryKey,
30 |          const bool sortOtherInReverseOrder,
31 |          const poplar::DebugNameAndId &dnai = {});
32 | 
33 | } // end namespace bitonic
34 | } // end namespace popops
35 | 
36 | #endif // _popops_BitonicTopK_hpp_
37 | 


--------------------------------------------------------------------------------
/lib/popops/CastModelling.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popops_CastModelling_hpp
 4 | #define popops_CastModelling_hpp
 5 | 
 6 | #include <gccs/popsolver/Model.hpp>
 7 | 
 8 | // Forward declarations
 9 | namespace poplar {
10 | class Target;
11 | class Type;
12 | } // namespace poplar
13 | 
14 | namespace popops {
15 | namespace modelling {
16 | // Result of the cast model: a single popsolver variable holding the cycle estimate.
17 | struct CastEstimates {
18 |   CastEstimates(const gccs::popsolver::Variable &init) : cycles(init) {} // non-explicit: a Variable converts implicitly
19 |   gccs::popsolver::Variable cycles;
20 | };
21 | // NOTE(review): presumably models a contiguous cast of mNumElems elements from inType to outType in `m` - confirm in the .cpp.
22 | CastEstimates modelContiguousCast(const poplar::Target &target,
23 |                                   const poplar::Type &inType,
24 |                                   const poplar::Type &outType,
25 |                                   gccs::popsolver::Model &m,
26 |                                   const gccs::popsolver::Variable &mNumElems,
27 |                                   const std::string &debugPrefix = "");
28 | 
29 | } // end namespace modelling
30 | } // end namespace popops
31 | 
32 | #endif // popops_CastModelling_hpp
33 | 


--------------------------------------------------------------------------------
/lib/popops/ElementWiseInternal.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popops_ElementWiseInternal_hpp
 4 | #define popops_ElementWiseInternal_hpp
 5 | 
 6 | #include <poplar/Tensor.hpp>
 7 | #include <popops/Expr.hpp>
 8 | 
 9 | #include <optional>
10 | #include <vector>
11 | 
12 | namespace popops {
13 | // NOTE(review): presumably returns the entry of `ts` that placeholder `p` refers to; lookup rule is in the .cpp.
14 | const poplar::Tensor &
15 | getTensorFromPlaceHolder(const expr::PlaceHolder &p,
16 |                          const std::vector<poplar::Tensor> &ts);
17 | // Type-only counterpart of the lookup above, operating on a vector of types instead of tensors.
18 | const poplar::Type &
19 | getTypeFromPlaceHolder(const expr::PlaceHolder &p,
20 |                        const std::vector<poplar::Type> &tTypes);
21 | // Returns a map keyed by expression pointer. NOTE(review): <unordered_map> is not included directly by this header.
22 | std::unordered_map<const expr::Expr *, poplar::Type>
23 | getConstType(const expr::Expr &expr, const std::vector<poplar::Type> &tTypes);
24 | // An owned expression paired with a type. NOTE(review): <memory> is not included directly by this header.
25 | struct ExprAndType {
26 |   std::unique_ptr<expr::Expr> expression;
27 |   poplar::Type type;
28 | };
29 | 
30 | // Recursively walk up the expression tree and replace expressions with
31 | // simplified expressions where possible
32 | ExprAndType optimise(const expr::Expr &expr,
33 |                      const std::vector<poplar::Type> &tTypes);
34 | 
35 | } // end namespace popops
36 | 
37 | #endif // popops_ElementWiseInternal_hpp
38 | 


--------------------------------------------------------------------------------
/lib/popops/ExprOpUtils.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include "popops/ExprOpUtils.hpp"
 3 | #include "ExprOpUtil.hpp"
 4 | 
 5 | #include <iostream>
 6 | 
 7 | namespace popops::expr {
 8 | namespace ostream_ext {
 9 | // Stream-insertion overloads: each writes the matching *OpTypeToString(t) result and returns the stream.
10 | std::ostream &operator<<(std::ostream &os, const UnaryOpType &t) {
11 |   os << unaryOpTypeToString(t);
12 |   return os;
13 | }
14 | // Binary operator types.
15 | std::ostream &operator<<(std::ostream &os, const BinaryOpType &t) {
16 |   os << binaryOpTypeToString(t);
17 |   return os;
18 | }
19 | // Ternary operator types.
20 | std::ostream &operator<<(std::ostream &os, const TernaryOpType &t) {
21 |   os << ternaryOpTypeToString(t);
22 |   return os;
23 | }
24 | 
25 | } // end namespace ostream_ext
26 | } // end namespace popops::expr
27 | 


--------------------------------------------------------------------------------
/lib/popops/FillModelling.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popops_FillModelling_hpp
 4 | #define popops_FillModelling_hpp
 5 | 
 6 | #include <gccs/popsolver/Model.hpp>
 7 | 
 8 | // Forward declarations
 9 | namespace poplar {
10 | class Target;
11 | class Type;
12 | } // namespace poplar
13 | 
14 | namespace popops {
15 | namespace modelling {
16 | // Result of the fill model: a single popsolver variable holding the cycle estimate.
17 | struct FillEstimates {
18 |   FillEstimates(const gccs::popsolver::Variable &init) : cycles(init) {} // non-explicit: a Variable converts implicitly
19 |   gccs::popsolver::Variable cycles;
20 | };
21 | // NOTE(review): presumably models a contiguous fill of numElems elements of `type` in `m` - confirm in the .cpp.
22 | FillEstimates modelContiguousFill(const poplar::Target &target,
23 |                                   const poplar::Type &type,
24 |                                   gccs::popsolver::Model &m,
25 |                                   const gccs::popsolver::Variable &numElems,
26 |                                   const std::string &debugPrefix = "");
27 | 
28 | } // end namespace modelling
29 | } // end namespace popops
30 | 
31 | #endif // popops_FillModelling_hpp
32 | 


--------------------------------------------------------------------------------
/lib/popops/HistogramPerformanceEstimation.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef _popops_HistogramPerformanceEstimation_hpp_
 3 | #define _popops_HistogramPerformanceEstimation_hpp_
 4 | 
 5 | #include <cstdint>
 6 | // Both estimators take the same parameter list and return a 64-bit count; the two unpackCost* arguments default to 0.
 7 | std::uint64_t histogram1DByLimitEstimate(
 8 |     unsigned elements, unsigned histogramCount, bool isAbsolute, bool isHalf,
 9 |     unsigned numWorkers, unsigned vectorWidth, unsigned unpackCostHistogram = 0,
10 |     unsigned unpackCostLimits = 0);
11 | // NOTE(review): "ByLimit" vs "ByData" presumably selects which loop is outermost in the vertex - confirm in the .cpp.
12 | std::uint64_t histogram1DByDataEstimate(
13 |     unsigned elements, unsigned histogramCount, bool isAbsolute, bool isHalf,
14 |     unsigned numWorkers, unsigned vectorWidth, unsigned unpackCostHistogram = 0,
15 |     unsigned unpackCostLimits = 0);
16 | 
17 | #endif // _popops_HistogramPerformanceEstimation_hpp_
18 | 


--------------------------------------------------------------------------------
/lib/popops/RearrangeUtil.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifndef popops_RearrangeInternal_hpp
 3 | #define popops_RearrangeInternal_hpp
 4 | 
 5 | #include <vector>
 6 | 
 7 | namespace popops {
 8 | namespace internal {
 9 | 
10 | // Can a number of transpositions be split amongst workers?
11 | bool canSplitTranspose(unsigned numTranspositions, unsigned numWorkers);
12 | 
13 | // Split a number of transpositions amongst workers such that each worker gets
14 | // at most a single slice of a transposition.
15 | std::vector<unsigned> createSplitTranspose1DWorkList(unsigned rows,
16 |                                                      unsigned cols,
17 |                                                      unsigned numTranspositions,
18 |                                                      unsigned numWorkers,
19 |                                                      unsigned blockSize);
20 | // NOTE(review): the encoding of the returned work list is defined in the .cpp and is not visible from this header.
21 | } // namespace internal
22 | } // namespace popops
23 | #endif
24 | 


--------------------------------------------------------------------------------
/lib/popops/ScalarMultiply.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifndef popops_ScalarMultiply_hpp
 3 | #define popops_ScalarMultiply_hpp
 4 | 
 5 | #include <poplar/Graph.hpp>
 6 | #include <poplar/OptionFlags.hpp>
 7 | #include <poplar/Program.hpp>
 8 | #include <poplar/Tensor.hpp>
 9 | #include <poputil/DebugInfo.hpp>
10 | 
11 | namespace popops {
12 | // In-place variant: returns nothing. NOTE(review): presumably `a` is updated with a * b - confirm in the .cpp.
13 | void scalarMultiplyInplace(poplar::Graph &graph, const poplar::Tensor &a,
14 |                            const poplar::Tensor &b,
15 |                            poplar::program::Sequence &prog,
16 |                            poputil::PoplibsOpDebugInfo &di,
17 |                            const poplar::OptionFlags &options = {});
18 | // Same parameter list as the in-place variant, but the result is returned as a tensor.
19 | poplar::Tensor scalarMultiply(poplar::Graph &graph, const poplar::Tensor &a,
20 |                               const poplar::Tensor &b,
21 |                               poplar::program::Sequence &prog,
22 |                               poputil::PoplibsOpDebugInfo &di,
23 |                               const poplar::OptionFlags &options = {});
24 | // NOTE(review): presumably the applicability test for the two functions above; orderInvariant likely allows a/b swapped.
25 | bool inputsMatchMixedPrecisionScalarMultiplyPattern(
26 |     const poplar::Tensor &a, const poplar::Tensor &b,
27 |     bool orderInvariant = false);
28 | 
29 | } // namespace popops
30 | 
31 | #endif // popops_ScalarMultiply_hpp
32 | 


--------------------------------------------------------------------------------
/lib/popops/ScaledAddModelling.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "ScaledAddModelling.hpp"
 4 | 
 5 | #include <poplar/Target.hpp>
 6 | #include <poplar/Type.hpp>
 7 | 
 8 | #include <popops/PerformanceEstimation.hpp>
 9 | 
10 | using namespace poplar;
11 | using namespace popops::internal;
12 | namespace popsolver = gccs::popsolver;
13 | 
14 | namespace popops {
15 | namespace modelling {
16 | // Adds to `m` a variable giving the cycle estimate of a scaled add, as a function of mNumElems.
17 | ScaledAddEstimates modelContiguousScaledAdd(
18 |     const Target &target, const Type &dataType, const Type &dataBType,
19 |     const bool isMemConstrained, popsolver::Model &m,
20 |     const popsolver::Variable &mNumElems, const std::string &debugPrefix) {
21 |   ScaledArithmeticTargetParameters targetParams(target, dataType);
22 |   ScaledAddEstimates e(m.zero());
23 |   // The lambda captures everything by value, so it does not refer to this function's arguments later.
24 |   e.cycles = m.call<unsigned>(
25 |       {mNumElems},
26 |       [targetParams, dataType, dataBType,
27 |        isMemConstrained](const std::vector<unsigned> &values) {
28 |         const unsigned numElems = values[0];
29 |         // NOTE: vector layout is a small one-off cost, and not currently
30 |         // easy to get from the target so just assuming.
31 |         const auto vectorLayout = layout::Vector::ScaledPtr64;
32 |         const auto cycles = getScaledArithmeticSupervisorCycleEstimate(
33 |             targetParams, dataType, dataBType, isMemConstrained,
34 |             ScaledArithmeticOp::ADD, vectorLayout, vectorLayout, numElems);
35 |         return popsolver::DataType{cycles};
36 |       },
37 |       debugPrefix + ".cycles");
38 |   return e;
39 | }
40 | 
41 | } // end namespace modelling
42 | } // end namespace popops
43 | 


--------------------------------------------------------------------------------
/lib/popops/ScaledAddModelling.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popops_ScaledAddModelling_hpp
 4 | #define popops_ScaledAddModelling_hpp
 5 | 
 6 | #include <gccs/popsolver/Model.hpp>
 7 | 
 8 | // Forward declarations
 9 | namespace poplar {
10 | class Target;
11 | class Type;
12 | } // namespace poplar
13 | 
14 | namespace popops {
15 | namespace modelling {
16 | // Result of the scaled-add model: a single popsolver variable holding the cycle estimate.
17 | struct ScaledAddEstimates {
18 |   ScaledAddEstimates(const gccs::popsolver::Variable &init) : cycles(init) {} // non-explicit: a Variable converts implicitly
19 |   gccs::popsolver::Variable cycles;
20 | };
21 | // Declared without a default for debugPrefix, so callers must always pass one.
22 | ScaledAddEstimates modelContiguousScaledAdd(
23 |     const poplar::Target &target, const poplar::Type &dataType,
24 |     const poplar::Type &dataBType, const bool isMemConstrained,
25 |     gccs::popsolver::Model &m, const gccs::popsolver::Variable &mNumElems,
26 |     const std::string &debugPrefix);
27 | 
28 | } // end namespace modelling
29 | } // end namespace popops
30 | 
31 | #endif // popops_ScaledAddModelling_hpp
32 | 


--------------------------------------------------------------------------------
/lib/popops/SortOrder.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include <popops/SortOrder.hpp>
 4 | #include <poputil/exceptions.hpp>
 5 | 
 6 | #include <ostream>
 7 | 
 8 | namespace popops {
 9 | // Writes a lower-case name for the sort order to `os`; throws poplibs_error for any other value.
10 | std::ostream &operator<<(std::ostream &os, const SortOrder &o) {
11 |   switch (o) {
12 |   case SortOrder::NONE:
13 |     os << "none";
14 |     break;
15 |   case SortOrder::ASCENDING:
16 |     os << "ascending";
17 |     break;
18 |   case SortOrder::DESCENDING:
19 |     os << "descending";
20 |     break;
21 |   default: // value outside the three cases handled above
22 |     throw poputil::poplibs_error("Unhandled sort order");
23 |   }
24 |   return os;
25 | }
26 | 
27 | } // end namespace popops
28 | 


--------------------------------------------------------------------------------
/lib/popops/Zero.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2016 Graphcore Ltd. All rights reserved.
 2 | #include "popops/Zero.hpp"
 3 | 
 4 | #include "poplibs_support/Tracepoint.hpp"
 5 | #include "popops/Fill.hpp"
 6 | #include "poputil/DebugInfo.hpp"
 7 | 
 8 | namespace popops {
 9 | // Every overload below forwards its arguments unchanged to the matching popops::fill overload with fill value 0.
10 | void zero(poplar::Graph &graph, poplar::Tensor t,
11 |           const std::vector<poplar::Interval> &tileRegions, unsigned tile,
12 |           poplar::ComputeSet zeroCS) {
13 |   fill(graph, t, tileRegions, tile, zeroCS, 0);
14 | }
15 | // Tensor + tile + compute set overload.
16 | void zero(poplar::Graph &graph, const poplar::Tensor &t, unsigned tile,
17 |           poplar::ComputeSet zeroCS) {
18 |   fill(graph, t, tile, zeroCS, 0);
19 | }
20 | // Tensor + explicit per-tile interval mapping + compute set overload.
21 | void zero(poplar::Graph &graph, const poplar::Tensor &t,
22 |           const std::vector<std::vector<poplar::Interval>> &mapping,
23 |           poplar::ComputeSet zeroCS) {
24 |   fill(graph, t, mapping, zeroCS, 0);
25 | }
26 | // Program-level overload: the only one that emits a tracepoint and records debug info.
27 | void zero(poplar::Graph &graph, const poplar::Tensor &t,
28 |           poplar::program::Sequence &prog,
29 |           const poplar::DebugContext &debugContext) {
30 |   POPOPS_TRACEPOINT();
31 |   poputil::PoplibsOpDebugInfo di(debugContext, DI_ARGS(t));
32 |   fill(graph, t, prog, 0, {di});
33 | }
34 | 
35 | } // end namespace popops
36 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/CircBufIncrIndex.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/ExternalCodelet.hpp"
 3 | #include <poplar/HalfFloat.hpp>
 4 | #include <poplar/Vertex.hpp>
 5 | 
 6 | using namespace poplar;
 7 | namespace popops {
 8 | 
 9 | class CircBufIncrIndex : public Vertex {
10 | public:
11 |   CircBufIncrIndex();
12 |   // Advances `index` by one, wrapping to 0 when it reaches hSize.
13 |   InOut<unsigned> index;
14 |   const unsigned hSize; // modulus; must be non-zero because compute() uses % hSize
15 |   void compute() { *index = (*index + 1) % hSize; }
16 | };
17 | 
18 | } // namespace popops
19 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/CircOffset.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/ExternalCodelet.hpp"
 3 | #include <poplar/HalfFloat.hpp>
 4 | #include <poplar/Vertex.hpp>
 5 | 
 6 | using namespace poplar;
 7 | namespace popops {
 8 | 
 9 | class CircOffset : public Vertex {
10 | public:
11 |   CircOffset();
12 |   // Writes indexIn + offset to indexOut, wrapped by hSize with a single conditional subtraction.
13 |   Input<unsigned> indexIn;
14 |   Output<unsigned> indexOut;
15 |   const unsigned hSize;
16 |   const unsigned offset;
17 |   void compute() {
18 |     auto updated = *indexIn + offset;
19 |     if (updated >= hSize) { // one subtraction, not %: result is < hSize only if the sum is < 2 * hSize
20 |       updated -= hSize;
21 |     }
22 |     *indexOut = updated;
23 |   }
24 | };
25 | 
26 | } // namespace popops
27 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/ContinuousReduce.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "ReduceCodelets.hpp"
 3 | 
 4 | #ifdef __IPU__
 5 | // For real implementation
 6 | using ShortType = unsigned short;
 7 | #else
 8 | // To avoid size overflow on CPU implementation
 9 | using ShortType = unsigned;
10 | #endif
11 | 
12 | namespace popops {
13 | // ROT: vertex output field type - InOut when isUpdate is true, otherwise Output.
14 | template <typename OutType, bool isUpdate>
15 | using ROT = typename std::conditional<
16 |     isUpdate, poplar::InOut<poplar::Vector<OutType, PTR_ALIGN32, 4>>,
17 |     poplar::Output<poplar::Vector<OutType, PTR_ALIGN32, 4>>>::type;
18 | // Compile-time predicate over (op, partials type, output type); isUpdate is not consulted in the body.
19 | template <typename ReduceOp, typename PartialsType, typename OutType,
20 |           bool isUpdate>
21 | static constexpr bool useExternal() {
22 |   bool opIsAddOrSquareAdd = std::is_same<ReduceOp, ReduceAdd>::value ||
23 |                             std::is_same<ReduceOp, ReduceSquareAdd>::value;
24 |   // Both the partials type and the output type must each be float or half.
25 |   bool partialsAndOutputAreFloatsOrHalfs =
26 |       (std::is_same<OutType, float>::value ||
27 |        std::is_same<OutType, half>::value) &&
28 |       (std::is_same<PartialsType, float>::value ||
29 |        std::is_same<PartialsType, half>::value);
30 | 
31 |   bool opIsMinOrMax = std::is_same<ReduceOp, ReduceMax>::value ||
32 |                       std::is_same<ReduceOp, ReduceMin>::value;
33 | 
34 |   bool partialsAndOutputAreTheSameType =
35 |       std::is_same<PartialsType, OutType>::value;
36 |   // True for add/square-add on float/half, or for min/max on float/half with identical partials and output types.
37 |   return (opIsAddOrSquareAdd && partialsAndOutputAreFloatsOrHalfs) ||
38 |          (opIsMinOrMax && partialsAndOutputAreFloatsOrHalfs &&
39 |           partialsAndOutputAreTheSameType);
40 | }
41 | 
42 | } // namespace popops
43 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/HeapSort.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include <limits>
 4 | #include <poplar/HalfFloat.hpp>
 5 | #include <poplar/Vertex.hpp>
 6 | #include <utility>
 7 | 
 8 | namespace popops {
 9 | // Reverses the range [begin, end) in place by swapping from both ends towards the middle.
10 | template <typename BIter> static void reverse(BIter begin, BIter end) {
11 |   while (begin < end) {
12 |     end--;
13 |     std::swap(*begin, *end);
14 |     begin++;
15 |   }
16 | }
17 | // Rotates [begin, end) so that the element at new_begin comes first; done with three reversals.
18 | template <typename BIter>
19 | static void rotate(BIter begin, BIter new_begin, BIter end) {
20 |   reverse(begin, new_begin);
21 |   reverse(new_begin, end);
22 |   reverse(begin, end);
23 | }
24 | // Index helpers for a binary heap stored in an array with the root at index 0.
25 | static std::uint32_t root() { return 0; }
26 | // Unsigned arithmetic: parent(0) wraps around instead of going negative, so do not call it on the root.
27 | static std::uint32_t parent(std::uint32_t index) { return (index - 1) / 2; }
28 | 
29 | static std::uint32_t left(std::uint32_t index) { return (index * 2) + 1; }
30 | 
31 | static std::uint32_t right(std::uint32_t index) { return (index * 2) + 2; }
32 | 
33 | } // namespace popops
34 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/Iota.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "poplibs_support/ExternalCodelet.hpp"
 3 | #include <poplar/VectorTypes.hpp>
 4 | #include <poplar/Vertex.hpp>
 5 | 
 6 | using namespace poplar;
 7 | 
 8 | namespace popops {
 9 | 
10 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
11 | // Vertex filling each output region i with offsets[i], offsets[i] + 1, ... up to the region's length.
12 | template <typename OutType> class Iota : public Vertex {
13 | public:
14 |   Iota();
15 |   // Not flagged as external: the C++ compute() below is the implementation.
16 |   IS_EXTERNAL_CODELET(false);
17 |   Vector<Output<Vector<OutType>>> out;
18 |   Input<Vector<OutType, ONE_PTR>> offsets;
19 |   // offsets is indexed once per region of out; the loop bound comes from out.size().
20 |   void compute() {
21 |     for (unsigned i = 0; i != out.size(); ++i) {
22 |       auto base = offsets[i];
23 |       auto n = out[i].size();
24 |       for (unsigned j = 0; j != n; ++j) {
25 |         out[i][j] = base + static_cast<OutType>(j);
26 |       }
27 |     }
28 |   }
29 | };
30 | 
31 | template class Iota<unsigned>;
32 | template class Iota<int>;
33 | 
34 | } // namespace popops
35 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/MultiUpdateOp.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifndef _MultiUpdateOp_hpp_
 3 | #define _MultiUpdateOp_hpp_
 4 | 
 5 | #include "poplar/AvailableVTypes.h"
 6 | #include "poplibs_support/ExternalCodelet.hpp"
 7 | #include "popops/OperationDef.hpp"
 8 | 
 9 | #include <cassert>
10 | #include <cmath>
11 | #include <poplar/HalfFloat.hpp>
12 | #include <poplar/Vertex.hpp>
13 | 
14 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
15 | static constexpr auto COMPACT_PTR = poplar::VectorLayout::COMPACT_PTR;
16 | 
17 | namespace popops {
18 | // Combines x and y according to op: ADD gives x + y, MUL gives x * y, MAX gives x if x > y, otherwise y.
19 | template <typename T> T updateOp(Operation op, T x, T y) {
20 |   switch (op) {
21 |   case popops::Operation::ADD:
22 |     return x + y;
23 |   case popops::Operation::MUL:
24 |     return x * y;
25 |   case popops::Operation::MAX:
26 |     return x > y ? x : y;
27 |   default:
28 |     assert(0 && "Operation not supported");
29 |     return x; // reached only when the assert is compiled out: x is returned unchanged
30 |   }
31 | }
32 | 
33 | } // namespace popops
34 | 
35 | #endif // #ifndef _MultiUpdateOp_hpp_
36 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/SelectScalarFromRows.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "popops/EncodingConstants.hpp"
 3 | #include <poplar/HalfFloat.hpp>
 4 | #include <poplar/Vertex.hpp>
 5 | 
 6 | static constexpr auto ONE_PTR = poplar::VectorLayout::ONE_PTR;
 7 | 
 8 | namespace popops {
 9 | namespace { // getParam: read the value at column `index` from a slice holding columns [start, end)
10 | template <typename T>
11 | inline T getParam(const T *params, unsigned index, unsigned start, unsigned end,
12 |                   unsigned width) {
13 |   static_assert(std::is_same<T, float>() || std::is_same<T, half>(),
14 |                 "T must be a either float or half");
15 |   if (width <= index && index != MASKED_LABEL_CODE) { // index beyond width and not the mask code: NaN
16 |     return static_cast<T>(__builtin_nanf(""));
17 |   }
18 |   if (index < start || end <= index || index == MASKED_LABEL_CODE) { // outside this slice, or masked: 0
19 |     return static_cast<T>(0.f);
20 |   }
21 |   return params[index - start]; // params is indexed relative to `start`
22 | }
23 | // decrementParams: subtract 1 from the entry at column `index` if it lies in [startCol, endCol).
24 | // The type half does not have the -- operator.
25 | template <typename T>
26 | inline void decrementParams(T *params, unsigned index, unsigned startCol,
27 |                             unsigned endCol, unsigned paramsWidth) {
28 |   if (__builtin_expect(index < paramsWidth, 1)) { // hinted as the likely case
29 |     if (__builtin_expect(startCol <= index && index < endCol, 0)) { // hinted as unlikely
30 |       params[index - startCol] = params[index - startCol] - static_cast<T>(1.f);
31 |     }
32 |   } else {
33 |     if (index != MASKED_LABEL_CODE) { // index >= paramsWidth and not masked: set every column in the slice to NaN
34 |       for (unsigned col = startCol; col != endCol; ++col) {
35 |         params[col - startCol] = static_cast<T>(__builtin_nanf(""));
36 |       }
37 |     }
38 |   }
39 | }
40 | 
41 | } // namespace
42 | } // namespace popops
43 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/asm/BinarySearch.h.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef BinarySearch_h_S
 4 | #define BinarySearch_h_S
 5 | #ifdef __IPU__
 6 | 
 7 | // These functions are used by multiUpdate and multiUpdateOp. The registers
 8 | // are carefully aliased so that there is minimal register spill. 
 9 | 
10 | #include "poplar/AvailableVTypes.h"
11 | #include "poplar/TileConstants.hpp"
12 | #include "poplar/StackSizeDefs.hpp"
13 | 
14 | // Binary search functions
15 | .extern lowerBinarySearch
16 | .extern upperBinarySearch
17 | 
18 | // binary search register aliases
19 | #define mBS_indicesPtr     m0
20 | #define mBS_numIndices     m1
21 | #define mBS_targetValue    m2
22 | #define mBS_retLr          m11
23 | #define mBS_startIndex     m7
24 | #define mBS_endIndex       m10
25 | 
26 | #endif // __IPU__
27 | #endif // BinarySearch_h_S
28 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/asm/MathConstants.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | // +/- Max half
 3 | #define MAX_HALF 0x7BFF
 4 | #define MIN_HALF 0xFBFF
 5 | 
 6 | // +/- Max half broadcast into 32 bits for half
 7 | #define MAX_HALF_BROADCAST 0x7BFF7BFF
 8 | #define MIN_HALF_BROADCAST 0xFBFFFBFF
 9 | 
10 | // +/- Inf for float
11 | #define MAX_FLOAT 0x7F800000
12 | #define MIN_FLOAT 0xFF800000
13 | 
14 | #define HALF_1_0 0x3c00
15 | #define HALF_1_0_BROADCAST 0x3c003c00
16 | #define HALF_NEG_1_0 0xbc00
17 | 
18 | #define FLOAT_1_0 0x3f800000
19 | #define FLOAT_NEG_1_0 0xbf800000
20 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/asm/ScaledAddSupervisor.inc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifndef __SCALED_ADD_SUPERVISOR_INC__
 3 | #define __SCALED_ADD_SUPERVISOR_INC__
 4 | 
 5 | // Shared register across files for the scale value
 6 | #define k a6
 7 | 
 8 | // Flag for memConstraints
 9 | #define MEM_CONSTRAINTS_MASK 0x1
10 | // Common register between files to indicate memConstraints were applied
11 | #define memConstraints m11
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/asm/dynamicSlice.inc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | #ifdef __IPU__
 3 | #ifndef __DYNAMIC_SLICE_INC__
 4 | #define __DYNAMIC_SLICE_INC__
 5 | 
 6 | // Common definitions for the dynamicSlice/MultiSlice code
 7 | 
 8 | 
 9 | // Parameters for the copy macros/functions defined in
10 | // dynamicSliceSupervisor_8bit.S, MultiSlice.S and SliceCopyFunction.S
11 | #define mReturnAddress  m7
12 | #define mSrcPtr         m8
13 | #define mDstPtr         m9
14 | #define mRegionSize     m10
15 | #define mScratch        m11
16 | 
17 | // ARF registers used to load/store values during copy
18 | #define VAL12   a0:1
19 | #define VAL1    a0
20 | #define VAL2    a1
21 | #define VAL3    a2
22 | 
23 | 
24 | //****************************************************************************
25 | // Offset in scratch area ('worker_base') where we save registers
26 | //****************************************************************************
27 | 
28 | // Save area for DynamicSlice2d_common
29 | #define WOFF_REGIONS         0
30 | #define WOFF_BASET_BASE      1
31 | #define WOFF_SUBT_BASE       2
32 | 
33 | // Save area for 'Slice_copy_function_8bit'
34 | #define WOFF_SAVE_REGS1      3
35 | #define WOFF_SAVE_REGS2      4
36 | #define WOFF_SAVE_REGS3      5
37 | #define WOFF_SAVE_REGS4      6
38 | 
39 | #endif // __DYNAMIC_SLICE_INC__
40 | #endif // __IPU__
41 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/asm/histogramCommon.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | // Shared macros, constants used by Histogram vertices
 4 | 
 5 | // Macros to implement the abs value of $a0 or $a0:1 if required by the vertex (emitted only when IS_ABS is "true")
 6 | .macro CONDITIONAL_ABSv2HALF IS_ABS
 7 | .ifc "\IS_ABS", "true"
 8 |   f16v2absadd $a0, $azero, $a0
 9 | .endif
10 | .endm
11 | 
12 | .macro CONDITIONAL_ABSv4HALF_BUNDLE IS_ABS
13 | .ifc "\IS_ABS", "true"
14 | { nop
15 |   f16v4absadd $a0:1, $azeros, $a0:1
16 | }
17 | .endif
18 | .endm
19 | 
20 | .macro CONDITIONAL_ABSv4HALF IS_ABS
21 | .ifc "\IS_ABS", "true"
22 |   f16v4absadd $a0:1, $azeros, $a0:1
23 | .endif
24 | .endm
25 | 
26 | .macro CONDITIONAL_ABSv1FLOAT IS_ABS
27 | .ifc "\IS_ABS", "true"
28 |   f32absadd $a0, $azero, $a0
29 | .endif
30 | .endm
31 | 
32 | .macro CONDITIONAL_ABSv2FLOAT_BUNDLE IS_ABS
33 | .ifc "\IS_ABS", "true"
34 | { nop
35 |   f32v2absadd $a0:1, $azeros, $a0:1
36 | }
37 | .endif
38 | .endm
39 | 
40 | .macro CONDITIONAL_ABSv2FLOAT IS_ABS
41 | .ifc "\IS_ABS", "true"
42 |   f32v2absadd $a0:1, $azeros, $a0:1
43 | .endif
44 | .endm
45 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/inlineAssembler.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include <ipu_builtins.h>
 4 | 
 5 | // Write 16 bits (`in`) to element 0 of *outPtr, assuming a 32-bit aligned destination pointer; element 1 is preserved
 6 | static __attribute__((always_inline)) void write16Aligned32(half in,
 7 |                                                             half2 *outPtr) {
 8 |   // Ensure that the operand that is put into a 32-bit register is 32 bits in size
 9 |   half2 result = {in, in};
10 |   half2 toPreserve = *outPtr; // read the current contents so element 1 can be kept
11 |   *outPtr = __builtin_shufflevector(result, toPreserve, 1, 3); // stores {in, toPreserve[1]}
12 | }
13 | // Combine four 8-bit values, held in the 8 LSBs of each input, into a single
14 | // 32-bit result. Bits 8..31 of the inputs are ignored.
15 | static __attribute__((always_inline)) unsigned
16 | combine8bit(unsigned in0, unsigned in1, unsigned in2, unsigned in3) {
17 | 
18 |   auto a = reinterpret_cast<ushort2>(__builtin_ipu_shuf8x8lo(in0, in1)); // NOTE(review): presumably interleaves the low bytes of in0/in1 - confirm against the shuf8x8lo docs
19 |   auto b = reinterpret_cast<ushort2>(__builtin_ipu_shuf8x8lo(in2, in3)); // same operation for in2/in3
20 |   return reinterpret_cast<unsigned>(__builtin_shufflevector(a, b, 0, 2)); // result is {a[0], b[0]}
21 | }
22 | 


--------------------------------------------------------------------------------
/lib/popops/codelets/util.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef popops_codelets_util_hpp
 3 | #define popops_codelets_util_hpp
 4 | 
 5 | template <typename T> const T &max(const T &x, const T &y) { // uses only operator<; returns a reference to one of the arguments
 6 |   return x < y ? y : x; // x is returned whenever !(x < y), including ties
 7 | }
 8 | 
 9 | template <typename T> const T &min(const T &x, const T &y) { // uses only operator<; returns a reference to one of the arguments
10 |   return x < y ? x : y; // y is returned whenever !(x < y), including ties
11 | }
12 | 
13 | #endif // popops_codelets_util_hpp
14 | 


--------------------------------------------------------------------------------
/lib/popops/reduction/ComputeSetList.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef ComputeSetList_hpp
 3 | #define ComputeSetList_hpp
 4 | 
 5 | #include <cstdint>
 6 | #include <poplar/DebugContext.hpp>
 7 | #include <poplar/Graph.hpp>
 8 | #include <vector>
 9 | 
10 | // This is a convenience class for using a vector<ComputeSet>.
11 | class ComputeSetList {
12 | public:
13 |   // Create a wrapper around a vector<ComputeSet> that records the
14 |   // latest compute set we have used and adds more as needed. The vector
15 |   // must outlive this wrapper.
16 |   explicit ComputeSetList(std::vector<poplar::ComputeSet> &css);
17 | 
18 |   // Return the compute set for the current pos() and increment pos(). If
19 |   // there isn't one, create one with the given name.
20 |   poplar::ComputeSet add(poplar::Graph &graph,
21 |                          const poplar::DebugNameAndId &dani);
22 | 
23 |   // Return the current position: the number of times add() has been called
24 |   // unless setPos() has moved it. The underlying vector<ComputeSet> may be larger.
25 |   std::size_t pos() const;
26 | 
27 |   // Set pos(). An exception is thrown if newPos is greater than the
28 |   // underlying vector's size.
29 |   void setPos(std::size_t newPos);
30 | 
31 |   // Functions for accessing compute sets used in 2-stage reductions.
32 |   poplar::ComputeSet &getCs1(const unsigned computeSets); // NOTE(review): meaning of `computeSets` is not documented here
33 |   poplar::ComputeSet &getCs2(const unsigned computeSets);
34 | 
35 | private:
36 |   std::vector<poplar::ComputeSet> &css; // not owned; the caller's vector must outlive this object
37 |   std::size_t pos_ = 0; // presumably the value reported by pos() - implementation not shown here
38 | };
39 | 
40 | #endif // ComputeSetList_hpp
41 | 


--------------------------------------------------------------------------------
/lib/popops/reduction/CycleEstimationFunctions.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef CycleEstimationFunctions_hpp
 3 | #define CycleEstimationFunctions_hpp
 4 | 
 5 | #include <cstdint>
 6 | #include <vector>
 7 | 
 8 | #include <poplar/Graph.hpp>
 9 | #include <poplar/Target.hpp>
10 | #include <poplar/VertexIntrospector.hpp>
11 | #include <popops/Reduce.hpp>
12 | 
13 | namespace popops {
14 | enum class ReductionSpecialisation;
15 | 
16 | /// Get the cycle estimate for a strided reduce vertex with the
17 | /// given parameters. This is exposed for modelling.
18 | std::uint64_t getCyclesEstimateForStridedReduce(
19 |     const std::size_t partialsSize, const std::size_t numPartials,
20 |     const std::size_t numOutputs, const unsigned stride,
21 |     const unsigned numOuterStrides, const unsigned dataPathWidth,
22 |     const unsigned vectorWidth, const poplar::Type &partialsType,
23 |     const poplar::Type &outType, const popops::Operation operation,
24 |     bool update);
25 | 
26 | /// Get the cycle estimate for a reduction. This obtains field sizes from the
27 | /// vertex and calls through to getCyclesEstimateForReduce(). See that
28 | /// function for details.
29 | poplar::VertexPerfEstimate getCycleEstimateForReduceVertex(
30 |     const poplar::VertexIntrospector &vertex, const poplar::Target &target,
31 |     const poplar::Type &partialsType, const poplar::Type &outType,
32 |     popops::Operation operation, bool isUpdate,
33 |     popops::ReductionSpecialisation specialisation);
34 | 
35 | } // namespace popops
36 | 
37 | #endif // CycleEstimationFunctions_hpp
38 | 


--------------------------------------------------------------------------------
/lib/popops/reduction/IntermediatePartialsUtil.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #ifndef IntermediatePartialsUtil_hpp
 3 | #define IntermediatePartialsUtil_hpp
 4 | 
 5 | #include "IntermediatePartials.hpp"
 6 | 
 7 | namespace popops {
 8 | 
 9 | /// Given a 2D tensor with `wrapSize` columns and the given tile mapping, this
10 | /// determines if any tile has more than one value from the same column mapped
11 | /// to it.
12 | bool mappingHasMultipleValuesFromOneColumnOnTheSameTile(
13 |     const poplar::Graph::TileToTensorMapping &mapping, std::size_t wrapSize);
14 | 
15 | /// Given a 2D tensor where only one value from each column is present on a
16 | /// single tile, this converts it into the IntermediatePartials format.
17 | ///
18 | /// If those conditions are not true an exception is thrown.
19 | IntermediatePartials
20 | tensorToIntermediatePartials(const poplar::Tensor &A,
21 |                              const poplar::Graph::TileToTensorMapping &mapping);
22 | 
23 | } // namespace popops
24 | 
25 | #endif // IntermediatePartialsUtil_hpp
26 | 


--------------------------------------------------------------------------------
/lib/popops/reduction/ReductionVertexDefs.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef popops_reduction_ReductionVertexDefs_hpp_
 3 | #define popops_reduction_ReductionVertexDefs_hpp_
 4 | 
 5 | namespace popops {
 6 | 
 7 | enum class ReductionSpecialisation {
 8 |   // TODO: T12965 Swap 2&3 (presumably the enumerator values below) so that higher specialisations are cheaper.
 9 | 
10 |   DEFAULT,                    // 0
11 |   SCALAR_OUTPUT_REGIONS,      // 1
12 |   SCALAR_OUTPUT_SINGLE_INPUT, // 2
13 |   STRIDED_REDUCE,             // 3
14 |   STRIDED_REDUCE_OUTER,       // 4
15 |   ALL_REGIONS_CONTINUOUS      // 5
16 | };
17 | 
18 | // For use with STRIDED_REDUCE. NOTE(review): the "M1" suffix presumably means the stored value is (count - 1) - confirm against the vertex code
19 | template <typename T> struct CountsAndStrides {
20 |   T numOutputsM1;
21 |   T numPartialsM1;
22 |   T partialsWidth;
23 |   T numOuterStridesM1;
24 |   T outerStride;
25 | };
26 | 
27 | } // end namespace popops
28 | #endif // popops_reduction_ReductionVertexDefs_hpp_
29 | 


--------------------------------------------------------------------------------
/lib/popops_mock/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | add_library(popops_mock SHARED
 3 |   ElementWise.cpp
 4 |   ElementWiseUtil.cpp
 5 |   codelets.cpp
 6 | )
 7 | 
 8 | target_link_libraries(popops_mock
 9 |   PRIVATE
10 |     gccs
11 |     GTest::gtest
12 |     GTest::gmock
13 |     Boost::boost
14 | )
15 | 
16 | target_include_directories(popops_mock
17 |   PUBLIC
18 |     $<TARGET_PROPERTY:popops,INTERFACE_INCLUDE_DIRECTORIES>
19 | )
20 | 
21 | install(TARGETS popops_mock
22 |         COMPONENT popops_mock
23 |         EXPORT popops_mock
24 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}
25 |         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
26 | )
27 | 
28 | install(EXPORT popops_mock
29 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/popops_mock
30 |         FILE popops_mock-targets.cmake
31 |         COMPONENT popops_mock)
32 | 
33 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/popops_mock
34 |         COMPONENT popops_mock
35 |         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
36 | )
37 | 


--------------------------------------------------------------------------------
/lib/popops_mock/ElementWiseUtil.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <popops_mock/Mock.hpp>
 3 | 
 4 | namespace popops {
 5 | 
 6 | poplar::Tensor createOutputForElementWiseOp(
 7 |     poplar::Graph &graph, const std::vector<poplar::Tensor> &inputs,
 8 |     const poplar::Type &outputType, const poplar::DebugContext &debugContext) {
 9 |   return popops_mock::mockPopops_->createOutputForElementWiseOp(
10 |       graph, inputs, outputType, debugContext);
11 | }
12 | 
13 | } // end namespace popops
14 | 


--------------------------------------------------------------------------------
/lib/popops_mock/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <popops_mock/Mock.hpp>
 3 | 
 4 | namespace popops {
 5 | 
 6 | void addCodelets(poplar::Graph &graph) {
 7 |   popops_mock::mockPopops_->addCodelets(graph);
 8 | }
 9 | 
10 | } // namespace popops
11 | 


--------------------------------------------------------------------------------
/lib/poprand/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #include "poprandCycleEstimators.hpp"
 3 | #include <poplibs_support/codelets.hpp>
 4 | #include <poprand/codelets.hpp>
 5 | 
 6 | namespace poprand {
 7 | 
 8 | void addCodelets(poplar::Graph &graph) {
 9 |   static poplibs::CurrentLibLocator loc; // function-local static handed to getCodeletsPath below
10 |   graph.addCodelets(poplibs::getCodeletsPath("poprand", "poprand.gp", loc)); // add the poprand.gp codelets to the graph
11 |   poputil::internal::registerPerfFunctions(graph, makePerfFunctionTable()); // register poprand's perf-estimator table for this graph
12 | }
13 | 
14 | } // namespace poprand
15 | 


--------------------------------------------------------------------------------
/lib/poprand/codelets/Normal.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "RandomUtils.hpp"
 3 | 
 4 | using namespace poplar;
 5 | 
 6 | namespace poprand {
 7 | 
 8 | template <typename OutType> class Normal : public MultiVertex {
 9 | public:
10 |   Normal();
11 | 
12 |   Output<Vector<OutType, SPAN, 8>> out;
13 |   const float mean;   // mean of normal distribution
14 |   const float stdDev; // standard deviation of normal distribution
15 | 
16 |   // SimOnlyField<bool> saveRestoreSeed;
17 | 
18 |   IS_EXTERNAL_CODELET(true); // NOTE(review): marks the vertex as externally implemented; the C++ body below is presumably a reference/fallback - confirm
19 | 
20 |   void compute(unsigned wid) { // only the worker with wid == 0 writes output; the others do nothing
21 |     if (wid == 0) {
22 |       uint32_t seed[2] = {0xDEADBEEF, 0xBEEFDEAD}; // fixed seed words: this body does not read any runtime seed
23 |       uint32_t seedModifier = 0x900DDEED; // not used anywhere else in this function
24 | 
25 |       uint64_t seedH = seed[0] + (static_cast<uint64_t>(seed[1]) << 32); // seed[1] in the upper 32 bits, seed[0] in the lower
26 |       uint64_t seedL = seed[1] + (static_cast<uint64_t>(seed[0]) << 32); // the two words swapped relative to seedH
27 |       auto s = initialiseAndPrime({seedL, seedH});
28 |       bool isHalf = std::is_same<OutType, half>::value;
29 |       const unsigned maxPerCall = isHalf ? 4 : 2; // samples taken from each grand() call: 4 for half, 2 otherwise
30 |       unsigned n = out.size(); // samples still to be written
31 |       unsigned idx = 0;
32 |       while (n) {
33 |         const unsigned genSamples = min(n, maxPerCall); // the last iteration may use fewer than maxPerCall
34 |         const auto grandVec = grand(s);
35 |         for (auto k = 0; k != genSamples; ++k, ++idx) { // note: k deduces to int while genSamples is unsigned
36 |           out[idx] = grandVec[k] * stdDev + mean; // scale by stdDev, then shift by mean
37 |         }
38 |         n -= genSamples;
39 |       }
40 |     }
41 |   }
42 | };
43 | 
44 | template class Normal<float>;
45 | template class Normal<half>;
46 | 
47 | } // namespace poprand
48 | 


--------------------------------------------------------------------------------
/lib/poprand/codelets/SetSeed.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 2 | #include "RandomUtils.hpp"
 3 | 
 4 | using namespace poplar;
 5 | 
 6 | namespace poprand {
 7 | 
 8 | class SetSeed : public MultiVertex {
 9 | public:
10 |   SetSeed();
11 | 
12 |   Input<Vector<unsigned, ONE_PTR, 8>> seed;
13 |   const uint32_t seedModifierUser;
14 |   const uint32_t seedModifierHw;
15 | 
16 |   IS_EXTERNAL_CODELET(true);
17 | 
18 |   void compute(unsigned wid) {} // empty here; NOTE(review): IS_EXTERNAL_CODELET(true) suggests the real implementation is external - confirm
19 | };
20 | 
21 | } // namespace poprand
22 | 


--------------------------------------------------------------------------------
/lib/poprand/poprandCycleEstimators.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #ifndef __poprandCycleEstimators_hpp__
 3 | #define __poprandCycleEstimators_hpp__
 4 | 
 5 | #include <poputil/cyclesTables.hpp>
 6 | 
 7 | namespace poprand {
 8 | 
 9 | poputil::internal::PerfEstimatorTable makePerfFunctionTable();
10 | 
11 | }
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/lib/poprand_mock/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | add_library(poprand_mock SHARED
 3 |   codelets.cpp
 4 | )
 5 | 
 6 | target_link_libraries(poprand_mock
 7 |   PRIVATE
 8 |     gccs
 9 |     GTest::gtest
10 |     GTest::gmock
11 |     Boost::boost
12 | )
13 | 
14 | target_include_directories(poprand_mock
15 |   PUBLIC
16 |     $<TARGET_PROPERTY:poprand,INTERFACE_INCLUDE_DIRECTORIES>
17 | )
18 | 
19 | install(TARGETS poprand_mock
20 |         COMPONENT poprand_mock
21 |         EXPORT poprand_mock
22 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}
23 |         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
24 | )
25 | 
26 | install(EXPORT poprand_mock
27 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/poprand_mock
28 |         FILE poprand_mock-targets.cmake
29 |         COMPONENT poprand_mock)
30 | 
31 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/poprand_mock
32 |         COMPONENT poprand_mock
33 |         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
34 | )
35 | 


--------------------------------------------------------------------------------
/lib/poprand_mock/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <poprand_mock/Mock.hpp>
 3 | 
 4 | namespace poprand_mock {
 5 | MockPoprand *mockPoprand_ = nullptr;
 6 | } // namespace poprand_mock
 7 | 
 8 | namespace poprand {
 9 | 
10 | void addCodelets(poplar::Graph &graph) {
11 |   poprand_mock::mockPoprand_->addCodelets(graph); // forwards to the mock; mockPoprand_ is initialised to nullptr above, so it must be set before this is called
12 | }
13 | 
14 | } // namespace poprand
15 | 


--------------------------------------------------------------------------------
/lib/popsparse/BSNonLinearity.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_BSNonLinearity_hpp
 4 | #define popsparse_BSNonLinearity_hpp
 5 | 
 6 | #include <poplar/Graph.hpp>
 7 | #include <poplar/Tensor.hpp>
 8 | #include <popsparse/experimental/BlockSparse.hpp>
 9 | 
10 | namespace popsparse {
11 | namespace experimental {
12 | 
13 | poplar::Tensor bsSoftmaxInternal(
14 |     poplar::Graph &graph, poplar::Tensor sparseTensor, bool inPlace,
15 |     unsigned blockRow, unsigned blockCol, unsigned blockRows,
16 |     unsigned blockCols, const unsigned char *sparsity,
17 |     popsparse::experimental::SubBlockMask subBlockMaskType, unsigned numGroups,
18 |     poplar::program::Sequence &prog, const poplar::DebugNameAndId &dnai);
19 | 
20 | poplar::Tensor bsSoftmaxGradInternal(
21 |     poplar::Graph &graph, poplar::Tensor sparseOut,
22 |     poplar::Tensor sparseOutGrad, unsigned blockRow, unsigned blockCol,
23 |     unsigned blockRows, unsigned blockCols, const unsigned char *sparsity,
24 |     poplar::program::Sequence &prog, const poplar::DebugNameAndId &dnai);
25 | 
26 | } // namespace experimental
27 | } // namespace popsparse
28 | 
29 | #endif // popsparse_BSNonLinearity_hpp


--------------------------------------------------------------------------------
/lib/popsparse/BSOps.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_BSOps_hpp
 4 | #define popsparse_BSOps_hpp
 5 | 
 6 | #include <poplar/Graph.hpp>
 7 | #include <poplar/Tensor.hpp>
 8 | #include <popsparse/experimental/BlockSparseMatMul.hpp>
 9 | 
10 | namespace popsparse {
11 | namespace experimental {
12 | 
13 | poplar::Tensor slice(const poplar::Tensor &sparseTensor, std::size_t coord,
14 |                      unsigned dimension, unsigned blockRow, unsigned blockCol,
15 |                      unsigned blockRows, unsigned blockCols,
16 |                      bool columnMajorBlock, const unsigned char *sparsity);
17 | 
18 | void applySubBlockMask(poplar::Graph &graph, const poplar::Tensor &sparseTensor,
19 |                        SubBlockMask subBlockMask, unsigned blockRow,
20 |                        unsigned blockCol, unsigned blockRows,
21 |                        unsigned blockCols, const unsigned char *sparsity,
22 |                        unsigned numGroups, poplar::program::Sequence &prog,
23 |                        const poplar::DebugNameAndId &dnai);
24 | 
25 | } // namespace experimental
26 | } // namespace popsparse
27 | 
28 | #endif // popsparse_BSOps_hpp


--------------------------------------------------------------------------------
/lib/popsparse/BSUtils.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_BSUtils_hpp
 4 | #define popsparse_BSUtils_hpp
 5 | 
 6 | #include <poplar/Graph.hpp>
 7 | #include <poplar/Tensor.hpp>
 8 | #include <popsparse/experimental/BlockSparse.hpp>
 9 | 
10 | namespace popsparse {
11 | namespace experimental {
12 | 
13 | void bsCreateMaskTensor(poplar::Graph &graph, unsigned blockRow,
14 |                         unsigned blockCol, unsigned blockRows,
15 |                         unsigned blockCols, const unsigned char *sparsity,
16 |                         popsparse::experimental::SubBlockMask subBlockMaskType,
17 |                         unsigned numGroups, float maskedValue,
18 |                         float unMaskedValue, const poplar::Type &dataType,
19 |                         std::vector<poplar::Tensor> &maskBlocks,
20 |                         std::vector<unsigned> &diagBlockIdxs,
21 |                         std::vector<bool> &emptyRowsMask,
22 |                         const poplar::DebugNameAndId &dnai);
23 | 
24 | } // namespace experimental
25 | } // namespace popsparse
26 | 
27 | #endif // popsparse_BSUtils_hpp


--------------------------------------------------------------------------------
/lib/popsparse/BalancedPartitioner.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_BalancedPartitioner_hpp
 4 | #define popsparse_BalancedPartitioner_hpp
 5 | 
 6 | #include "HyperGraphPartitioner.hpp"
 7 | 
 8 | namespace popsparse {
 9 | namespace experimental {
10 | 
11 | class BalancedPartitioner : public HyperGraphPartitioner {
12 | public:
13 |   BalancedPartitioner() = default;
14 | 
15 |   virtual ~BalancedPartitioner() = default;
16 | 
17 |   virtual float partitionGraph(const HyperGraphData &graphData, int nPartition,
18 |                                std::vector<int> &nodeAssignment) override;
19 | 
20 | public:
21 |   static void partition(const std::vector<float> &nodeW, int nPartition,
22 |                         std::vector<int> &nodeAssignment);
23 | };
24 | 
25 | } // namespace experimental
26 | } // namespace popsparse
27 | 
28 | #endif


--------------------------------------------------------------------------------
/lib/popsparse/FullyConnectedPNMapping.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #include "FullyConnectedPNMapping.hpp"
 3 | 
 4 | #include "poputil/exceptions.hpp"
 5 | 
 6 | #include "poplibs_support/VectorUtils.hpp"
 7 | 
 8 | namespace popsparse {
 9 | namespace fullyconnected {
10 | 
11 | PartitionToPNMapping::PartitionToPNMapping(
12 |     const Vector<unsigned> &linearisationOrder)
13 |     : linearisationOrder(linearisationOrder) {}
14 | 
15 | unsigned PartitionToPNMapping::getPNIdForPartition(
16 |     const Vector<unsigned> &partitions_, const Vector<unsigned> &index_) const {
17 |   unsigned id = 0; // flattened id, built up one dimension at a time
18 |   const auto inverseOrder =
19 |       inversePermutation(linearisationOrder.asStdVector()); // dimensions are visited in the inverse permutation of linearisationOrder
20 |   const auto &partitions = partitions_.asStdVector();
21 |   const auto &index = index_.asStdVector();
22 |   for (const auto dim : inverseOrder) {
23 |     id = id * partitions[dim] + index[dim]; // mixed-radix step: dimensions visited earlier end up more significant
24 |   }
25 |   return id;
26 | }
27 | 
28 | std::ostream &operator<<(std::ostream &os, const PartitionToPNMapping &m) {
29 |   os << m.linearisationOrder;
30 |   return os;
31 | }
32 | 
33 | } // end namespace fullyconnected
34 | } // end namespace popsparse
35 | 


--------------------------------------------------------------------------------
/lib/popsparse/FullyConnectedPNMapping.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef popsparse_FullyConnectedPNMapping_hpp
 3 | #define popsparse_FullyConnectedPNMapping_hpp
 4 | 
 5 | #include "FullyConnectedVector.hpp"
 6 | 
 7 | #include <ostream>
 8 | #include <vector>
 9 | 
10 | namespace popsparse {
11 | namespace fullyconnected {
12 | 
13 | /** The order in which to map partitions of different dimensions
14 |  *  of a sparse fully connected layer operation to tiles.
15 |  */
16 | class PartitionToPNMapping {
17 |   Vector<unsigned> linearisationOrder; // private (class default access); read via getLinearisationOrder()
18 | 
19 | public:
20 |   PartitionToPNMapping() = default;
21 |   PartitionToPNMapping(const PartitionToPNMapping &other) = default;
22 |   PartitionToPNMapping(PartitionToPNMapping &&other) = default;
23 |   PartitionToPNMapping &operator=(const PartitionToPNMapping &other) = default;
24 |   PartitionToPNMapping &operator=(PartitionToPNMapping &&other) = default;
25 | 
26 |   PartitionToPNMapping(const Vector<unsigned> &linearisationOrder); // not explicit: a Vector<unsigned> converts implicitly
27 |   unsigned getPNIdForPartition(const Vector<unsigned> &partitions,
28 |                                const Vector<unsigned> &index) const;
29 |   const Vector<unsigned> &getLinearisationOrder() const {
30 |     return linearisationOrder; // reference to the member; valid only while this object lives
31 |   }
32 |   friend std::ostream &operator<<(std::ostream &os,
33 |                                   const PartitionToPNMapping &m); // friend so it can reach the private linearisationOrder
34 | };
35 | 
36 | } // end namespace fullyconnected
37 | } // end namespace popsparse
38 | 
39 | #endif // popsparse_FullyConnectedPNMapping_hpp
40 | 


--------------------------------------------------------------------------------
/lib/popsparse/FullyConnectedTensorMetaData.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_FullyConnectedTensorMetaData_hpp
 4 | #define popsparse_FullyConnectedTensorMetaData_hpp
 5 | 
 6 | #include "PlanningCacheImpl.hpp"
 7 | 
 8 | #include "TensorMetaDataBase.hpp"
 9 | 
10 | namespace popsparse {
11 | namespace dynamic {
12 | 
13 | /// TensorMetaData for sparse tensors created for the fully connected layer
14 | class FullyConnectedTensorMetaData : public poputil::TensorMetaDataBase {
15 | public:
16 |   // In order to identify the fully connected layer this tensor was
17 |   // created for we just store the planning key which necessarily
18 |   // uniquely identifies this operation.
19 |   PlanningCacheImpl::Key planningKey;
20 |   FullyConnectedTensorMetaData(PlanningCacheImpl::Key planningKey)
21 |       : planningKey(std::move(planningKey)) {} // key is taken by value and moved into the member
22 |   FullyConnectedTensorMetaData(const FullyConnectedParams &params,
23 |                                const fullyconnected::Options &options)
24 |       : planningKey(params, options) {} // key is constructed directly from (params, options)
25 |   virtual ~FullyConnectedTensorMetaData() {}
26 |   virtual std::unique_ptr<TensorMetaDataBase> clone() const override final {
27 |     return std::make_unique<FullyConnectedTensorMetaData>(planningKey); // the clone gets its own copy of planningKey
28 |   }
29 | };
31 | } // end namespace dynamic
32 | } // end namespace popsparse
33 | 
34 | #endif // popsparse_FullyConnectedTensorMetaData_hpp
35 | 


--------------------------------------------------------------------------------
/lib/popsparse/HyperGraphBlockNaive.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_HyperGraphBlockNaive_hpp
 4 | #define popsparse_HyperGraphBlockNaive_hpp
 5 | 
 6 | #include "HyperGraphBlock.hpp"
 7 | 
 8 | namespace popsparse {
 9 | namespace experimental {
10 | 
11 | /*
12 | This class uses a simple tile mapping scheme
13 | without any graph partitioning.
14 | */
15 | class HyperGraphBlockNaive : public HyperGraphBlock {
16 | public:
17 |   HyperGraphBlockNaive(BlockMatrix &A, BlockMatrix &B,
18 |                        poplar::Type inDataTypeIn, poplar::Type outDataTypeIn,
19 |                        poplar::Type partialDataTypeIn, int nTileIn,
20 |                        int nTargetNodesVPerTileIn = TARGET_V_NODES_PER_TILE);
21 | 
22 |   virtual ~HyperGraphBlockNaive() = default;
23 | 
24 | protected:
25 |   virtual void partitionGraph() override;
26 | };
27 | 
28 | } // namespace experimental
29 | } // namespace popsparse
30 | 
31 | #endif


--------------------------------------------------------------------------------
/lib/popsparse/HyperGraphBlockZoltan.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_HyperGraphBlockZoltan_hpp
 4 | #define popsparse_HyperGraphBlockZoltan_hpp
 5 | 
 6 | #include "HyperGraphBlock.hpp"
 7 | #include "ZoltanPartitioner.hpp"
 8 | 
 9 | namespace popsparse {
10 | namespace experimental {
11 | 
12 | /*
13 | This class uses the Zoltan library for partitioning.
14 | */
15 | class HyperGraphBlockZoltan : public HyperGraphBlock {
16 | 
17 | public:
18 |   HyperGraphBlockZoltan(BlockMatrix &A, BlockMatrix &B,
19 |                         poplar::Type inDataTypeIn, poplar::Type outDataTypeIn,
20 |                         poplar::Type partialDataTypeIn, int nTileIn,
21 |                         float memoryCycleRatioIn,
22 |                         int nTargetNodesVPerTileIn = TARGET_V_NODES_PER_TILE);
23 | 
24 |   virtual ~HyperGraphBlockZoltan() = default;
25 | 
26 | protected:
27 |   // Set up weights for a graph
28 |   virtual void setupWeights(const poplar::Graph &graph) override;
29 | 
30 |   virtual void partitionGraph() override;
31 | 
32 | private:
33 |   // Used to tune partitioning algorithm
34 |   float memoryCycleRatio;
35 | 
36 |   // Represents hypergraph in a Zoltan format
37 |   HyperGraphData getDataForPartitioner();
38 | };
39 | 
40 | } // namespace experimental
41 | } // namespace popsparse
42 | 
43 | #endif


--------------------------------------------------------------------------------
/lib/popsparse/PlanningCache.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #include "PlanningCacheImpl.hpp"
 3 | #include <poplin/MatMul.hpp>
 4 | #include <popsparse/PlanningCache.hpp>
 5 | #include <poputil/DebugInfo.hpp>
 6 | 
 7 | namespace poputil {
 8 | template <>
 9 | poplar::ProfileValue
10 | toProfileValue(const popsparse::dynamic::PlanningCache &t) {
11 |   return poplar::ProfileValue("<popsparse::dynamic::PlanningCache>"); // fixed placeholder string; `t` is not inspected
12 | }
13 | 
14 | template <>
15 | poplar::ProfileValue
16 | toProfileValue(const popsparse::static_::PlanningCache &t) {
17 |   return poplar::ProfileValue("<popsparse::static_::PlanningCache>");
18 | }
19 | } // namespace poputil
20 | 
21 | namespace popsparse {
22 | namespace dynamic {
23 | 
24 | PlanningCache::PlanningCache() : impl(new PlanningCacheImpl()) {}
25 | 
26 | PlanningCache::PlanningCache(poplin::PlanningCache *planningCache)
27 |     : impl(new PlanningCacheImpl(planningCache)) {}
28 | 
29 | PlanningCache::~PlanningCache() = default;
30 | 
31 | } // end namespace dynamic
32 | 
33 | namespace static_ {
34 | PlanningCache::PlanningCache() : impl(new PlanningCacheImpl()) {}
35 | PlanningCache::~PlanningCache() = default;
36 | } // end namespace static_
37 | 
38 | } // end namespace popsparse
39 | 


--------------------------------------------------------------------------------
/lib/popsparse/SparsePartitionerOptions.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "SparsePartitionerOptions.hpp"
 4 | 
 5 | #include <gccs/StructHelper.hpp>
 6 | 
 7 | namespace popsparse {
 8 | 
 9 | static constexpr auto partitionerHelper =
10 |     gccs::makeStructHelper(&PartitionerOptions::optimiseForSpeed,
11 |                            &PartitionerOptions::useActualWorkerSplitCosts,
12 |                            &PartitionerOptions::forceBucketSpills);
13 | 
14 | bool operator<(const PartitionerOptions &a, const PartitionerOptions &b) {
15 |   return partitionerHelper.lt(a, b);
16 | }
17 | 
18 | bool operator==(const PartitionerOptions &a, const PartitionerOptions &b) {
19 |   return partitionerHelper.eq(a, b);
20 | }
21 | 
22 | bool operator!=(const PartitionerOptions &a, const PartitionerOptions &b) {
23 |   return !(a == b);
24 | }
25 | 
26 | } // end namespace popsparse
27 | 


--------------------------------------------------------------------------------
/lib/popsparse/SparsePartitionerOptions.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_SparsePartitionerOptions_hpp
 4 | #define popsparse_SparsePartitionerOptions_hpp
 5 | 
 6 | namespace popsparse {
 7 | 
 8 | struct PartitionerOptions {
 9 |   // Optimise bucket overflow allocation for speed: overflow allocation then
10 |   // attempts to allocate buckets that have the shortest distance to travel.
11 |   bool optimiseForSpeed = true;
12 | 
13 |   // If set, the actual worker split is used every time the costs for a
14 |   // partition are evaluated. This gives the same cost as the final "real"
15 |   // allocation but is expensive to compute. If not set, all workers are
16 |   // assumed to be used and the final allocation will actually be lower.
17 |   bool useActualWorkerSplitCosts = false;
18 | 
19 |   // Test mode to force bucket spills.
20 |   bool forceBucketSpills = false;
21 | 
22 |   friend bool operator<(const PartitionerOptions &a,
23 |                         const PartitionerOptions &b);
24 |   friend bool operator==(const PartitionerOptions &a,
25 |                          const PartitionerOptions &b);
26 |   friend bool operator!=(const PartitionerOptions &a,
27 |                          const PartitionerOptions &b);
28 | };
29 | 
30 | } // end namespace popsparse
31 | 
32 | #endif // popsparse_SparsePartitionerOptions_hpp
33 | 


--------------------------------------------------------------------------------
/lib/popsparse/SparseTensor.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #include "popsparse/SparseTensor.hpp"
 3 | #include "poputil/DebugInfo.hpp"
 4 | 
 5 | namespace poputil {
 6 | template <>
 7 | poplar::ProfileValue toProfileValue(const popsparse::dynamic::SparseTensor &t) {
 8 |   poplar::ProfileValue::Map v;
 9 | 
10 |   v.insert({"metaInfo", toProfileValue(t.getMetaInfoTensor())});
11 |   v.insert({"nzValues", toProfileValue(t.getNzValuesTensor())});
12 |   v.insert({"opMetaData", toProfileValue(t.getOpMetaData())});
13 | 
14 |   return v;
15 | }
16 | 
17 | template <>
18 | poplar::ProfileValue toProfileValue(const popsparse::static_::SparseTensor &t) {
19 |   poplar::ProfileValue::Map v;
20 | 
21 |   v.insert({"nzValues", toProfileValue(t.getNzValuesTensor())});
22 |   v.insert({"opMetaData", toProfileValue(t.getOpMetaData())});
23 | 
24 |   return v;
25 | }
26 | 
27 | } // namespace poputil
28 | 


--------------------------------------------------------------------------------
/lib/popsparse/ZoltanPartitioner.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #ifndef popsparse_ZoltanPartitioner_hpp
 4 | #define popsparse_ZoltanPartitioner_hpp
 5 | 
 6 | #include "HyperGraphPartitioner.hpp"
 7 | 
 8 | namespace popsparse {
 9 | namespace experimental {
10 | 
11 | /*
12 | A wrapper around the Zoltan partitioning library
13 | */
14 | class ZoltanPartitioner : public HyperGraphPartitioner {
15 | public:
16 |   // Partitioning mode selected at construction.
17 |   enum class PartitionType { BLOCK, HYPERGRAPH };
18 |   ZoltanPartitioner(PartitionType partitionTypeIn)
19 |       : partitionType(partitionTypeIn) {}
20 |   virtual ~ZoltanPartitioner() = default;
21 | 
22 |   // Overrides HyperGraphPartitioner::partitionGraph.
23 |   float partitionGraph(const HyperGraphData &graphData, int nPartition,
24 |                        std::vector<int> &nodeAssignment) override;
25 | 
26 |   PartitionType partitionType;
27 | };
28 | 
29 | } // namespace experimental
30 | } // namespace popsparse
31 | 
32 | #endif


--------------------------------------------------------------------------------
/lib/popsparse/codelets.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #include "popsparseCycleEstimators.hpp"
 3 | #include <poplibs_support/codelets.hpp>
 4 | #include <popsparse/codelets.hpp>
 5 | 
 6 | using namespace poplar;
 7 | 
 8 | namespace popsparse {
 9 | 
10 | void addCodelets(Graph &graph) {
11 |   static poplibs::CurrentLibLocator loc;
12 |   graph.addCodelets(poplibs::getCodeletsPath("popsparse", "popsparse.gp", loc));
13 |   poputil::internal::registerPerfFunctions(graph, makePerfFunctionTable());
14 | }
15 | 
16 | } // end namespace popsparse
17 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block16x16SparseDenseMatMulGradA_half_float.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 16x16
 6 | 
 7 | #if defined(__IPU__)
 8 | #include "BlockSparseDenseMatMul.h.S"
 9 | #include "poplar/AvailableVTypes.h"
10 | 
11 | // =============================================================================
12 | 
13 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_float_16_16
14 | 
15 | // =============================================================================
16 | 
17 | .extern blockSparseDenseMultiply_hf16x16
18 | .extern blockSparseDenseMultiply_hf16x16_retained
19 | 
20 | // =============================================================================
21 | // Supervisor codelet which launches the zeroing of the output Q matrix and
22 | // then parses the meta information buckets. Each bucket is walked through to
23 | // match the PNs subgroup id. 
24 | 
25 | // Instantiate supervisor codelet
26 | BLOCK_SPARSE_MATMUL CODELET_NAME half float hf16x16 16 1
27 | 
28 | #endif // defined(__IPU__)
29 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block16x16SparseDenseMatMulGradA_half_half.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 16x16
 6 | 
 7 | #if defined(__IPU__)
 8 | #if __IPU_ARCH_VERSION__ == 1
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_half_16_16
15 | 
16 | // =============================================================================
17 | 
18 | .extern blockSparseDenseMultiply_hh16x16
19 | .extern blockSparseDenseMultiply_hh16x16_retained
20 | 
21 | // =============================================================================
22 | // Supervisor codelet which launches the zeroing of the output Q matrix and
23 | // then parses the meta information buckets. Each bucket is walked through to
24 | // match the PNs subgroup id.
25 | 
26 | // Instantiate supervisor codelet
27 | BLOCK_SPARSE_MATMUL CODELET_NAME half half hh16x16 16 1
28 | 
29 | #endif // __IPU_ARCH_VERSION__ == 1
30 | #endif // defined(__IPU__)
31 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block16x16SparseDenseMatMulGradA_half_half_2ampsets.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 16x16
 6 | 
 7 | #if defined(__IPU__)
 8 | #if (__IPU_ARCH_VERSION__ == 2) || (__IPU_ARCH_VERSION__ == 21)
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_half_16_16
15 | 
16 | // =============================================================================
17 | 
18 | .extern blockSparseDenseMultiply_hh16x16
19 | .extern blockSparseDenseMultiply_hh16x16_retained
20 | 
21 | // =============================================================================
22 | // Supervisor codelet which launches the zeroing of the output Q matrix and
23 | // then parses the meta information buckets. Each bucket is walked through to
24 | // match the PNs subgroup id.
25 | 
26 | // Instantiate supervisor codelet
27 | BLOCK_SPARSE_MATMUL CODELET_NAME half half hh16x16 16 1
28 | 
29 | #endif // (__IPU_ARCH_VERSION__ == 2) || (__IPU_ARCH_VERSION__ == 21)
30 | #endif // defined(__IPU__)
31 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block4x4SparseDenseMatMulGradA_float_float.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 4x4
 6 | //
 7 | 
 8 | #ifdef __IPU__
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___float_float_4_4
15 | 
16 | // =============================================================================
17 | .extern blockSparseDenseMultiply_ff4x4
18 | .extern blockSparseDenseMultiply_ff4x4_retained
19 | 
20 | // =============================================================================
21 | // Supervisor codelet which launches the zeroing of the output Q matrix and
22 | // then parses the meta information buckets. Each bucket is walked through to
23 | // match the PNs subgroup id. 
24 | 
25 | // Instantiate supervisor codelet
26 | BLOCK_SPARSE_MATMUL CODELET_NAME float float ff4x4 4 1
27 | 
28 | // =============================================================================
29 | #endif // #ifdef __IPU__
30 | // =============================================================================
31 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block4x4SparseDenseMatMulGradA_half_float.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 4x4
 6 | //
 7 | 
 8 | #ifdef __IPU__
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_float_4_4
15 | 
16 | // =============================================================================
17 | // NOTE(review): sibling GradA files use .extern for these; confirm .global is intended.
18 | .global blockSparseDenseMultiply_hf4x4
19 | .global blockSparseDenseMultiply_hf4x4_retained
20 | 
21 | // =============================================================================
22 | // Supervisor codelet which launches the zeroing of the output Q matrix and
23 | // then parses the meta information buckets. Each bucket is walked through to
24 | // match the PNs subgroup id. 
25 | 
26 | // Instantiate supervisor codelet
27 | BLOCK_SPARSE_MATMUL CODELET_NAME half float hf4x4 4 1
28 | 
29 | // =============================================================================
30 | #endif // #ifdef __IPU__
31 | // =============================================================================
32 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block4x4SparseDenseMatMulGradA_half_half.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 4x4
 6 | //
 7 | 
 8 | #ifdef __IPU__
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_half_4_4
15 | 
16 | // =============================================================================
17 | 
18 | .extern blockSparseDenseMultiply_hh4x4
19 | .extern blockSparseDenseMultiply_hh4x4_retained
20 | 
21 | // =============================================================================
22 | // Supervisor codelet which launches the zeroing of the output Q matrix and
23 | // then parses the meta information buckets. Each bucket is walked through to
24 | // match the PNs subgroup id. 
25 | 
26 | // Instantiate supervisor codelet
27 | BLOCK_SPARSE_MATMUL CODELET_NAME half half hh4x4 4 1
28 | 
29 | // =============================================================================
30 | #endif // #ifdef __IPU__
31 | // =============================================================================
32 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block8x8SparseDenseMatMulGradA_float_float.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 8x8
 6 | //
 7 | 
 8 | #ifdef __IPU__
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___float_float_8_8
15 | 
16 | // =============================================================================
17 | .extern blockSparseDenseMultiply_ff8x8
18 | .extern blockSparseDenseMultiply_ff8x8_retained
19 | 
20 | // =============================================================================
21 | // Supervisor codelet which launches the zeroing of the output Q matrix and
22 | // then parses the meta information buckets. Each bucket is walked through to
23 | // match the PNs subgroup id. 
24 | 
25 | // Instantiate supervisor codelet
26 | BLOCK_SPARSE_MATMUL CODELET_NAME float float ff8x8 8 1
27 | 
28 | // =============================================================================
29 | #endif // #ifdef __IPU__
30 | // =============================================================================
31 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block8x8SparseDenseMatMulGradA_half_float.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 8x8
 6 | //
 7 | 
 8 | #ifdef __IPU__
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_float_8_8
15 | 
16 | // =============================================================================
17 | 
18 | .extern blockSparseDenseMultiply_hf8x8
19 | .extern blockSparseDenseMultiply_hf8x8_retained
20 | 
21 | // =============================================================================
22 | // Supervisor codelet which launches the zeroing of the output Q matrix and
23 | // then parses the meta information buckets. Each bucket is walked through to
24 | // match the PNs subgroup id. 
25 | 
26 | // Instantiate supervisor codelet
27 | BLOCK_SPARSE_MATMUL CODELET_NAME half float hf8x8 8 1
28 | 
29 | // =============================================================================
30 | #endif // #ifdef __IPU__
31 | // =============================================================================
32 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/Block8x8SparseDenseMatMulGradA_half_half.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Performs sparse matrix multiplication Q = R * S Where
 4 | // Q and S are dense matrices and R is a sparse matrix
 5 | // with block size of 8x8
 6 | //
 7 | 
 8 | #ifdef __IPU__
 9 | #include "BlockSparseDenseMatMul.h.S"
10 | #include "poplar/AvailableVTypes.h"
11 | 
12 | // =============================================================================
13 | 
14 | #define CODELET_NAME __runCodelet_popsparse__SparseDenseMatMulBlockGradA___half_half_8_8
15 | 
16 | // =============================================================================
17 | 
18 | .extern blockSparseDenseMultiply_hh8x8
19 | .extern blockSparseDenseMultiply_hh8x8_retained
20 | 
21 | // =============================================================================
22 | // Supervisor codelet which launches the zeroing of the output Q matrix and
23 | // then parses the meta information buckets. Each bucket is walked through to
24 | // match the PNs subgroup id. 
25 | 
26 | // Instantiate supervisor codelet
27 | BLOCK_SPARSE_MATMUL CODELET_NAME half half hh8x8 8 1
28 | 
29 | // =============================================================================
30 | #endif // #ifdef __IPU__
31 | // =============================================================================
32 | 


--------------------------------------------------------------------------------
/lib/popsparse/codelets/asm/BlockSparseMatMulStructs.h.S:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | //
 3 | // Header for Block Sparse-Dense matrix multiplication for GradW asm codelets
 4 | 
 5 | #ifndef _BlockSparseDenseMatMulStructs_h_
 6 | #define _BlockSparseDenseMatMulStructs_h_
 7 | 
 8 | // Bucket meta information field offsets and sizes (in bytes)
 9 | #define sizeof_MetaInfoSubGroupEntry                            14
10 | #define MetaInfoSubGroupEntry_id                                0
11 | #define MetaInfoSubGroupEntry_xPartition                        2
12 | #define MetaInfoSubGroupEntry_yPartition                        4
13 | #define MetaInfoSubGroupEntry_offsetToNextSubGroupSparseEntries 6
14 | #define MetaInfoSubGroupEntry_offsetToNextSubGroupMetaInfo      8
15 | #define MetaInfoSubGroupEntry_numXm1                            10
16 | #define MetaInfoSubGroupEntry_numGradWWorkers                   12
17 | 
18 | #define sizeof_MetaInfoGradWWorkerEntry                             8
19 | #define MetaInfoGradWWorkerEntry_sparseOffset                       0
20 | #define MetaInfoGradWWorkerEntry_metaInfoOffsetOutputEntry          2
21 | #define MetaInfoGradWWorkerEntry_metaInfoOffsetToOffsetsYInSFirst   4
22 | #define MetaInfoGradWWorkerEntry_totalNumY                          6
23 | 
24 | // =============================================================================
25 | #endif // #define _BlockSparseDenseMatMulStructs_h_
26 | // =============================================================================
27 | 


--------------------------------------------------------------------------------
/lib/popsparse/popsparseCycleEstimators.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef popsparse_popsparseCycleEstimators_hpp
 3 | #define popsparse_popsparseCycleEstimators_hpp
 4 | 
 5 | #include <poputil/cyclesTables.hpp>
 6 | 
 7 | namespace popsparse {
 8 | 
 9 | poputil::internal::PerfEstimatorTable makePerfFunctionTable();
10 | 
11 | } // end namespace popsparse
12 | 
13 | #endif // popsparse_popsparseCycleEstimators_hpp
14 | 


--------------------------------------------------------------------------------
/lib/poputil/TensorMetaData.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #include "poputil/TensorMetaData.hpp"
 4 | #include "TensorMetaDataBase.hpp"
 5 | #include "poputil/DebugInfo.hpp"
 6 | 
 7 | namespace poputil {
 8 | 
 9 | template <>
10 | poplar::ProfileValue toProfileValue(const poputil::TensorMetaData &t) {
11 |   return poplar::ProfileValue("<poputil::TensorMetaData>");
12 | }
13 | 
14 | TensorMetaData::TensorMetaData() = default;
15 | // Copy constructor: the payload is deep-copied through clone(), and
16 | // copying an empty TensorMetaData (null payload) yields another empty
17 | // one.
18 | TensorMetaData::TensorMetaData(const TensorMetaData &other)
19 |     : data(other.data ? other.data->clone() : nullptr) {}
20 | TensorMetaData::TensorMetaData(TensorMetaData &&other) = default;
21 | TensorMetaData::TensorMetaData(std::unique_ptr<TensorMetaDataBase> data)
22 |     : data(std::move(data)) {}
23 | TensorMetaData::~TensorMetaData() = default;
24 | 
25 | TensorMetaData &TensorMetaData::operator=(const TensorMetaData &other) {
26 |   // Deep-copy the payload. An empty source must also clear any payload we
27 |   // already hold, otherwise stale meta-data would survive the assignment.
28 |   data = other.data ? other.data->clone() : nullptr;
29 |   return *this;
30 | }
31 | 
32 | } // end namespace poputil
33 | 


--------------------------------------------------------------------------------
/lib/poputil/TensorMetaDataBase.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #ifndef poputil_TensorMetaDataBase_hpp
 3 | #define poputil_TensorMetaDataBase_hpp
 4 | 
 5 | #include "poputil/exceptions.hpp"
 6 | 
 7 | namespace poputil {
 8 | 
 9 | /** Abstract base of every kind of meta-data attached to a tensor.
10 |  *  Concrete meta-data types derive from it and implement clone().
11 |  */
12 | class TensorMetaDataBase {
13 | public:
14 |   virtual std::unique_ptr<TensorMetaDataBase> clone() const = 0;
15 |   virtual ~TensorMetaDataBase() = default;
16 | };
17 | 
18 | } // end namespace poputil
19 | 
20 | #endif // poputil_TensorMetaDataBase_hpp
21 | 


--------------------------------------------------------------------------------
/lib/poputil/exceptions.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #include "poputil/exceptions.hpp"
 3 | 
 4 | namespace poputil {
 5 | 
 6 | poplibs_error::poplibs_error(const std::string &s) : std::runtime_error(s) {
 7 |   type = __FUNCTION__;
 8 | }
 9 | poplibs_error::poplibs_error(const char *s) : std::runtime_error(s) {
10 |   type = __FUNCTION__;
11 | }
12 | 
13 | } // namespace poputil
14 | 


--------------------------------------------------------------------------------
/lib/poputil_mock/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | add_library(poputil_mock SHARED TileMapping.cpp)
 3 | 
 4 | target_link_libraries(poputil_mock
 5 |   PRIVATE
 6 |     gccs
 7 |     GTest::gtest
 8 |     GTest::gmock
 9 |     Boost::boost
10 | )
11 | 
12 | target_include_directories(poputil_mock
13 |   PUBLIC
14 |     $<TARGET_PROPERTY:poputil,INTERFACE_INCLUDE_DIRECTORIES>
15 | )
16 | 
17 | install(TARGETS poputil_mock
18 |         COMPONENT poputil_mock
19 |         EXPORT poputil_mock
20 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}
21 |         INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
22 | )
23 | 
24 | install(EXPORT poputil_mock
25 |         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/poputil_mock
26 |         FILE poputil_mock-targets.cmake
27 |         COMPONENT poputil_mock)
28 | 
29 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/poputil_mock
30 |         COMPONENT poputil_mock
31 |         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
32 | )
33 | 


--------------------------------------------------------------------------------
/lib/poputil_mock/TileMapping.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #include <poputil_mock/Mock.hpp>
 3 | 
 4 | namespace poputil_mock {
 5 | MockPoputil *mockPoputil_ = nullptr;
 6 | } // namespace poputil_mock
 7 | 
 8 | namespace poputil {
 9 | 
10 | void mapTensorLinearly(poplar::Graph &graph, const poplar::Tensor &t,
11 |                        unsigned minElementsPerTile, unsigned grainSize) {
12 |   poputil_mock::mockPoputil_->mapTensorLinearly(graph, t, minElementsPerTile,
13 |                                                 grainSize);
14 | }
15 | 
16 | void mapTensorLinearly(poplar::Graph &graph, const poplar::Tensor &t) {
17 |   poputil_mock::mockPoputil_->mapTensorLinearly(graph, t);
18 | }
19 | 
20 | unsigned getTileImbalance(const poplar::Graph &graph, const poplar::Tensor &t,
21 |                           unsigned minElementsPerTile, unsigned grainSize) {
22 |   return poputil_mock::mockPoputil_->getTileImbalance(
23 |       graph, t, minElementsPerTile, grainSize);
24 | }
25 | 
26 | std::pair<poplar::Tensor, unsigned>
27 | cloneAndExpandAliasing(poplar::Graph &graph, const poplar::Tensor &t,
28 |                        unsigned offset,
29 |                        const poplar::DebugContext &debugContext) {
30 |   return poputil_mock::mockPoputil_->cloneAndExpandAliasing(graph, t, offset,
31 |                                                             debugContext);
32 | }
33 | 
34 | } // namespace poputil
35 | 


--------------------------------------------------------------------------------
/lsan.supp:
--------------------------------------------------------------------------------
 1 | leak:createColossusMCInstrInfo
 2 | leak:createX86MCInstrInfo
 3 | leak:clang::
 4 | leak:llvm::
 5 | leak:createMangleNumberingContext
 6 | leak:std::__cxx11::basic_string
 7 | leak:/llvm/llvm/
 8 | leak:tbb::internal::task_stream
 9 | leak:(<unknown module>)
10 | 


--------------------------------------------------------------------------------
/packaging_files/enable.sh.in:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Adds this PopLibs installation to the compiler and linker search paths.
 3 | # The exports only take effect in the caller's shell when the file is sourced.
 4 | 
 5 | if [ -z "${POPLAR_SDK_ENABLED+x}" ]
 6 | then
 7 |   echo 'ERROR: You must enable a Poplar SDK before you can enable PopLibs.'
 8 | else
 9 |   [[ "$OSTYPE" == "linux-gnu" ]] || echo "WARNING: Only Linux is supported. Continue at own risk."
10 |   # Absolute path of the directory containing this script.
11 |   DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
12 | 
13 |   # Prepend our directories. ${VAR:+:$VAR} appends the previous value only when
14 |   # it is non-empty, so no trailing ':' (an empty entry, i.e. the current
15 |   # directory) is left in the search path.
16 |   export CPATH="$DIR/include${CPATH:+:$CPATH}"
17 |   export LD_LIBRARY_PATH="$DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
18 |   export LIBRARY_PATH="$DIR/lib${LIBRARY_PATH:+:$LIBRARY_PATH}"
19 | 
20 |   # For backward compatibility, create symbolic link <LIB>_local.so to each <LIB>.so file.
21 |   # The path is passed to sh as $1 (not spliced into the command text) so that
22 |   # unusual characters in it cannot be interpreted as shell code.
23 |   /usr/bin/find "$DIR"/lib*/ -name '*.so' -type f -exec /bin/sh -c 'LIB="$1"; /bin/ln -sf "${LIB}" "${LIB%.so}_local.so"' sh {} \;
24 | fi
25 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | progress==1.5
2 | 


--------------------------------------------------------------------------------
/tests/poplibs_support/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_unit_test(MultiArrayTest MultiArrayTest.cpp VARIANTS NoTarget)
 2 | add_unit_test(PlanConstraintsTest PlanConstraintsTest.cpp VARIANTS NoTarget)
 3 | add_unit_test(StridedRegionsTest StridedRegionsTest.cpp VARIANTS NoTarget)
 4 | 
 5 | add_unit_test(LoggingTest
 6 |               LoggingTest.cpp VARIANTS NoTarget)
 7 | if(TARGET LoggingTest)
 8 |   set(logging_test NoTarget_default_LoggingTest)
 9 |   if (TEST ${logging_test})
10 |     set(LoggingPrintoutExample "")
11 |     list(APPEND LoggingPrintoutExample "PL:POPFLOAT  [0-9]+\\.[0-9]+ I: Hello world")
12 |     list(APPEND LoggingPrintoutExample "PL:POPLIN    [0-9]+\\.[0-9]+ I: Hello world")
13 |     list(APPEND LoggingPrintoutExample "PL:POPNN     [0-9]+\\.[0-9]+ I: Hello world")
14 |     list(APPEND LoggingPrintoutExample "PL:POPOPS    [0-9]+\\.[0-9]+ I: Hello world")
15 |     list(APPEND LoggingPrintoutExample "PL:POPRAND   [0-9]+\\.[0-9]+ I: Hello world")
16 |     list(APPEND LoggingPrintoutExample "PL:POPSPARSE [0-9]+\\.[0-9]+ I: Hello world")
17 |     list(APPEND LoggingPrintoutExample "PL:POPUTIL   [0-9]+\\.[0-9]+ I: Hello world")
18 |     set(SelectiveLoggingPass "I'm printed")
19 |     set(v "${LoggingPrintoutExample};${SelectiveLoggingPass}")
20 |     list(JOIN v ".*" LoggingPassRegex)
21 | 
22 |     set(SelectiveLoggingFail "I'm not printed")
23 | 
24 |     set_tests_properties(${logging_test} PROPERTIES
25 |       PASS_REGULAR_EXPRESSION "${LoggingPassRegex}")
26 |     set_tests_properties(${logging_test} PROPERTIES
27 |       FAIL_REGULAR_EXPRESSION "${SelectiveLoggingFail}")
28 |   else()
29 |     message(WARNING "Could not find logging test")
30 |   endif()
31 | endif()
32 | 


--------------------------------------------------------------------------------
/tests/poplibs_support/LoggingTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #define BOOST_TEST_MODULE LoggingTest
 3 | #include <boost/test/unit_test.hpp>
 4 | 
 5 | #include <poplibs_support/logging.hpp>
 6 | 
 7 | using namespace poplibs_support::logging;
 8 | 
 9 | // The output of these tests is checked by CMake regular expressions and is
10 | // not checked here explicitly.
11 | 
12 | BOOST_AUTO_TEST_CASE(LoggingPrintoutExample) {
13 |   setLogLevel(Module::popfloat, Level::Trace);
14 |   setLogLevel(Module::poplin, Level::Trace);
15 |   setLogLevel(Module::popnn, Level::Trace);
16 |   setLogLevel(Module::popops, Level::Trace);
17 |   setLogLevel(Module::poprand, Level::Trace);
18 |   setLogLevel(Module::popsparse, Level::Trace);
19 |   setLogLevel(Module::poputil, Level::Trace);
20 |   popfloat::info("Hello world");
21 |   poplin::info("Hello world");
22 |   popnn::info("Hello world");
23 |   popops::info("Hello world");
24 |   poprand::info("Hello world");
25 |   popsparse::info("Hello world");
26 |   poputil::info("Hello world");
27 | 
28 |   // Note that poplibs isn't a specific logging module. So the following line
29 |   // will not compile
30 |   // poplibs::err("Hello world");
31 | }
32 | 
33 | BOOST_AUTO_TEST_CASE(SelectiveLogging) {
34 |   // POPLIBS_POPFLOAT_LOG_LEVEL=ERR
35 |   setLogLevel(Module::popfloat, Level::Err);
36 |   // POPLIBS_POPLIN_LOG_LEVEL=TRACE
37 |   setLogLevel(Module::poplin, Level::Trace);
38 | 
39 |   popfloat::info("I'm not printed");
40 |   poplin::info("I'm printed");
41 | }
42 | 


--------------------------------------------------------------------------------
/tests/poplibs_test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Note that although this is purely introspection-based,
2 | # we need an IpuModel in order to have > 1 tile.
3 | add_unit_test(IdenticalLayoutTest IdenticalLayoutTest.cpp
4 |               VARIANTS IpuModel2)
5 | 


--------------------------------------------------------------------------------
/tests/poplin/json/depthwise_conv_half.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dataType": "half",
 3 |   "batchSize": 1,
 4 |   "numConvGroups": 24,
 5 |   "inputFieldShape": [28,28],
 6 |   "kernelShape": [5,5],
 7 |   "inputChannelsPerConvGroup": 1,
 8 |   "outputChannelsPerConvGroup": 1
 9 | }
10 | 


--------------------------------------------------------------------------------
/tests/poplin/json/depthwise_conv_quarter1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "inputType": "quarter",
 3 |   "outputType": "half",
 4 |   "batchSize": 1,
 5 |   "numConvGroups": 24,
 6 |   "inputFieldShape": [28,28],
 7 |   "kernelShape": [5,5],
 8 |   "inputChannelsPerConvGroup": 1,
 9 |   "outputChannelsPerConvGroup": 1,
10 |   "fp8FormatFwd": "F143",
11 |   "fp8ScaleFwd": 1,
12 |   "fp8FormatWeights": "F152",
13 |   "fp8ScaleWeights": -1,
14 |   "fp8FormatBwd": "F143",
15 |   "fp8ScaleBwd": 2
16 |  }
17 | 


--------------------------------------------------------------------------------
/tests/poplin/json/depthwise_conv_quarter2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "inputType": "quarter",
 3 |   "outputType": "half",
 4 |   "batchSize": 2,
 5 |   "numConvGroups": 24,
 6 |   "inputFieldShape": [32,32],
 7 |   "kernelShape": [5,5],
 8 |   "inputChannelsPerConvGroup": 1,
 9 |   "outputChannelsPerConvGroup": 1,
10 |   "fp8FormatFwd": "F152",
11 |   "fp8ScaleFwd": -1,
12 |   "fp8FormatWeights": "F143",
13 |   "fp8ScaleWeights": 1,
14 |   "fp8FormatBwd": "F152",
15 |   "fp8ScaleBwd": -2
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/poplin/json/method_amp.json:
--------------------------------------------------------------------------------
1 | {"method": {"type": "AMP"}}
2 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pc_T10392.json:
--------------------------------------------------------------------------------
1 | {"0":{"transform":{"swapOperands":true}}}
2 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pc_conv7x7_stride_2_1024_in_512_out_serial_fail_case.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "method": {"type": "AMP"},
 3 |   "inChansPerGroup": 4,
 4 |   "partialChansPerGroup": 8,
 5 |   "0": {
 6 |     "transform": {
 7 |       "swapOperands": false,
 8 |       "expandDims": [0],
 9 |       "outChanFlattenDims": []
10 |     },
11 |     "partition": {
12 |       "fieldSplit": {"0": 171, "1": 1},
13 |       "batchSplit": 1,
14 |       "outChanSplit": {
15 |         "serial": 64,
16 |         "parallel": 1
17 |       },
18 |       "kernelSplit": {"0": 1, "1": 1},
19 |       "inChanSplit": {
20 |         "serial": 1,
21 |         "parallel": 7
22 |       },
23 |       "convGroupSplit": 1
24 |     }
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pc_serial_split_ocx2.json:
--------------------------------------------------------------------------------
1 | {"0":{"partition":{"outChanSplit":{"serial":2}}}}
2 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pc_serial_split_ocx3.json:
--------------------------------------------------------------------------------
1 | {"0":{"partition":{"outChanSplit":{"serial":3}}}}
2 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pc_serial_split_ocx4.json:
--------------------------------------------------------------------------------
1 | {"0":{"partition":{"outChanSplit":{"serial":4}}}}
2 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pointwise_conv_float.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dataType": "float",
 3 |   "batchSize": 2,
 4 |   "numConvGroups": 1,
 5 |   "inputFieldShape": [17,32],
 6 |   "kernelShape": [1,1],
 7 |   "inputChannelsPerConvGroup": 16,
 8 |   "outputChannelsPerConvGroup": 25
 9 | }
10 | 


--------------------------------------------------------------------------------
/tests/poplin/json/pointwise_reusable_conv_float.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dataType": "float",
 3 |   "batchSize": 2,
 4 |   "numConvGroups": 1,
 5 |   "inputFieldShape": [17,32],
 6 |   "kernelShape": [1,1],
 7 |   "inputChannelsPerConvGroup": 16,
 8 |   "outputChannelsPerConvGroup": 16
 9 | }
10 | 


--------------------------------------------------------------------------------
/tests/poplin/json/simple_depthwise_conv.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"0": {
 3 | 		"transform": {
 4 | 			"combineConvGroupsFactor": [
 5 | 				2,
 6 | 				4,
 7 | 				8,
 8 | 				16
 9 | 			]
10 | 		}
11 | 	},
12 | 	"1": {
13 | 		"transform": {
14 | 			"combineConvGroupsFactor": [
15 | 				2,
16 | 				4,
17 | 				8,
18 | 				16
19 | 			]
20 | 		}
21 | 	}
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/poplin/json/slic144.json:
--------------------------------------------------------------------------------
1 | {
2 |   "method": {"type": "SLIC"},
3 |   "convGroupsPerGroup": 1,
4 |   "inChansPerGroup": 4,
5 |   "partialChansPerGroup": 4
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/poplin/json/slic1611.json:
--------------------------------------------------------------------------------
1 | {
2 |   "method": {"type": "SLIC"},
3 |   "convGroupsPerGroup": 16,
4 |   "inChansPerGroup": 1,
5 |   "partialChansPerGroup": 1
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/poplin/json/slic222.json:
--------------------------------------------------------------------------------
1 | {
2 |   "method": {"type": "SLIC"},
3 |   "convGroupsPerGroup": 2,
4 |   "inChansPerGroup": 2,
5 |   "partialChansPerGroup": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/poplin/json/slic411.json:
--------------------------------------------------------------------------------
1 | {
2 |   "method": {"type": "SLIC"},
3 |   "convGroupsPerGroup": 4,
4 |   "inChansPerGroup": 1,
5 |   "partialChansPerGroup": 1
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/poplin/json/slic811.json:
--------------------------------------------------------------------------------
1 | {
2 |   "method": {"type": "SLIC"},
3 |   "convGroupsPerGroup":8,
4 |   "inChansPerGroup": 1,
5 |   "partialChansPerGroup": 1
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/poplin/vertexVerifier/ConvPartialQuarterHalf1x1OutConvGroups.vv:
--------------------------------------------------------------------------------
 1 | // *** Make sure that the outer loops (convGroups) are used to pick out
 2 | // *** bugs in pointer arithmetic
 3 | testConfig testConvGroups;
 4 | 
 5 | in.size = 4
 6 | in[i].size = 128
 7 | in[i]=[1,5)
 8 | 
 9 | weights.size = 16
10 | weights[i].size = 512
11 | weights[i] = [1,5)
12 | 
13 | out.size = 8
14 | out[i].size = 80
15 | out[0] = [0.1, 0.2)
16 | 
17 | worklists.size = 1
18 | worklists[0].size = 18
19 | worklists[0]={0, -3, 0, 0, -1, 0, 8, -2, 8, 0, -3, 0, 12, -2, 12, 16, -2, 16}
20 | 
21 | inMetadata.size = 1
22 | inMetadata[0].size = 1
23 | inMetadata[0] = {0x8a}
24 | 
25 | weightsMetadata.size = 1
26 | weightsMetadata[0].size = 1
27 | weightsMetadata[0] = {0x8a}
28 | 
29 | numConvGroupsM1 = 1
30 | numOutGroupsM1 = 3
31 | numInGroups = 2
32 | 
33 | transformedInStride=1
34 | outChansPerGroup=16
35 | transformedOutStride=4
36 | inChansPerGroup=32
37 | 


--------------------------------------------------------------------------------
/tests/popnn/GraphProgLocationTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #define BOOST_TEST_MODULE GraphProgLocationTest
 3 | #include <boost/test/unit_test.hpp>
 4 | #include <fstream>
 5 | #include <poplar/Graph.hpp>
 6 | #include <popnn/codelets.hpp>
 7 | namespace utf = boost::unit_test;
 8 | namespace fpc = boost::test_tools::fpc;
 9 | 
10 | BOOST_AUTO_TEST_CASE(GraphProgLocation) {
11 |   poplar::Graph graph(poplar::Target::createCPUTarget());
12 |   BOOST_CHECK_NO_THROW(popnn::addCodelets(graph));
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/popops/AllTrueTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #define BOOST_TEST_MODULE AllTrueTest
 3 | #include <boost/test/unit_test.hpp>
 4 | #include <poplar/Engine.hpp>
 5 | #include <poplibs_support/TestDevice.hpp>
 6 | #include <popops/AllTrue.hpp>
 7 | #include <popops/codelets.hpp>
 8 | #include <poputil/exceptions.hpp>
 9 | 
10 | using namespace poplar;
11 | using namespace poplar::program;
12 | using namespace poputil;
13 | using namespace popops;
14 | using namespace poplibs_support;
15 | 
16 | #define DIM_SIZE 4
17 | 
18 | bool allTrueTest(bool in[DIM_SIZE]) {
19 |   auto device = createTestDevice(TEST_TARGET);
20 |   Graph graph(device.getTarget());
21 |   popops::addCodelets(graph);
22 | 
23 |   Tensor tIn = graph.addVariable(BOOL, {DIM_SIZE}, "t1");
24 |   graph.setTileMapping(tIn, 0);
25 | 
26 |   auto seq = Sequence();
27 |   const auto tOut = allTrue(graph, tIn, seq, "");
28 | 
29 |   graph.createHostWrite("in", tIn);
30 |   graph.createHostRead("out", tOut);
31 | 
32 |   bool out;
33 |   Engine eng(graph, seq);
34 |   device.bind([&](const Device &d) {
35 |     eng.load(d);
36 |     eng.writeTensor("in", in, &in[DIM_SIZE]);
37 |     eng.run();
38 |     eng.readTensor("out", &out, &out + 1);
39 |   });
40 |   return out;
41 | }
42 | 
43 | BOOST_AUTO_TEST_CASE(AllTrue) {
44 |   bool in[DIM_SIZE] = {true, true, true, true};
45 |   BOOST_CHECK_EQUAL(allTrueTest(in), true);
46 | }
47 | 
48 | BOOST_AUTO_TEST_CASE(NotAllTrue) {
49 |   bool in[DIM_SIZE] = {true, false, false, true};
50 |   BOOST_CHECK_EQUAL(allTrueTest(in), false);
51 | }
52 | 


--------------------------------------------------------------------------------
/tests/popops/PlanMultipleCorrectnessTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2017 Graphcore Ltd. All rights reserved.
 2 | #define BOOST_TEST_MODULE PlanMultipleCorrectnessTest
 3 | #include <boost/test/unit_test.hpp>
 4 | #include <poplibs_support/TestDevice.hpp>
 5 | #include <popops/DynamicSlice.hpp>
 6 | #include <vector>
 7 | 
 8 | using namespace poplar;
 9 | using namespace popops;
10 | using namespace popops::embedding;
11 | using namespace poplibs_support;
12 | 
13 | BOOST_AUTO_TEST_CASE(PlanMultipleCorrectness) {
14 |   constexpr static unsigned tilesPerIPU = 4;
15 |   auto device = createTestDevice(TEST_TARGET, 1, tilesPerIPU);
16 |   const auto &target = device.getTarget();
17 |   Graph graph(target);
18 | 
19 |   std::vector<SlicePlanningParameters> descriptions = {
20 |       SlicePlanningParameters(graph, HALF, 2, 1, 1, {1, 1}, {}),
21 |       SlicePlanningParameters(graph, FLOAT, 1, 4, 16, {2}, {}),
22 |   };
23 |   std::vector<SlicePlan> sequential_result;
24 |   for (const auto &d : descriptions) {
25 |     sequential_result.push_back(plan(d.graph, d.dataType, d.groupSize,
26 |                                      d.numEntries, d.outputSize, d.numLookups,
27 |                                      d.optionFlags));
28 |   }
29 |   std::vector<SlicePlan> parallel_result = planMultiple(descriptions);
30 |   // Comparing the results would be pointless if both plans were identical.
31 |   BOOST_CHECK((sequential_result[0] != sequential_result[1]));
32 |   BOOST_REQUIRE(sequential_result == parallel_result);
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/popops/codelets/BinaryOpRptLoopTest.vv:
--------------------------------------------------------------------------------
 1 | testConfig default;
 2 | in1.size = 1
 3 | in1[i].size = 20
 4 | in1[i]=[0,16.0)
 5 | 
 6 | in2.size = 1
 7 | in2[i].size = 20
 8 | in2[i]=[0,16.0)
 9 | 
10 | out.size = 1
11 | out[i].size = 20
12 | 


--------------------------------------------------------------------------------
/tests/popops/codelets/select/bool.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | // Use unsigned char, not bool: std::vector<bool> may pack elements into bits.
 3 | 
 4 | const std::vector<std::vector<unsigned char>> in1 = {
 5 |     {},
 6 |     {true},
 7 |     {true, true},
 8 |     {false, false, false},
 9 |     {false, false, false, false},
10 |     {true, false, true, false, true},
11 |     {0, 1, 0, 1, 0, 1, 0, 1, 0}};
12 | 
13 | const std::vector<std::vector<unsigned char>> in2 = {
14 |     {},
15 |     {false},
16 |     {false, false},
17 |     {true, true, true},
18 |     {true, true, true, true},
19 |     {false, true, false, true, false},
20 |     {1, 0, 1, 0, 1, 0, 1, 0, 1}};
21 | 
22 | const std::vector<std::vector<unsigned char>> expected = {
23 |     {},
24 |     {true},
25 |     {false, true},
26 |     {true, false, true},
27 |     {false, true, false, true},
28 |     {true, false, false, true, true},
29 |     {1, 0, 1, 1, 0, 1, 1, 1, 1}};
30 | 


--------------------------------------------------------------------------------
/tests/popops/codelets/select/float.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | const std::vector<std::vector<float>> in1 = {
 3 |     {},
 4 |     {1.1},
 5 |     {3.1, 3.3},
 6 |     {5.1, 5.3, 5.5},
 7 |     {7.1, 7.3, 7.5, 7.7},
 8 |     {9.1, 9.3, 9.5, 9.7, 9.9},
 9 |     {.1, .3, .5, .7, .9, 2.1, 2.3, 2.5, 2.7}};
10 | 
11 | const std::vector<std::vector<float>> in2 = {
12 |     {},
13 |     {2.0},
14 |     {4.0, 4.2},
15 |     {6.0, 6.2, 6.4},
16 |     {8.0, 8.2, 8.4, 8.6},
17 |     {10.0, 10.2, 10.4, 10.6, 10.8},
18 |     {.2, .4, .6, .8, 1.0, 2.2, 2.4, 2.6, 2.8}};
19 | 
20 | const std::vector<std::vector<float>> expected = {
21 |     {},
22 |     {1.1},
23 |     {4.0, 3.3},
24 |     {6.0, 5.3, 6.4},
25 |     {7.1, 8.2, 7.5, 8.6},
26 |     {9.1, 9.3, 10.4, 10.6, 9.9},
27 |     {.2, .4, .6, .7, .9, 2.1, 2.4, 2.5, 2.8}};
28 | 


--------------------------------------------------------------------------------
/tests/popops/codelets/select/half.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
2 | #include "float.hpp"
3 | 


--------------------------------------------------------------------------------
/tests/popops/codelets/select/int.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | const std::vector<std::vector<int>> in1 = {{},
 3 |                                            {11},
 4 |                                            {31, 33},
 5 |                                            {51, 53, 55},
 6 |                                            {71, 73, 75, 77},
 7 |                                            {91, 93, 95, 97, 99},
 8 |                                            {1, 3, 5, 7, 9, 21, 23, 25, 27}};
 9 | 
10 | const std::vector<std::vector<int>> in2 = {{},
11 |                                            {20},
12 |                                            {40, 42},
13 |                                            {60, 62, 64},
14 |                                            {80, 82, 84, 86},
15 |                                            {100, 102, 104, 106, 108},
16 |                                            {2, 4, 6, 8, 10, 22, 24, 26, 28}};
17 | 
18 | const std::vector<std::vector<int>> expected = {
19 |     {},
20 |     {11},
21 |     {40, 33},
22 |     {60, 53, 64},
23 |     {71, 82, 75, 86},
24 |     {91, 93, 104, 106, 99},
25 |     {2, 4, 6, 7, 9, 21, 24, 25, 28}};
26 | 


--------------------------------------------------------------------------------
/tests/popops/codelets/select/unsigned_int.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | const std::vector<std::vector<int>> in1 = {{},
 3 |                                            {11},
 4 |                                            {31, 33},
 5 |                                            {51, 53, 55},
 6 |                                            {71, 73, 75, 77},
 7 |                                            {91, 93, 95, 97, 99},
 8 |                                            {1, 3, 5, 7, 9, 21, 23, 25, 27}};
 9 | 
10 | const std::vector<std::vector<int>> in2 = {{},
11 |                                            {20},
12 |                                            {40, 42},
13 |                                            {60, 62, 64},
14 |                                            {80, 82, 84, 86},
15 |                                            {100, 102, 104, 106, 108},
16 |                                            {2, 4, 6, 8, 10, 22, 24, 26, 28}};
17 | 
18 | const std::vector<std::vector<int>> expected = {
19 |     {},
20 |     {11},
21 |     {40, 33},
22 |     {60, 53, 64},
23 |     {71, 82, 75, 86},
24 |     {91, 93, 104, 106, 99},
25 |     {2, 4, 6, 7, 9, 21, 24, 25, 28}};
26 | 


--------------------------------------------------------------------------------
/tests/popops/infiles/reduceInT59445.tensor:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/graphcore/poplibs/7eac0f266ced5b064b3284a3cd90d491745b387c/tests/popops/infiles/reduceInT59445.tensor


--------------------------------------------------------------------------------
/tests/popsparse/bs-m8x8_0.8_nr.txt:
--------------------------------------------------------------------------------
 1 | 8 8
 2 | 0 0 0 0 0 0 0 1
 3 | 0 0 1 0 1 0 0 0
 4 | 0 1 0 0 1 0 0 0
 5 | 0 0 1 0 0 1 0 0
 6 | 1 0 1 0 0 0 0 0
 7 | 0 0 0 0 0 0 0 0
 8 | 0 0 0 0 0 0 0 0
 9 | 0 1 0 1 0 0 1 0
10 | 


--------------------------------------------------------------------------------
/tests/poputil/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_unit_test(BroadcastToMatchTest BroadcastToMatchTest.cpp)
 2 | add_unit_test(CopyToIpu CopyToIpu.cpp VARIANTS ${IPUMODEL_VARIANTS})
 3 | add_unit_test(DuplicateTensor DuplicateTensor.cpp VARIANTS ${IPUMODEL_VARIANTS})
 4 | add_unit_test(GraphFunctionTest GraphFunctionTest.cpp)
 5 | add_unit_test(GraphReplication GraphReplication.cpp)
 6 | add_unit_test(LargeSplitRegionsTest LargeSplitRegionsTest.cpp)
 7 | add_unit_test(TileMappingTest TileMappingTest.cpp VARIANTS ${IPUMODEL_VARIANTS})
 8 | add_unit_test(UtilTest UtilTest.cpp VARIANTS NoTarget)
 9 | add_unit_test(VarStructureTest VarStructureTest.cpp VARIANTS ${IPUMODEL_VARIANTS})
10 | add_unit_test(OptionParsingTest OptionParsingTest.cpp VARIANTS NoTarget)
11 | 


--------------------------------------------------------------------------------
/tests/poputil/UtilTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Graphcore Ltd. All rights reserved.
 2 | #define BOOST_TEST_MODULE UtilTest
 3 | 
 4 | #include <boost/test/unit_test.hpp>
 5 | #include <poplar/Interval.hpp>
 6 | #include <poputil/Util.hpp>
 7 | 
 8 | #include <vector>
 9 | 
10 | BOOST_AUTO_TEST_CASE(CalculateUnshufflingIntervals) {
11 |   auto test = [](const std::vector<poplar::Interval> &intervals,
12 |                  const std::vector<poplar::Interval> &expected) {
13 |     const auto actual = poputil::calculateUnshufflingIntervals(intervals);
14 |     BOOST_REQUIRE_EQUAL(actual.size(), expected.size()); // guards indexing
15 |     for (std::size_t i = 0; i < actual.size(); i++) {
16 |       BOOST_CHECK_EQUAL(actual[i].lower(), expected[i].lower());
17 |       BOOST_CHECK_EQUAL(actual[i].upper(), expected[i].upper());
18 |     }
19 |   };
20 | 
21 |   // original: 0, 1, 2, 3, 4
22 |   // shuffled: 0, 1, 2, 3, 4
23 |   test({{0, 1}, {1, 2}, {2, 5}}, {{0, 1}, {1, 2}, {2, 5}});
24 | 
25 |   // original: 0, 1, 2, 3, 4
26 |   // shuffled: 3, 4, 0, 1, 2
27 |   test({{3, 5}, {0, 1}, {1, 3}}, {{2, 3}, {3, 5}, {0, 2}});
28 | 
29 |   // original: 0, 1, 2, 3, 4
30 |   // shuffled: 4, 3, 2, 1, 0
31 |   test({{4, 5}, {3, 4}, {2, 3}, {1, 2}, {0, 1}},
32 |        {{4, 5}, {3, 4}, {2, 3}, {1, 2}, {0, 1}});
33 | 
34 |   // original: 0, 1, 2, 3, 4
35 |   // shuffled: 4, 3, 0, 1, 2
36 |   test({{4, 5}, {3, 4}, {0, 3}}, {{2, 5}, {1, 2}, {0, 1}});
37 | }
38 | 


--------------------------------------------------------------------------------
/tests/sanity/ConsistentExecutableTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Graphcore Ltd. All rights reserved.
 2 | #include <cstdio>
 3 | 
 4 | #define BOOST_TEST_MODULE ConsistentExecutableTest
 5 | #include <boost/test/unit_test.hpp>
 6 | 
 7 | static int checkExit(int rc) {
 8 |   // Return the exit code if the process exited normally, otherwise the raw rc.
 9 |   return WIFEXITED(rc) ? WEXITSTATUS(rc) : rc;
10 | }
11 | 
12 | BOOST_AUTO_TEST_CASE(ConsistentExecutable) {
13 |   // Create two executables and check they're identical.
14 | #ifndef EXECUTABLE
15 | #error "EXECUTABLE must be defined"
16 | #endif
17 |   BOOST_TEST(checkExit(system(EXECUTABLE " one.exe")) == EXIT_SUCCESS);
18 |   BOOST_TEST(checkExit(system(EXECUTABLE " two.exe")) == EXIT_SUCCESS);
19 |   BOOST_TEST(checkExit(system("cmp one.exe two.exe")) == EXIT_SUCCESS);
20 |   std::remove("one.exe");
21 |   std::remove("two.exe");
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/sanity/EnumerateDevices.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | 
 3 | #define BOOST_TEST_MODULE EnumerateDevices
 4 | #include "poplibs_support/TestDevice.hpp"
 5 | using namespace poplibs_support;
 6 | 
 7 | BOOST_AUTO_TEST_CASE(Enumerate) {
 8 |   // This test relies on createTestDeviceFullSize() throwing if it fails to
 9 |   // enumerate at least one device. Could be more sophisticated in future:
10 |   auto device = createTestDeviceFullSize(DeviceType::Hw);
11 |   const auto argc = boost::unit_test::framework::master_test_suite().argc;
12 |   const auto argv = boost::unit_test::framework::master_test_suite().argv;
13 |   if (argc > 1) {
14 |     if (argc != 2) {
15 |       throw std::logic_error(
16 |           "Too many arguments: Takes one optional argument: <arch>");
17 |     }
18 |     const auto archString = device.getTarget().getTargetArchString();
19 |     const auto requiredArchString = argv[1];
20 |     BOOST_CHECK(archString == requiredArchString);
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/sanity/ParallelTest.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2018 Graphcore Ltd. All rights reserved.
 2 | #define BOOST_TEST_MODULE ParallelTest
 3 | #include <boost/test/unit_test.hpp>
 4 | #include <poplibs_support/TestDevice.hpp>
 5 | #include <popops/codelets.hpp>
 6 | 
 7 | #include <atomic>
 8 | #include <iostream>
 9 | #include <thread>
10 | 
11 | using namespace poplar;
12 | using namespace popops;
13 | using namespace poplibs_support;
14 | 
15 | // Creates a test device and graph and adds the popops codelets concurrently
16 | // from many threads, and checks that none of the threads throws.
17 | BOOST_AUTO_TEST_CASE(ManyParallelGraphLoads) {
18 |   std::atomic<bool> success{true};
19 | 
20 |   // hardware_concurrency() may return 0 when the value is not computable;
21 |   // always run at least one thread so the test cannot pass vacuously.
22 |   const size_t hwThreads = std::thread::hardware_concurrency();
23 |   const size_t nthreads = hwThreads == 0 ? 1 : hwThreads;
24 | 
25 |   std::vector<std::thread> threads;
26 |   threads.reserve(nthreads);
27 | 
28 |   for (size_t t = 0; t < nthreads; t++) {
29 |     threads.emplace_back([&]() {
30 |       // Exceptions can't be thrown across threads so just
31 |       // catch everything and print a message if it failed.
32 |       try {
33 |         auto device = createTestDevice(TEST_TARGET);
34 | 
35 |         Graph graph(device.getTarget());
36 |         popops::addCodelets(graph);
37 |       } catch (const std::exception &e) {
38 |         std::cout << ((std::string("Exception: ") + e.what()) + "\n");
39 |         success = false;
40 |       } catch (...) {
41 |         // A non-std exception escaping the thread would call std::terminate.
42 |         std::cout << "Exception: unknown\n";
43 |         success = false;
44 |       }
45 |     });
46 |   }
47 | 
48 |   for (auto &thread : threads) {
49 |     thread.join();
50 |   }
51 | 
52 |   BOOST_CHECK(success);
53 | }
54 | 


--------------------------------------------------------------------------------
/tests/sanity/dependencies/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(poplib_libraries "poplibs_support;poputil;popops;poprand;popfloat;poplin;popnn;popsparse")
 2 | 
 3 | # Test that we only include files against linked libraries
 4 | # CMake deals with cyclic dependencies as long as we are upfront with dependencies
 5 | set(arg_string "")
 6 | foreach (lib IN LISTS poplib_libraries)
 7 |     if(TARGET ${lib})
 8 |         get_target_property(deps_semi_colon_sep ${lib} LINK_LIBRARIES)
 9 |         string(REPLACE "-" "" deps_semi_colon_sep "${deps_semi_colon_sep}") # So python isn't confused
10 |         string(APPEND arg_string "-d;${lib};${deps_semi_colon_sep};")
11 |     endif()
12 | endforeach()
13 | 
14 | add_test(
15 |     NAME poplibs_cyclic_dependency
16 |     COMMAND ${PYTHON_EXECUTABLE}
17 |             ${CMAKE_SOURCE_DIR}/tests/sanity/dependencies/check_cmake_link_include.py
18 |             ${CMAKE_SOURCE_DIR}
19 |             ${arg_string}
20 | )
21 | # T22741: There is currently a cyclic dependency with option parsing in plan constraints
22 | set_tests_properties(poplibs_cyclic_dependency PROPERTIES DISABLED TRUE)
23 | set(poplibs_cyclic_dependency_LABELS "python_tool")
24 | add_parent_dir_labels(poplibs_cyclic_dependency_LABELS)
25 | if (poplibs_cyclic_dependency_LABELS)
26 |     set_tests_properties(poplibs_cyclic_dependency
27 |         PROPERTIES LABELS "${poplibs_cyclic_dependency_LABELS}")
28 | endif()
29 | 


--------------------------------------------------------------------------------
/tests/soak-hw.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "tests": [
 3 |         {
 4 |             "name": "single conv layer - Hw compared against model",
 5 |             "program": "${ROOT_DIR}/poplibs/tools/single_conv_layer_random.py",
 6 |             "args": [
 7 |                 "--seed", "${TEST_SEED}",
 8 |                 "--device-type", "Hw",
 9 |                 "--ipus", "1",
10 |                 "--binary", "${BUILD_DIR}/build/poplibs/tools/single_conv_layer"
11 |             ],
12 |             "env": {
13 |               "POPLAR_RUNTIME_OPTIONS": "{\"target.hostSyncTimeout\": \"600.0\"}"
14 |             },
15 |             "testArgs": {
16 |                 "timeout": 1200,
17 |                 "skip": [77, 245],
18 |                 "bailout": 10
19 |             },
20 |             "weight": 0.9
21 |         },
22 |         {
23 |             "name": "single conv layer - Hw compared against previous runs",
24 |             "program": "${ROOT_DIR}/poplibs/tools/single_conv_layer_random.py",
25 |             "args": [
26 |                 "--seed", "${TEST_SEED}",
27 |                 "--device-type", "Hw",
28 |                 "--num-determinism-checks", "5",
29 |                 "--ipus", "1",
30 |                 "--binary", "${BUILD_DIR}/build/poplibs/tools/single_conv_layer"
31 |             ],
32 |             "env": {
33 |               "POPLAR_RUNTIME_OPTIONS": "{\"target.hostSyncTimeout\": \"600.0\"}"
34 |             },
35 |             "testArgs": {
36 |                 "timeout": 1200,
37 |                 "skip": [77, 245],
38 |                 "bailout": 10
39 |             },
40 |             "weight": 0.1
41 |         }
42 |     ]
43 | }
44 | 


--------------------------------------------------------------------------------
/tests/soak.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "tests": [
 3 |         {
 4 |             "name": "single conv layer - Sim2",
 5 |             "program": "${ROOT_DIR}/poplibs/tools/single_conv_layer_random.py",
 6 |             "args": [
 7 |                 "--seed", "${TEST_SEED}",
 8 |                 "--device-type", "Sim2",
 9 |                 "--binary", "${BUILD_DIR}/build/poplibs/tools/single_conv_layer"
10 |             ],
11 |             "env": {
12 |               "POPLAR_RUNTIME_OPTIONS": "{\"target.hostSyncTimeout\": \"600.0\"}"
13 |             },
14 |             "testArgs": {
15 |                 "timeout": 1200,
16 |                 "skip": [77, 245],
17 |                 "bailout": 10
18 |             },
19 |             "weight": 9,
20 |             "parallel": 40
21 |         },
22 |         {
23 |             "name": "multi conv layer - Sim2",
24 |             "program": "${ROOT_DIR}/poplibs/tools/single_conv_layer_random.py",
25 |             "args": [
26 |                 "--seed", "${TEST_SEED}",
27 |                 "--device-type", "Sim2",
28 |                 "--binary", "${BUILD_DIR}/build/poplibs/tools/multi_conv_layer",
29 |                 "--large",
30 |                 "--json",
31 |                 "--num-convs", "0"
32 |             ],
33 |             "env": {
34 |               "POPLAR_RUNTIME_OPTIONS": "{\"target.hostSyncTimeout\": \"600.0\"}"
35 |             },
36 |             "testArgs": {
37 |                 "timeout": 1200,
38 |                 "skip": [77, 245],
39 |                 "bailout": 10
40 |             },
41 |             "weight": 1,
42 |             "parallel": 40
43 |         }
44 |     ]
45 | }
46 | 


--------------------------------------------------------------------------------
/tools/single_conv_soak.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) 2019 Graphcore Ltd. All rights reserved.
 3 | 
 4 | import os
 5 | import subprocess
 6 | import sys
 7 | 
 8 | if len(sys.argv) != 2:
 9 |     raise Exception(f"{sys.argv[0]} expects a single argument, the random seed")
10 | 
11 | seed = sys.argv[1]
12 | script = os.path.join(sys.path[0], "single_conv_layer_random.py")
13 | 
14 | rc = subprocess.call([
15 |     script,
16 |     "--n", "1",
17 |     "--seed", seed,
18 |     "--device-type", "IpuModel",
19 |     "--tiles-per-ipu", "1216",
20 | ])
21 | sys.exit(rc)
22 | 


--------------------------------------------------------------------------------